sklearn

归一化处理

from sklearn.preprocessing import MinMaxScaler

# Rescale the dataset into the [0, 1] range: learn the scaling from the data
# first, then apply it to that same data.
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit(dataset).transform(dataset)

# Undo the scaling with the same fitted scaler so the values are expressed in
# their original representation again.
inv_y = scaler.inverse_transform(inv_y)

求RMSE

from math import sqrt

from sklearn.metrics import mean_squared_error

# RMSE (root mean squared error) is the square root of the mean squared error
# between the reference values (inv_y) and the predictions (inv_yhat).
# NOTE(review): both arrays are presumably already scaled back to the original
# units -- confirm against the code that produces them.
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))

GridSearchCV

# Exhaustive search over specified parameter values for an estimator,
# i.e. automatic hyperparameter tuning.
from sklearn import svm
from sklearn.model_selection import GridSearchCV

# Hyperparameter candidates: each dict is a separate grid, so the linear
# kernel is searched over C only, while the RBF kernel is searched over every
# combination of C and gamma.
param_grid = [
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]

svm_model = svm.SVC()

# Build the grid search with 5-fold cross-validation and run it on the
# training data.
clf = GridSearchCV(svm_model, param_grid, cv=5)
clf.fit(X_train, y_train)

# Retrieve the best model found by the search.
best_model = clf.best_estimator_

# Show the best hyperparameter configuration.
print(clf.best_params_)

LabelEncoder

# Encode target labels with value between 0 and n_classes-1.
# In the session below, fit() records the distinct labels in classes_,
# transform() replaces each label with its index in classes_, and
# inverse_transform() maps those indices back to the original labels.
>>> from sklearn import preprocessing
>>> le = preprocessing.LabelEncoder()
>>> le.fit([1, 2, 2, 6])
LabelEncoder()
>>> le.classes_
array([1, 2, 6])
>>> le.transform([1, 1, 2, 6])
array([0, 0, 1, 2]...)
>>> le.inverse_transform([0, 0, 1, 2])
array([1, 1, 2, 6])