sklearn
归一化处理
from sklearn.preprocessing import MinMaxScaler
# Normalize the dataset: fit the scaler on `dataset` and rescale it into
# the range given by feature_range, here (0, 1).
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
# Scale the data back to the original representation, reusing the scaler
# fitted above.
# NOTE(review): presumably inv_y has the same number of columns as the
# dataset the scaler was fitted on -- confirm before relying on this.
inv_y = scaler.inverse_transform(inv_y)
求RMSE
from math import sqrt

from sklearn.metrics import mean_squared_error

# RMSE is the square root of the mean squared error.
# mean_squared_error takes (y_true, y_pred) in that order.
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
GridSearchCV
from sklearn import svm
from sklearn.model_selection import GridSearchCV

# Exhaustive search over specified parameter values for an estimator,
# i.e. automatic hyperparameter tuning.
# Hyperparameter candidates: each dict is a separate grid, so the linear
# kernel is searched over C only and the RBF kernel over C x gamma.
param_grid = [
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]
svm_model = svm.SVC()
# Build the grid search around the estimator with 5-fold cross-validation,
# then run it on the training data.
clf = GridSearchCV(svm_model, param_grid, cv=5)
clf.fit(X_train, y_train)
# Retrieve the best model found by the search.
best_model = clf.best_estimator_
# Show the best hyperparameter configuration.
print(clf.best_params_)
LabelEncoder
# Encode target labels with value between 0 and n_classes-1.
>>> from sklearn import preprocessing
>>> le = preprocessing.LabelEncoder()
>>> le.fit([1, 2, 2, 6])
LabelEncoder()
>>> le.classes_
array([1, 2, 6])
>>> le.transform([1, 1, 2, 6])
array([0, 0, 1, 2]...)
>>> le.inverse_transform([0, 0, 1, 2])
array([1, 1, 2, 6])