Example 1: GridSearchCV
# Partition the data into train and test splits; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=50,
)
# Untuned XGBoost classifier that the grid search below uses as its base estimator.
xgb = XGBClassifier()
----------------------------------------------------------------------
# Exhaustive grid search over XGBoost hyperparameters, scored by accuracy with
# 10-fold cross-validation.
from sklearn.model_selection import GridSearchCV

parameters = [{
    'learning_rate': [0.1, 0.2, 0.3, 0.4],
    'max_depth': [3, 4, 5, 6, 7, 8],
    'colsample_bytree': [0.5, 0.6, 0.7, 0.8, 0.9],
}]
gscv = GridSearchCV(xgb, parameters, scoring='accuracy', n_jobs=-1, cv=10)
# Search on the training split only: tuning on all of (x, y) would let the
# held-out test rows influence the chosen hyperparameters.
grid_search = gscv.fit(x_train, y_train)
# Bare expression: displays the winning parameter combination in a notebook cell.
grid_search.best_params_
-----------------------------------------------------------------------
# Refit the classifier with the tuned hyperparameters and report accuracy on
# the held-out test split.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=50)
xgb = XGBClassifier(colsample_bytree=0.8, learning_rate=0.4, max_depth=4)
# Train on the training split only; fitting on all of (x, y) would include the
# test rows in training and inflate the accuracy printed below.
xgb.fit(x_train, y_train)
pred = xgb.predict(x_test)
print('Accuracy= ', accuracy_score(y_test, pred))
-----------------------------------------------------------------------
# Cross-validate the classifier and check its cross_val_score; the model with
# the highest score will be chosen as the final model.
from sklearn.model_selection import cross_val_predict, cross_val_score

xgb = XGBClassifier(colsample_bytree=0.8, learning_rate=0.4, max_depth=4)
# Mean accuracy over 10 folds.
cvs = cross_val_score(xgb, x, y, scoring='accuracy', cv=10)
print('cross_val_scores= ', cvs.mean())
# Out-of-fold predictions: each row is predicted by a model that did not train on it.
y_pred = cross_val_predict(xgb, x, y, cv=10)
# confusion_matrix takes (y_true, y_pred); passing them in that order puts the
# true labels on the rows and the predicted labels on the columns.
conf_mat = confusion_matrix(y, y_pred)
# Bare expression: displays the matrix in a notebook cell.
conf_mat
---------------------------------------------------------------------------
# Cross-validate the regressor and check its cross_val_score; the model with
# the highest score will be chosen as the final model.
gbm = GradientBoostingRegressor(max_depth=7, min_samples_leaf=1, n_estimators=100)
# Score the regressor defined above (not the earlier classifier) with R^2 over 5 folds.
cvs = cross_val_score(gbm, x, y, scoring='r2', cv=5)
print('cross_val_scores= ', cvs.mean())
-------------------------------------------------------------------------------
# Reference parameter grids for GridSearchCV, one per estimator.
# Each assignment overwrites `parameters`; copy the one that matches the model being tuned.

# XGBoost
parameters = [{
    'learning_rate': [0.1, 0.2, 0.3, 0.4],
    'max_depth': [3, 4, 5, 6, 7, 8],
    'colsample_bytree': [0.5, 0.6, 0.7, 0.8, 0.9],
}]
# Random forest
parameters = [{'max_depth': [5, 7, 9, 10], 'min_samples_leaf': [1, 2], 'n_estimators': [100, 250, 500]}]
# Gradient boosting
parameters = [{'max_depth': [5, 7, 9, 10], 'min_samples_leaf': [1, 2], 'n_estimators': [100, 250, 500]}]
# K-nearest neighbors
parameters = {'n_neighbors': [5, 6, 8, 10, 12, 14, 15]}
# Logistic regression
# The default 'lbfgs' solver does not support the 'l1' penalty, so every 'l1'
# candidate would fail to fit; 'saga' supports both 'l1' and 'l2'.
# NOTE(review): 'saga' may need a larger max_iter on unscaled features — confirm.
parameters = {'penalty': ['l1', 'l2'], 'C': [1, 2, 3, 4, 5], 'solver': ['saga']}
# Gaussian naive Bayes: 100 log-spaced smoothing values from 1 down to 1e-9
parameters = {'var_smoothing': np.logspace(0, -9, num=100)}
# SVC
parameters = [{'C': [0.1, 0.5, 1, 2, 3], 'kernel': ['rbf', 'poly']}]
# AdaBoost
# `lr` is not defined in this snippet — presumably a LogisticRegression instance; verify.
# NOTE(review): scikit-learn 1.2 renamed AdaBoost's `base_estimator` to
# `estimator` and 1.4 removed the old name — confirm against the installed version.
parameters = [{'base_estimator': [lr], 'learning_rate': [1, 0.1, 0.001], 'n_estimators': [100, 150, 250]}]
# Decision tree
parameters = [{'criterion': ['gini', 'entropy'], 'max_depth': [5, 7, 9, 10], 'min_samples_leaf': [1, 2]}]
Example 2: GridSearchCV with multiple estimators
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
# Grid-search several text classifiers behind the same TF-IDF front end and
# report each tuned model's score on a held-out 20% test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# names, classifiers and parameters are parallel lists: entry i of each
# describes the same model.
names = [
    "Naive Bayes",
    "Linear SVM",
    "Logistic Regression",
    "Random Forest",
    "Multilayer Perceptron",
]
classifiers = [
    MultinomialNB(),
    LinearSVC(),
    LogisticRegression(),
    RandomForestClassifier(),
    MLPClassifier(),
]
# Keys use the pipeline's "<step>__<param>" form: 'vect' is the vectorizer
# step and 'clf' is the classifier step of the pipeline built in the loop.
parameters = [
    {'vect__ngram_range': [(1, 1), (1, 2)],
     'clf__alpha': (1e-2, 1e-3)},
    {'vect__ngram_range': [(1, 1), (1, 2)],
     'clf__C': (np.logspace(-5, 1, 5))},
    {'vect__ngram_range': [(1, 1), (1, 2)],
     'clf__C': (np.logspace(-5, 1, 5))},
    {'vect__ngram_range': [(1, 1), (1, 2)],
     'clf__max_depth': (1, 2)},
    {'vect__ngram_range': [(1, 1), (1, 2)],
     'clf__alpha': (1e-2, 1e-3)},
]

for name, classifier, params in zip(names, classifiers, parameters):
    # Vectorize inside the pipeline so the TF-IDF vocabulary is learned only
    # from the training folds of each cross-validation split.
    clf_pipe = Pipeline([
        ('vect', TfidfVectorizer(stop_words='english')),
        ('clf', classifier),
    ])
    gs_clf = GridSearchCV(clf_pipe, param_grid=params, n_jobs=-1)
    clf = gs_clf.fit(X_train, y_train)
    # Score the fitted search object on the held-out test split.
    score = clf.score(X_test, y_test)
    print(f"{name} score: {score}")