sklearn.model_selection.GridSearchCV code example

Example 1: gridsearchcv

# Split the data into train and test parts; the fixed seed keeps the split repeatable.
(x_train, x_test,
 y_train, y_test) = train_test_split(x, y, random_state=50)

# Untuned XGBoost classifier used as the base estimator for the grid search.
xgb = XGBClassifier()
----------------------------------------------------------------------
from sklearn.model_selection import GridSearchCV

# Candidate XGBoost hyperparameters; the search tries every combination
# (4 learning rates * 6 depths * 5 column-sampling ratios = 120 candidates).
parameters = [{
    'learning_rate': [0.1, 0.2, 0.3, 0.4],
    'max_depth': [3, 4, 5, 6, 7, 8],
    'colsample_bytree': [0.5, 0.6, 0.7, 0.8, 0.9],
}]

# 10-fold cross-validated search, scored by accuracy, using all CPU cores.
gscv = GridSearchCV(xgb, parameters, scoring='accuracy', n_jobs=-1, cv=10)

# Tune on the training split only: searching on the full (x, y) would let the
# test split influence the chosen hyperparameters and inflate later test scores.
grid_search = gscv.fit(x_train, y_train)
grid_search.best_params_
-----------------------------------------------------------------------
# Re-create the same train/test split (same seed) used before the grid search.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=50)

# Classifier configured with the hyperparameters selected by the grid search.
xgb = XGBClassifier(colsample_bytree=0.8, learning_rate=0.4, max_depth=4)

# Fit on the training split only; fitting on all of (x, y) would mean the model
# has already seen x_test, making the reported accuracy meaningless.
xgb.fit(x_train, y_train)
pred = xgb.predict(x_test)

# accuracy_score is provided by sklearn.metrics.
print('Accuracy=  ', accuracy_score(y_test, pred))
-----------------------------------------------------------------------
# Cross-validate the model (classification) and check the cross_val_score;
# the model giving the highest score will be chosen as the final model.
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict, cross_val_score

xgb = XGBClassifier(colsample_bytree=0.8, learning_rate=0.4, max_depth=4)

# Mean accuracy over 10 folds.
cvs = cross_val_score(xgb, x, y, scoring='accuracy', cv=10)
print('cross_val_scores=  ', cvs.mean())

# Out-of-fold predictions: each sample is predicted by a model that did not
# see it during training.
y_pred = cross_val_predict(xgb, x, y, cv=10)

# confusion_matrix takes (y_true, y_pred): rows are the true classes and
# columns the predicted ones. Swapping the arguments transposes the matrix.
conf_mat = confusion_matrix(y, y_pred)
conf_mat
---------------------------------------------------------------------------
# Cross-validate the model (regression) and check the cross_val_score;
# the model giving the highest score will be chosen as the final model.
gbm = GradientBoostingRegressor(max_depth=7, min_samples_leaf=1, n_estimators=100)

# R^2 is a regression metric, so it is the regressor `gbm` that must be scored
# here (5-fold cross-validation).
cvs = cross_val_score(gbm, x, y, scoring='r2', cv=5)
print('cross_val_scores=  ', cvs.mean())
-------------------------------------------------------------------------------
# Parameter grids for GridSearchCV, one per estimator type.
# Each assignment overwrites `parameters`; copy only the grid you need.
import numpy as np

# xgboost
parameters = [{'learning_rate': [0.1, 0.2, 0.3, 0.4],
               'max_depth': [3, 4, 5, 6, 7, 8],
               'colsample_bytree': [0.5, 0.6, 0.7, 0.8, 0.9]}]
# random forest
parameters = [{'max_depth': [5, 7, 9, 10],
               'min_samples_leaf': [1, 2],
               'n_estimators': [100, 250, 500]}]
# gradient boosting
parameters = [{'max_depth': [5, 7, 9, 10],
               'min_samples_leaf': [1, 2],
               'n_estimators': [100, 250, 500]}]
# k-nearest neighbors
parameters = {'n_neighbors': [5, 6, 8, 10, 12, 14, 15]}
# logistic regression
# The default 'lbfgs' solver does not support the 'l1' penalty, so the grid
# pins a solver ('liblinear') that handles both 'l1' and 'l2'.
parameters = {'penalty': ['l1', 'l2'],
              'C': [1, 2, 3, 4, 5],
              'solver': ['liblinear']}
# gaussian naive bayes
parameters = {'var_smoothing': np.logspace(0, -9, num=100)}
# SVC
parameters = [{'C': [0.1, 0.5, 1, 2, 3], 'kernel': ['rbf', 'poly']}]
# adaboost
# `lr` must be an already-constructed base estimator (presumably a
# LogisticRegression instance -- confirm against the surrounding notebook).
# The key is 'estimator' in scikit-learn >= 1.2; versions before 1.2 used
# 'base_estimator', which was removed in 1.4.
parameters = [{'estimator': [lr],
               'learning_rate': [1, 0.1, 0.001],
               'n_estimators': [100, 150, 250]}]
# decision tree
parameters = [{'criterion': ['gini', 'entropy'],
               'max_depth': [5, 7, 9, 10],
               'min_samples_leaf': [1, 2]}]

Example 2: gridsearchcv multiple estimators

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

# Hold out 20% of the samples to compare the tuned estimators on unseen data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


def _text_grid(**clf_options):
    """Build a pipeline grid: both n-gram ranges plus the given classifier options.

    Each keyword is prefixed with ``clf__`` so it targets the 'clf' pipeline step.
    """
    grid = {'vect__ngram_range': [(1, 1), (1, 2)]}
    for option, candidates in clf_options.items():
        grid['clf__' + option] = candidates
    return grid


# The three lists below are parallel: entry i of each describes one experiment.
names = [
    "Naive Bayes",
    "Linear SVM",
    "Logistic Regression",
    "Random Forest",
    "Multilayer Perceptron",
]

classifiers = [
    MultinomialNB(),
    LinearSVC(),
    LogisticRegression(),
    RandomForestClassifier(),
    MLPClassifier(),
]

parameters = [
    _text_grid(alpha=(1e-2, 1e-3)),
    _text_grid(C=np.logspace(-5, 1, 5)),
    _text_grid(C=np.logspace(-5, 1, 5)),
    _text_grid(max_depth=(1, 2)),
    _text_grid(alpha=(1e-2, 1e-3)),
]

# Tune each classifier behind the same TF-IDF vectorizer, then report how the
# grid search scores on the held-out test split.
for label, estimator, grid in zip(names, classifiers, parameters):
    steps = [
        ('vect', TfidfVectorizer(stop_words='english')),
        ('clf', estimator),
    ]
    search = GridSearchCV(Pipeline(steps), param_grid=grid, n_jobs=-1)
    search.fit(X_train, y_train)
    test_score = search.score(X_test, y_test)
    print("{} score: {}".format(label, test_score))