How to improve the accuracy of a random forest classifier: code examples
Example 1: tune hyperparameters with GridSearchCV
from sklearn.model_selection import GridSearchCV

# Exhaustive grid search over the candidate hyperparameters with 5-fold cross-validation
cv = GridSearchCV(rfc, parameters, cv=5)
cv.fit(train_features, train_label.values.ravel())
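The snippet above assumes `rfc` is an untrained RandomForestClassifier and `parameters` is a grid of candidate hyperparameters; neither is shown on this page. A minimal sketch of that setup (the grid values are illustrative, not tuned recommendations):

from sklearn.ensemble import RandomForestClassifier

# Base estimator to tune (assumed to be what `rfc` refers to above)
rfc = RandomForestClassifier(random_state=42)

# Illustrative hyperparameter grid (assumed contents of `parameters`)
parameters = {
    'n_estimators': [100, 300, 500],
    'max_depth': [None, 10, 20],
    'min_samples_leaf': [1, 2, 4],
}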
Example 2: print the grid-search results
# Summarise the fitted grid search with the helper defined in Example 3
display(cv)
Example 3: helper to summarise GridSearchCV results
def display(results):
    print(f'Best parameters are: {results.best_params_}')
    print("\n")
    mean_score = results.cv_results_['mean_test_score']
    std_score = results.cv_results_['std_test_score']
    params = results.cv_results_['params']
    for mean, std, param in zip(mean_score, std_score, params):
        print(f'{round(mean, 3)} +/- {round(std, 3)} for {param}')
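Examples 1 to 3 fit together as one workflow. A hedged, self-contained sketch of that workflow on toy data (dataset, variable names, and grid values are stand-ins for whatever the original snippets assumed):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split

# Toy data standing in for train_features / train_label
X, y = make_classification(n_samples=500, n_features=12, random_state=0)
train_features, test_features, train_label, test_label = train_test_split(
    X, y, test_size=0.25, random_state=0)

rfc = RandomForestClassifier(random_state=0)
parameters = {'n_estimators': [100, 300], 'max_depth': [None, 10]}

cv = GridSearchCV(rfc, parameters, cv=5)
cv.fit(train_features, train_label)  # .values.ravel() is only needed when the label is a DataFrame

print(cv.best_params_, cv.best_score_)  # or display(cv), using the helper from Example 3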
Example 4: number of features needed for 95% cumulative importance
# Find number of features for cumulative importance of 95%
# Add 1 because Python is zero-indexed
print('Number of features for 95% importance:', np.where(cumulative_importances > 0.95)[0][0] + 1)

# Output: Number of features for 95% importance: 6
Example 5: plot cumulative feature importances
import numpy as np
import matplotlib.pyplot as plt

# List of features sorted from most to least important
sorted_importances = [importance[1] for importance in feature_importances]
sorted_features = [importance[0] for importance in feature_importances]

# Cumulative importances
cumulative_importances = np.cumsum(sorted_importances)

# Make a line graph
plt.plot(x_values, cumulative_importances, 'g-')

# Draw line at 95% of importance retained
plt.hlines(y=0.95, xmin=0, xmax=len(sorted_importances), color='r', linestyles='dashed')

# Format x ticks and labels
plt.xticks(x_values, sorted_features, rotation='vertical')

# Axis labels and title
plt.xlabel('Variable'); plt.ylabel('Cumulative Importance'); plt.title('Cumulative Importances');
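Examples 4 and 5 assume `feature_importances` (a list of (name, importance) pairs sorted from most to least important) and `x_values`, neither of which is defined on this page. A minimal, self-contained sketch of that setup on toy data; the feature names and model are illustrative stand-ins:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

# Fit a small forest on toy data so importances exist (stand-in for the real fitted rfc)
X, y = make_classification(n_samples=300, n_features=8, random_state=0)
feature_names = [f'feat_{i}' for i in range(X.shape[1])]  # hypothetical column names
rfc = RandomForestClassifier(random_state=0).fit(X, y)

# (name, importance) pairs sorted from most to least important
feature_importances = sorted(
    zip(feature_names, rfc.feature_importances_),
    key=lambda pair: pair[1],
    reverse=True,
)

# x positions for the cumulative-importance plot in Example 5
x_values = list(range(len(feature_importances)))

With these in place, Example 5 draws the cumulative-importance curve and Example 4 reads off how many top-ranked features are needed to retain 95% of the total importance, which is a common basis for dropping low-importance features before retraining.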