python naive bayes classifier example

Example 1: Naive Bayes classifier with scikit-learn

# Train a Gaussian naive Bayes classifier on the iris dataset and report how
# many held-out samples it mislabels.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Feature matrix X and class labels y.
X, y = load_iris(return_X_y=True)
# Hold out half of the samples for testing; random_state=0 makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
gnb = GaussianNB()
# fit() returns the fitted estimator, so predict() can be chained onto it.
y_pred = gnb.fit(X_train, y_train).predict(X_test)
# (y_test != y_pred) is an element-wise comparison; summing it counts the
# misclassified test samples.
print("Number of mislabeled points out of a total %d points : %d"
      % (X_test.shape[0], (y_test != y_pred).sum()))
# Expected output:
# Number of mislabeled points out of a total 75 points : 4

Example 2: Write a program in Python/R to demonstrate naive Bayes classification

>>> from sklearn.naive_bayes import GaussianNB
>>> from sklearn.naive_bayes import MultinomialNB
>>> from sklearn import datasets
>>> from sklearn.metrics import confusion_matrix
>>> from sklearn.model_selection import train_test_split

>>> iris = datasets.load_iris()
>>> X = iris.data
>>> y = iris.target

# Split the data into a training set and a test set
>>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
>>> gnb = GaussianNB()
>>> mnb = MultinomialNB()

>>> y_pred_gnb = gnb.fit(X_train, y_train).predict(X_test)
>>> cnf_matrix_gnb = confusion_matrix(y_test, y_pred_gnb)

>>> print(cnf_matrix_gnb)
[[16 0 0]
 [ 0 18 0]
 [ 0 0 11]]

>>> y_pred_mnb = mnb.fit(X_train, y_train).predict(X_test)
>>> cnf_matrix_mnb = confusion_matrix(y_test, y_pred_mnb)

>>> print(cnf_matrix_mnb)
[[16 0 0]
 [ 0 0 18]
 [ 0 0 11]]Copy

Example 3: Naive Bayes Classifiers

# Naive Bayes Classifiers

# Walk-through of pyspark.ml's NaiveBayes: fit a weighted multinomial model,
# inspect its parameters, predict, save/load, and apply prediction thresholds.
# The values in the trailing comments are what an interactive session shows
# for each expression.
import tempfile

from pyspark.ml.classification import NaiveBayes, NaiveBayesModel
from pyspark.ml.linalg import Vectors
from pyspark.sql import Row, SparkSession

# The snippet uses `spark`, `sc` and `temp_path` without defining them; create
# them here so it runs as a standalone script. getOrCreate() reuses an already
# running session (e.g. inside the pyspark shell).
spark = SparkSession.builder.master("local[2]").appName("naive-bayes-example").getOrCreate()
sc = spark.sparkContext
temp_path = tempfile.mkdtemp()

# Three weighted training rows: two of label 0.0 and one of label 1.0.
df = spark.createDataFrame([
  Row(label=0.0, weight=0.1, features=Vectors.dense([0.0, 0.0])),
  Row(label=0.0, weight=0.5, features=Vectors.dense([0.0, 1.0])),
  Row(label=1.0, weight=1.0, features=Vectors.dense([1.0, 0.0]))])
nb = NaiveBayes(smoothing=1.0, modelType="multinomial", weightCol="weight")
model = nb.fit(df)
model.pi
# DenseVector([-0.81..., -0.58...])
model.theta
# DenseMatrix(2, 2, [-0.91..., -0.51..., -0.40..., -1.09...], 1)

# Predict a single dense feature vector.
test0 = sc.parallelize([Row(features=Vectors.dense([1.0, 0.0]))]).toDF()
result = model.transform(test0).head()
result.prediction
# 1.0
result.probability
# DenseVector([0.32..., 0.67...])
result.rawPrediction
# DenseVector([-1.72..., 0.99...])

# The same point expressed as a sparse vector yields the same prediction.
test1 = sc.parallelize([Row(features=Vectors.sparse(2, [0], [1.0]))]).toDF()
model.transform(test1).head().prediction
# 1.0

# Round-trip the estimator (unfitted configuration) through disk.
nb_path = temp_path + "/nb"
nb.save(nb_path)
nb2 = NaiveBayes.load(nb_path)
nb2.getSmoothing()
# 1.0

# Round-trip the fitted model through disk.
model_path = temp_path + "/nb_model"
model.save(model_path)
model2 = NaiveBayesModel.load(model_path)
model.pi == model2.pi
# True

# With these per-class thresholds the prediction for test0 changes from
# 1.0 to 0.0.
nb = nb.setThresholds([0.01, 10.00])
model3 = nb.fit(df)
result = model3.transform(test0).head()
result.prediction
# 0.0