decision tree classification code example
Example 1: decision tree algorithm in python
clf = DecisionTreeClassifier(criterion="entropy", max_depth=3)
clf = clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
Example 2: Decision tree learning algorithm for classification
from pyspark.ml.linalg import Vectors
from pyspark.ml.feature import StringIndexer
df = spark.createDataFrame([
(1.0, Vectors.dense(1.0)),
(0.0, Vectors.sparse(1, [], []))], ["label", "features"])
stringIndexer = StringIndexer(inputCol="label", outputCol="indexed")
si_model = stringIndexer.fit(df)
td = si_model.transform(df)
dt = DecisionTreeClassifier(maxDepth=2, labelCol="indexed")
model = dt.fit(td)
model.numNodes
model.depth
model.featuresImportances
model.numFeatures
model.numClasses
print(model.toDebugString)
test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
result = model.transform(test0).head()
result.prediction
result.probability
result.rawPrediction
test1 = spark.createDataFrame([Vectors.sparse(1, [0], [1.0]),)], ["features"])
model.transform(test1).head().prediction
dtc_path = temp_path + "/dtc"
dt.save(dtc_path)
dt2 = DecisionTreeClassifier.load(dtc_path)
dt2.getMaxDepth()
model_path = temp_path + "/dtc_model"
model.save(model_path)
model2 = DecisionTreeClassificationModel.load(model_path)
model.featureImportances == model2.featureImportances