Code example: plotting regression lines with decision trees of varying depths
Example: decision tree learning algorithm for regression (PySpark DecisionTreeRegressor)
from pyspark.ml.linalg import Vectors
# The estimator and model classes used below live in pyspark.ml.regression;
# without this import the script stops with NameError at the first use.
from pyspark.ml.regression import (
    DecisionTreeRegressionModel,
    DecisionTreeRegressor,
)

# NOTE(review): `spark` and `temp_path` are not defined in this snippet.
# Presumably `spark` is an active SparkSession and `temp_path` a writable
# directory supplied by the surrounding environment -- confirm before
# running this standalone.

# Training data: two labelled rows, each with a one-dimensional feature
# vector. The sparse vector has size 1 and no stored entries.
df = spark.createDataFrame([
    (1.0, Vectors.dense(1.0)),
    (0.0, Vectors.sparse(1, [], []))], ["label", "features"])

# Fit a regression tree limited to depth 2; `varianceCol` names an extra
# output column that is read back at the end of the script.
dt = DecisionTreeRegressor(maxDepth=2, varianceCol="variance")
model = dt.fit(df)

# Inspect the fitted tree. These are bare expressions: they display a value
# in an interactive session (REPL / notebook) and have no visible effect
# when the file is run as a plain script.
model.depth
model.numNodes
model.featureImportances
model.numFeatures

# Score two single-row frames: one dense vector, one sparse vector.
test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
model.transform(test0).head().prediction
test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
model.transform(test1).head().prediction

# Persist the (unfitted) estimator, reload it and read back its maxDepth.
dtr_path = temp_path + "/dtr"
dt.save(dtr_path)
dt2 = DecisionTreeRegressor.load(dtr_path)
dt2.getMaxDepth()

# Persist the fitted model, reload it and compare its structure with the
# in-memory model.
model_path = temp_path + "/dtr_model"
model.save(model_path)
model2 = DecisionTreeRegressionModel.load(model_path)
model.numNodes == model2.numNodes
model.depth == model2.depth

# Read the column requested through varianceCol="variance" above.
model.transform(test1).head().variance