Binary classification metrics: code example

Example: Evaluator for binary classification

# Evaluator for binary classification

import shutil
import tempfile

from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession

# Reuse the active session (e.g. the `spark` object of the pyspark shell),
# or start a new one when this snippet is run on its own.
spark = SparkSession.builder.getOrCreate()

# Each (score, label) pair becomes a row whose raw-prediction vector is
# [1 - score, score], paired with its label.
scoreAndLabels = [
    (Vectors.dense([1.0 - score, score]), label)
    for score, label in [
        (0.1, 0.0),
        (0.1, 1.0),
        (0.4, 0.0),
        (0.6, 0.0),
        (0.6, 1.0),
        (0.6, 1.0),
        (0.8, 1.0),
    ]
]
dataset = spark.createDataFrame(scoreAndLabels, ["raw", "label"])

# Point the evaluator at the "raw" column built above.
evaluator = BinaryClassificationEvaluator(rawPredictionCol="raw")

# The bare expressions below echo their value in an interactive session;
# wrap them in print() to see the numbers when running as a script.
evaluator.evaluate(dataset)
# 0.70...
# Override the metric for this single call via a param map.
evaluator.evaluate(dataset, {evaluator.metricName: "areaUnderPR"})
# 0.83...

# Round-trip the evaluator through save/load in a scratch directory.
# NOTE(review): assumes Spark's default filesystem is the local one, so the
# directory created by tempfile is visible to save()/load() - confirm on a cluster.
temp_path = tempfile.mkdtemp()
try:
    bce_path = temp_path + "/bce"
    evaluator.save(bce_path)
    evaluator2 = BinaryClassificationEvaluator.load(bce_path)
    str(evaluator2.getRawPredictionCol())
    # 'raw'
finally:
    # Remove the scratch directory even if save/load fails.
    shutil.rmtree(temp_path, ignore_errors=True)