Best evaluation metric for binary classification: code example
Example: Evaluator for binary classification
# Example: score a tiny hand-made dataset with BinaryClassificationEvaluator,
# then persist the evaluator and load it back.
#
# NOTE(review): `spark` (used to build the DataFrame) and `temp_path` (used as
# the parent of the save location) are not defined in this snippet; presumably
# the surrounding session / doctest harness supplies them -- confirm before
# running this standalone.
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.linalg import Vectors

# Each input pair is (score, label). The score is expanded into a two-element
# dense vector [1 - score, score], which becomes the "raw" column below.
scoreAndLabels = [
    (Vectors.dense([1.0 - score, score]), label)
    for score, label in [
        (0.1, 0.0),
        (0.1, 1.0),
        (0.4, 0.0),
        (0.6, 0.0),
        (0.6, 1.0),
        (0.6, 1.0),
        (0.8, 1.0),
    ]
]
dataset = spark.createDataFrame(scoreAndLabels, ["raw", "label"])

# Point the evaluator at the "raw" column; every other parameter keeps its
# library default.
evaluator = BinaryClassificationEvaluator(rawPredictionCol="raw")

# Evaluate with the evaluator's default metric. Per the PySpark documentation
# this is areaUnderROC and the result for this data is roughly 0.70.
# The bare expression is echoed in a REPL/notebook; wrap it in print() when
# running this as a script.
evaluator.evaluate(dataset)

# Override the metric for this single call only; the evaluator itself is not
# modified. The PySpark documentation reports roughly 0.83 for this data.
evaluator.evaluate(dataset, {evaluator.metricName: "areaUnderPR"})

# Round-trip the evaluator through storage and confirm that the configured
# column name survives the save/load cycle.
bce_path = temp_path + "/bce"
evaluator.save(bce_path)
evaluator2 = BinaryClassificationEvaluator.load(bce_path)
str(evaluator2.getRawPredictionCol())