Correlation code examples: Python (SciPy) and Scala (Spark MLlib)

Example 1: correlation python

import numpy as np
import scipy.stats

# Two samples that are perfectly linearly related: y is x shifted down by 10.
x = np.arange(15, 20)  # 15, 16, 17, 18, 19
y = x - 10             # 5, 6, 7, 8, 9

# pearsonr yields the correlation coefficient first and the p-value second.
pearson_result = scipy.stats.pearsonr(x, y)
stat, p = pearson_result

Example 2: python pearson correlation

# Import the submodule explicitly: a bare `import scipy` is not guaranteed to
# make `scipy.stats` available as an attribute on every SciPy version.
import scipy.stats

# x and y are 1-D NumPy arrays of the same length, i.e. each has shape (N,).
# pearsonr returns (correlation coefficient, p-value); the p-value is discarded.
coeff, _ = scipy.stats.pearsonr(x, y)

Example 3: correlation with Spark MLlib (Scala)

import org.apache.spark.ml.linalg.{Matrix, Vectors}
import org.apache.spark.ml.stat.Correlation
import org.apache.spark.sql.Row

// Four observations of a 4-dimensional feature vector. The sparse rows give
// only their non-zero entries, as parallel index and value arrays.
val data = Seq(
  Vectors.sparse(4, Array(0, 3), Array(1.0, -2.0)),
  Vectors.dense(4.0, 5.0, 0.0, 3.0),
  Vectors.dense(6.0, 7.0, 0.0, 8.0),
  Vectors.sparse(4, Array(0, 3), Array(9.0, 1.0))
)

// Wrap every vector in a Tuple1 so the collection converts to a DataFrame with
// a single column named "features".
// NOTE(review): toDF needs the SparkSession implicits in scope (spark-shell
// provides them) — confirm for standalone use.
val df = data.map(Tuple1(_)).toDF("features")

// With no method argument, corr computes the Pearson correlation. The result
// is a one-row DataFrame whose only cell holds the correlation matrix.
val Row(coeff1: Matrix) = Correlation.corr(df, "features").head()
println(s"Pearson correlation matrix:\n$coeff1")

// Same computation, this time requesting Spearman rank correlation.
val Row(coeff2: Matrix) = Correlation.corr(df, "features", "spearman").head()
println(s"Spearman correlation matrix:\n$coeff2")