PySpark window function code example: over() with partitionBy()
Example: Window.partitionBy
import sys
from pyspark.sql.window import Window
import pyspark.sql.functions as func
# Load the source table first: the window specification and every column
# expression below are built from this DataFrame's columns.
dataFrame = sqlContext.table("productRevenue")

# Window covering all rows that share the current row's category, ordered by
# revenue from highest to lowest. The frame bounds are the extreme integer
# values, which is meant to make the frame span the whole partition
# (see the Window.rangeBetween documentation for how such bounds are treated).
# Parentheses are used for line continuation so that a missing backslash
# cannot silently cut the method chain short.
windowSpec = (
    Window
    .partitionBy(dataFrame['category'])
    .orderBy(dataFrame['revenue'].desc())
    .rangeBetween(-sys.maxsize, sys.maxsize)
)

# For each row: the maximum revenue within its category window minus the
# row's own revenue.
revenue_difference = (
    func.max(dataFrame['revenue']).over(windowSpec) - dataFrame['revenue']
)

# Project the original columns together with the computed difference.
# NOTE: the resulting DataFrame is not bound to a name here; assign it or
# chain an action such as .show() to actually use the result.
dataFrame.select(
    dataFrame['product'],
    dataFrame['category'],
    dataFrame['revenue'],
    revenue_difference.alias("revenue_difference"),
)