s = 'Hi hi hi bye bye bye word count'; seq = s.split(); sc.parallelize(seq).map(lambda word: (word, 1)).reduceByKey(add).collect() Output: [('count', 1), ('word', 1), ('bye', 3), ('hi', 2), ('Hi', 1)] (code example)
Example: s = 'Hi hi hi bye bye bye word count'; seq = s.split(); sc.parallelize(seq).map(lambda word: (word, 1)).reduceByKey(add).collect()
from operator import add
# Word-count example: count how often each distinct word occurs in `s`.
# NOTE(review): `sc` is not defined in this snippet -- presumably an existing
# SparkContext (e.g. the one provided by the pyspark shell); confirm.
s = 'Hi hi hi bye bye bye word count'

# Whitespace-split into words:
# ['Hi', 'hi', 'hi', 'bye', 'bye', 'bye', 'word', 'count']
seq = s.split()

# The chain is left as a bare expression (not assigned) so an interactive
# shell echoes the collected result. Keys are compared exactly, so 'Hi' and
# 'hi' are counted as different words.
(
    sc.parallelize(seq)             # hand the word list to Spark
    .map(lambda word: (word, 1))    # pair every word with a count of 1
    .reduceByKey(add)               # add up the 1s of identical words
    .collect()                      # gather the (word, count) pairs
)