How to create a word cloud from a corpus in Python?
In case you require these word clouds for showing them in website or web app you can convert your data to json or csv format and load it to a JavaScript visualisation library such as d3. Word Clouds on d3
If not, Marcin's answer is a good way for doing what you describe.
here is the short code
#make wordcoud
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
stopwords = set(STOPWORDS)
def show_wordcloud(data, title = None):
wordcloud = WordCloud(
background_color='white',
stopwords=stopwords,
max_words=200,
max_font_size=40,
scale=3,
random_state=1 # chosen at random by flipping a coin; it was heads
).generate(str(data))
fig = plt.figure(1, figsize=(12, 12))
plt.axis('off')
if title:
fig.suptitle(title, fontsize=20)
fig.subplots_adjust(top=2.3)
plt.imshow(wordcloud)
plt.show()
if __name__ == '__main__':
show_wordcloud(text_str)
Example of amueller's code in action
In command-line / terminal:
sudo pip install wordcloud
Then run python script:
## Simple WordCloud
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
text = 'all your base are belong to us all of your base base base'
def generate_wordcloud(text): # optionally add: stopwords=STOPWORDS and change the arg below
wordcloud = WordCloud(font_path='/Library/Fonts/Verdana.ttf',
width=800, height=400,
relative_scaling = 1.0,
stopwords = {'to', 'of'} # set or space-separated string
).generate(text)
fig = plt.figure(1, figsize=(8, 4))
plt.axis('off')
plt.imshow(wordcloud)
plt.axis("off")
## Pick One:
# plt.show()
plt.savefig("WordCloud.png")
generate_wordcloud(text)
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
stopwords = set(STOPWORDS)
def show_wordcloud(data, title = None):
wordcloud = WordCloud(
background_color='white',
stopwords=stopwords,
max_words=200,
max_font_size=40,
scale=3,
random_state=1 # chosen at random by flipping a coin; it was heads
).generate(str(data))
fig = plt.figure(1, figsize=(12, 12))
plt.axis('off')
if title:
fig.suptitle(title, fontsize=20)
fig.subplots_adjust(top=2.3)
plt.imshow(wordcloud)
plt.show()
show_wordcloud(Samsung_Reviews_Negative['Reviews'])
show_wordcloud(Samsung_Reviews_positive['Reviews'])