How to create a wordcloud from a TDM in python?


I am trying to create word clouds from the output of a CountVectorizer, which is a term-document matrix, but I can’t find any package that does this. Any help would be appreciated.

How can I create a word cloud in Python?

Install the wordcloud package using “pip install wordcloud”. Then you can use it as shown below.

# import library
import re

import matplotlib.pyplot as plt
import pandas as pd
from wordcloud import STOPWORDS, WordCloud

# Load spreadsheet
# NOTE(review): `file` is not defined in this snippet; it must be set to the
# path of the Excel workbook before this line runs.
xl = pd.ExcelFile(file)
# Load a sheet into a DataFrame by name: df1
df1 = xl.parse('Sheet1')

# Keep the text of the rows labelled 'Positive'. The previous version stored
# len(...) here, which replaced the texts with a row count and left nothing
# that could be joined into a word cloud. Empty cells are dropped and values
# coerced to str so that ' '.join(...) in wordcloud_draw cannot hit a NaN.
data_pos = df1.loc[df1["class"] == 'Positive', 'text'].dropna().astype(str)

# Data preparation
def wordcloud_draw(data, color="black"):
    """Draw a word cloud from an iterable of text strings.

    Each whitespace-separated word is reduced to its leading run of ASCII
    letters; words with no such prefix are discarded. The remaining tokens
    are rendered with ``WordCloud`` and shown in a matplotlib figure.

    Args:
        data: Iterable of strings to visualise.
        color: Background colour handed to ``WordCloud``.

    Raises:
        ValueError: If no alphabetic tokens remain after cleaning.
    """
    leading_letters = re.compile(r'^[a-zA-Z]*')

    # The pattern can match the empty string, so match() never returns None;
    # empty results are filtered out instead.
    prefixes = (
        leading_letters.match(word).group()
        for word in ' '.join(data).split()
    )
    cleaned_words_tokens = [token for token in prefixes if token]
    if not cleaned_words_tokens:
        raise ValueError("no alphabetic words found to build a word cloud")

    # WordCloud.generate expects plain text, not the repr of a list of
    # encoded bytes, so the tokens are joined back into one string.
    cleaned_text = ' '.join(cleaned_words_tokens)

    wordcloud = WordCloud(
        stopwords=STOPWORDS,
        background_color=color,
    ).generate(cleaned_text)

    plt.figure(1, figsize=(13, 13))
    plt.imshow(wordcloud)
    plt.axis('off')
    plt.show()

# Positive sentiment
# NOTE(review): only the heading is printed in this excerpt; presumably a
# wordcloud_draw(...) call on the positive texts follows — confirm.
print("Positive words")

Find files here (638.4 KB)