In [9]:
import re

def word_count(fname):
    """Tally how often each word occurs in a text file.

    The whole file is read, lower-cased, and split into runs of regex
    word characters. The total number of tokens found is printed as a
    side effect before the tally is returned.

    Args:
        fname: Path of the text file to read.

    Returns:
        A dict mapping each lower-cased word to its number of
        occurrences, keyed in order of first appearance.
    """
    with open(fname) as handle:
        text = handle.read()

    tokens = re.findall(r'\w+', text.lower())
    print(len(tokens))

    tally = {}
    for token in tokens:
        # get() supplies 0 the first time a token is seen.
        tally[token] = tally.get(token, 0) + 1
    return tally
            
# Print the word -> count mapping computed for lipogram.txt.
print(word_count('lipogram.txt'))
154
{'i': 5, 'was': 5, 'born': 1, 'in': 5, 'midtown': 1, 'manhattan': 1, 'right': 1, 'as': 2, 'world': 1, 'war': 1, 'two': 2, 'drawing': 1, 'to': 6, 'a': 6, 'uhmm': 1, 'conclusion': 1, 'my': 8, 'dad': 3, 'physics': 1, 'prof': 1, 'at': 2, 'an': 3, 'august': 1, 'institution': 1, 'roughly': 1, 'hour': 1, 'south': 1, 'by': 1, 'train': 1, 'and': 8, 'until': 1, 'or': 1, 'so': 3, 'did': 3, 'wrong': 1, 'way': 2, 'commuting': 1, 'work': 2, 'back': 1, 'finally': 1, 'our': 2, 'family': 1, 'found': 1, 'flat': 1, 'had': 1, 'short': 1, 'stint': 1, 'living': 1, 'that': 2, 'most': 3, 'ivy': 2, 'of': 3, 'towns': 1, 'but': 1, 'around': 1, 'fifth': 1, 'birthday': 1, 'got': 2, 'alluring': 1, 'invitation': 1, 'out': 1, 'california': 1, 'folks': 1, 'baby': 1, 'sis': 1, 'laura': 1, 'all': 1, 'into': 1, 'car': 1, 'took': 1, 'off': 1, 'on': 2, 'cross': 1, 'country': 1, 'jaunt': 1, 'soon': 1, 'wound': 1, 'up': 2, 'stanford': 2, 'growing': 1, 'campus': 1, 'going': 1, 'junior': 1, 'high': 2, 'school': 1, 'palo': 1, 'alto': 1, 'it': 1, 'natural': 1, 'should': 1, 'go': 1, 'cohorts': 1, 'fact': 1}
In [4]:
from wordcloud import WordCloud,STOPWORDS
import matplotlib.pyplot as plt 
import re

def word_count(fname, stopwords=None):
    """Count occurrences of each non-stopword in a text file.

    The whole file is read, lower-cased, and split into runs of regex
    word characters; tokens found in ``stopwords`` are skipped.

    Args:
        fname: Path of the text file to read.
        stopwords: Optional container of words to ignore. Tokens are
            lower-cased before the membership check, so entries should
            be lower-case. When omitted (or None), the ``STOPWORDS`` set
            imported from ``wordcloud`` is used, which is the original
            behavior.

    Returns:
        A dict mapping each kept word to its number of occurrences,
        keyed in order of first appearance.
    """
    if stopwords is None:
        # Resolved at call time so that an explicit empty container is
        # honored rather than silently replaced by the default.
        stopwords = STOPWORDS

    # Only the read needs the file open; counting happens afterwards.
    with open(fname) as f:
        words = re.findall(r'\w+', f.read().lower())

    wc = {}
    for word in words:
        if word in stopwords:
            continue
        wc[word] = wc.get(word, 0) + 1
    return wc

def generate_word_cloud(fname):
    """Display a word cloud built from the word frequencies of a file.

    The frequencies come from ``word_count(fname)``. The cloud is created
    with a white background on a 600x400 canvas and a relative scaling
    of 0.5, then shown through matplotlib with the axes turned off.

    Args:
        fname: Path of the text file whose words are visualized.
    """
    frequencies = word_count(fname)

    cloud = WordCloud(
        background_color="white",
        width=600,
        height=400,
        relative_scaling=0.5,
    )
    rendered = cloud.generate_from_frequencies(frequencies)

    plt.imshow(rendered)
    plt.axis("off")  # hide ticks and frame around the image
    plt.show()

# Draw the word cloud for one input file; the commented-out calls below
# are alternative inputs that can be swapped in.
generate_word_cloud('aaai-2019-titles')
#generate_word_cloud('lipogram.txt')
#generate_word_cloud('nips-2019')