|
| 1 | +# input your code here! |
| 2 | + |
def word_count_distribution(text):
    """Return how many distinct words share each occurrence count.

    The output of ``count_words_fast(text)`` is wrapped in a ``Counter``
    (presumably a word -> count tally; the helper is defined elsewhere,
    so confirm there).  The values of that tally are then counted again,
    giving a plain ``dict`` that maps an occurrence count to the number
    of entries having exactly that count.
    """
    occurrences_per_word = Counter(count_words_fast(text))
    words_per_occurrence = Counter(occurrences_per_word.values())
    return dict(words_per_occurrence)
| 11 | + |
| 12 | + |
| 13 | + |
# Build the occurrence-count distribution for the currently loaded `text`.
distribution = word_count_distribution(text)
| 15 | + |
| 16 | + |
| 17 | +# input your code here! |
| 18 | + |
def more_frequent(distribution):
    """Return, per occurrence count, the fraction of words occurring more often.

    Args:
        distribution: Mapping from an occurrence count to the number of
            distinct words that occur exactly that many times (the shape
            produced by ``word_count_distribution``).

    Returns:
        A dict with the same keys as ``distribution``, inserted in
        ascending order.  Each value is the fraction (0.0 to 1.0) of all
        words whose occurrence count is strictly greater than the key.
        An empty (or all-zero) distribution yields an empty dict.
    """
    total_words = sum(distribution.values())
    if not total_words:
        # Nothing to normalise by; also avoids a division by zero.
        return {}

    fractions = {}
    words_seen = 0
    # Iterate over the actual keys: they are sparse counts, not 0..n-1
    # positions, so indexing by range(len(...)) would raise KeyError.
    for count in sorted(distribution):
        words_seen += distribution[count]
        # Everything not yet seen occurs strictly more often than `count`.
        fractions[count] = (total_words - words_seen) / total_words
    return fractions
| 27 | + |
# Evaluate on the current `distribution`; the result is not stored, so it
# is only visible when an interactive shell or notebook echoes it.
more_frequent(distribution)
| 29 | + |
| 30 | + |
| 31 | + |
# Collect the word-count distribution of every book titled "Hamlet",
# one DataFrame row per match, with row labels starting at 1.
hamlets = pd.DataFrame(columns=["language", "distribution"])
book_dir = "Books"
title_num = 1
for language in book_titles:
    for author in book_titles[language]:
        for title in book_titles[language][author]:
            if title != "Hamlet":
                continue
            inputfile = (
                data_filepath
                + "Books/"
                + "/".join((language, author, title))
                + ".txt"
            )
            text = read_book(inputfile)
            distribution = word_count_distribution(text)
            hamlets.loc[title_num] = (language, distribution)
            title_num += 1
| 44 | + |
| 45 | + |
| 46 | + |
# Draw one log-log curve per row of `hamlets`, built from the output of
# `more_frequent` for that row's distribution.
colors = ["crimson", "forestgreen", "blueviolet"]
handles, hamlet_languages = [], []
for index in range(len(hamlets)):
    # Rows are addressed with key `index + 1`, while `colors` is 0-based.
    language = hamlets.language[index + 1]
    distribution = hamlets.distribution[index + 1]
    dist = more_frequent(distribution)
    ascending_keys = sorted(dist)
    descending_values = sorted(dist.values(), reverse=True)
    [plot] = plt.loglog(
        ascending_keys,
        descending_values,
        color=colors[index],
        linewidth=2,
    )
    handles.append(plot)
    hamlet_languages.append(language)

plt.title("Word Frequencies in Hamlet Translations")
xlim = [0, 2e3]
xlabel = "Frequency of Word $W$"
ylabel = "Fraction of Words\nWith Greater Frequency than $W$"
plt.xlim(xlim)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
plt.legend(handles, hamlet_languages, loc="upper right", numpoints=1)
# show your plot using `plt.show`!
plt.show()
| 64 | + |
0 commit comments