Skip to content

Commit 9dd4c13

Browse files
authored
Create translation_hamlet.py
1 parent c9641fe commit 9dd4c13

File tree

1 file changed

+64
-0
lines changed

1 file changed

+64
-0
lines changed

translation_hamlet.py

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# input your code here!
2+
3+
def word_count_distribution(text):
    """Return how many distinct words share each occurrence count in *text*.

    The per-word counts come from ``count_words_fast(text)``. The result is a
    plain ``dict`` mapping an occurrence count to the number of words that
    have exactly that count.
    """
    occurrences_per_word = Counter(count_words_fast(text))
    # Tally the counts themselves: key = occurrence count, value = how many
    # words were seen that many times.
    return dict(Counter(occurrences_per_word.values()))
11+
12+
13+
14+
# Build the count distribution for `text`. `text` is not defined in the
# visible part of this file -- presumably it is loaded earlier; TODO confirm.
distribution = word_count_distribution(text)
15+
16+
17+
# input your code here!
18+
19+
def more_frequent(distribution):
    """Return, for each count, the fraction of words that occur more often.

    Args:
        distribution: Mapping of ``occurrence count -> number of words with
            exactly that count`` (the shape produced by
            ``word_count_distribution``).

    Returns:
        A dict with the same keys as *distribution*, inserted in ascending
        order. Each value is the share (between 0.0 and 1.0) of all words
        whose occurrence count is strictly greater than the key. An empty
        *distribution* yields an empty dict.
    """
    total_words = sum(distribution.values())
    fractions = {}
    # Walk the counts from smallest to largest, peeling off the words at each
    # count; what is left afterwards is exactly the number of words that are
    # strictly more frequent than the current count.
    words_above = total_words
    for count in sorted(distribution):
        words_above -= distribution[count]
        fractions[count] = words_above / total_words
    return fractions
27+
28+
# Call more_frequent on the module-level `distribution`. The return value is
# not stored anywhere.
more_frequent(distribution)
29+
30+
31+
32+
# Collect one row per "Hamlet" translation: the language it is written in and
# the word-count distribution of its text.
hamlets = pd.DataFrame(columns=["language", "distribution"])
book_dir = "Books"  # not referenced below; the path hard-codes "Books/"
title_num = 1  # row label for the next Hamlet found (labels start at 1)
for language in book_titles:
    for author in book_titles[language]:
        for title in book_titles[language][author]:
            if title != "Hamlet":
                continue
            # Path layout: <data_filepath>Books/<language>/<author>/<title>.txt
            inputfile = (
                data_filepath
                + "Books/"
                + "/".join((language, author, title))
                + ".txt"
            )
            text = read_book(inputfile)
            distribution = word_count_distribution(text)
            hamlets.loc[title_num] = (language, distribution)
            title_num += 1
44+
45+
46+
47+
# Draw one log-log curve per Hamlet translation and label it by language.
colors = ["crimson", "forestgreen", "blueviolet"]
handles, hamlet_languages = [], []
for index in range(len(hamlets)):
    # Rows of `hamlets` are labelled from 1 upward, hence the +1 offset.
    language = hamlets["language"][index + 1]
    distribution = hamlets["distribution"][index + 1]
    dist = more_frequent(distribution)
    # Keys are plotted ascending and values descending; the two sequences are
    # sorted independently of each other.
    plot, = plt.loglog(
        sorted(dist.keys()),
        sorted(dist.values(), reverse=True),
        color=colors[index],
        linewidth=2,
    )
    handles.append(plot)
    hamlet_languages.append(language)

plt.title("Word Frequencies in Hamlet Translations")
# NOTE(review): the lower x limit of 0 is non-positive on a log-scaled axis --
# confirm matplotlib treats it as intended.
xlim = [0, 2e3]
xlabel = "Frequency of Word $W$"
ylabel = "Fraction of Words\nWith Greater Frequency than $W$"
plt.xlim(xlim)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
plt.legend(handles, hamlet_languages, loc="upper right", numpoints=1)
# Display the finished figure.
plt.show()
64+

0 commit comments

Comments
 (0)