Skip to content

Commit 0661642

Browse files
committed
add some problems in misc
1 parent 3c4b0e5 commit 0661642

File tree

19 files changed

+4762017
-0
lines changed

19 files changed

+4762017
-0
lines changed
Binary file not shown.
12.2 KB
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from ngram_score import ngram_score
2+
from pycipher import Autokey
3+
import re
4+
from itertools import permutations
5+
6+
# n-gram scorers, each built from a counts file in the working directory.
qgram = ngram_score('quadgrams.txt')
trigram = ngram_score('trigrams.txt')

# Ciphertext under attack: upper-cased, then stripped of everything
# that is not one of the letters A-Z.
ctext = (
    'MPLRVFFCZEYOUJFJKYBXGZVDGQAURKXZOLKOLVTUFBLRNJESQITWAHXNSIJXPNMPLSHCJBTYHZEALOGVIAAISSPLFHLFSWFEHJNCRWHTINSMAMBVEXPZIZ'
).upper()
ctext = re.compile(r'[^A-Z]').sub('', ctext)
12+
class nbest(object):
    """Keep only the N largest items ever added, best first.

    Items are ordered with their natural comparison (for tuples, the
    first element decides, later elements break ties). Anything that
    falls outside the top N after an insertion is discarded.
    """

    def __init__(self, N=1000):
        """Create an empty container that retains at most *N* items."""
        self.N = N
        self.store = []

    def add(self, item):
        """Insert *item*, re-rank in descending order and drop the overflow."""
        ranked = self.store
        ranked.append(item)
        ranked.sort(reverse=True)
        self.store = ranked[:self.N]

    def __getitem__(self, k):
        """Return the k-th best item (index 0 is the largest)."""
        return self.store[k]

    def __len__(self):
        """Return how many items are currently retained."""
        return len(self.store)
29+
30+
#init
# Search for the Autokey key of each length 3..19, keeping the N best
# partial keys per round, and print the best full key found for each length.

# Imported here so this block is self-contained. Unlike permutations(),
# product() also yields prefixes with repeated letters (e.g. 'EEL'),
# which the search previously could never reach.
from itertools import product

ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
N = 100  # number of best candidates carried from one round to the next

for KLEN in range(3, 20):
    # Start offsets of each KLEN-long block of the text.
    blocks = range(0, len(ctext), KLEN)

    # Round 1: try every 3-letter key prefix, padding the rest of the key
    # with 'A', and score the trigram at the start of each block.
    rec = nbest(N)
    for prefix in product(ALPHABET, repeat=3):
        prefix = ''.join(prefix)
        fullkey = prefix + 'A' * (KLEN - len(prefix))
        pt = Autokey(fullkey).decipher(ctext)
        score = sum(trigram.score(pt[j:j + 3]) for j in blocks)
        rec.add((score, prefix, pt[:30]))

    # Later rounds: extend each surviving prefix by one letter and rescore
    # the first len(key) plaintext characters of every block with
    # quadgrams, until the prefix reaches the full key length.
    for _ in range(KLEN - 3):
        next_rec = nbest(N)
        # len(rec), not N: never index past what was actually kept.
        for k in range(len(rec)):
            stem = rec[k][1]
            for c in ALPHABET:
                key = stem + c
                fullkey = key + 'A' * (KLEN - len(key))
                pt = Autokey(fullkey).decipher(ctext)
                score = sum(qgram.score(pt[j:j + len(key)]) for j in blocks)
                next_rec.add((score, key, pt[:30]))
        rec = next_rec

    # Final pass: re-rank the surviving full-length keys by the quadgram
    # score of the whole plaintext; the first key wins ties.
    bestkey = rec[0][1]
    bestscore = qgram.score(Autokey(bestkey).decipher(ctext))
    for k in range(1, len(rec)):
        candidate = rec[k][1]
        score = qgram.score(Autokey(candidate).decipher(ctext))
        if score > bestscore:
            bestkey = candidate
            bestscore = score

    # One pre-formatted string prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print('%s autokey, klen %s :"%s", %s' % (
        bestscore, KLEN, bestkey, Autokey(bestkey).decipher(ctext)))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
'''
2+
Allows scoring of text using n-gram probabilities
3+
17/07/12
4+
'''
5+
from math import log10
6+
7+
class ngram_score(object):
    """Score text by summing the log10 probabilities of its n-grams.

    Attributes set by the constructor:
        ngrams: dict mapping each n-gram to log10(count / total count).
        L: length of the n-grams (taken from the last one read).
        N: sum of all counts in the file.
        floor: log10(0.01 / N), the score used for n-grams not in the file.
    """

    def __init__(self, ngramfile, sep=' '):
        """Load n-gram counts from *ngramfile* and convert them to log probabilities.

        Each non-blank line must hold an n-gram and its integer count,
        separated by *sep*. Blank lines are skipped.

        Raises:
            ValueError: if the file holds no n-grams, a line does not split
                into exactly two fields, or a count is not an integer.
        """
        self.ngrams = {}
        key = None
        # open() inside a with-block closes the handle promptly and, unlike
        # the Python-2-only file() builtin, works on Python 2 and 3.
        with open(ngramfile) as handle:
            for line in handle:
                line = line.rstrip('\r\n')
                if not line:
                    continue  # tolerate blank lines
                key, count = line.split(sep)
                self.ngrams[key] = int(count)
        if key is None:
            raise ValueError('no n-grams found in %r' % (ngramfile,))
        self.L = len(key)
        self.N = sum(self.ngrams.values())
        # Replace raw counts by log10 probabilities. Iterate over a snapshot
        # so the dict can be updated while looping.
        total = float(self.N)
        for key, count in list(self.ngrams.items()):
            self.ngrams[key] = log10(count / total)
        self.floor = log10(0.01 / self.N)

    def score(self, text):
        """Return the summed log10 probability of every n-gram in *text*.

        N-grams missing from the table contribute ``self.floor``. Text
        shorter than ``self.L`` contains no n-gram and scores 0.
        """
        width = self.L
        lookup = self.ngrams.get
        floor = self.floor
        return sum(lookup(text[i:i + width], floor)
                   for i in range(len(text) - width + 1))
29+

0 commit comments

Comments
 (0)