Shift cipher

Frequency analysis

In [1]:
ct = "Dolu fvb dlyl h ahkwvsl huk P dhz h mpzo Pu aol Whslvgvpj aptl, Huk zpkl if zpkl vu aol liipun apkl Dl zwyhdslk aoyvbno aol vvgl huk zsptl, Vy zrpaalylk dpao thuf h jhbkhs mspw aoyvbno aol klwaoz vm Jhtiyphu mlu Tf olhya dhz ypml dpao aol qvf vm spml, Mvy P svclk fvb lclu aolu."
In [2]:
len(ct)
Out[2]:
278
In [3]:
from collections import Counter

Counter(ct)
Out[3]:
Counter({'D': 2,
         'o': 16,
         'l': 30,
         'u': 12,
         ' ': 56,
         'f': 6,
         'v': 15,
         'b': 5,
         'd': 6,
         'y': 10,
         'h': 16,
         'a': 17,
         'k': 12,
         'w': 4,
         's': 8,
         'P': 3,
         'z': 9,
         'm': 7,
         'p': 15,
         'W': 1,
         'g': 2,
         'j': 2,
         't': 4,
         ',': 3,
         'H': 1,
         'i': 4,
         'n': 3,
         'V': 1,
         'r': 1,
         'J': 1,
         'T': 1,
         'q': 1,
         'M': 1,
         'c': 2,
         '.': 1})
In [4]:
Counter(ct.upper())
Out[4]:
Counter({'D': 8,
         'O': 16,
         'L': 30,
         'U': 12,
         ' ': 56,
         'F': 6,
         'V': 16,
         'B': 5,
         'Y': 10,
         'H': 17,
         'A': 17,
         'K': 12,
         'W': 5,
         'S': 8,
         'P': 18,
         'Z': 9,
         'M': 8,
         'G': 2,
         'J': 3,
         'T': 5,
         ',': 3,
         'I': 4,
         'N': 3,
         'R': 1,
         'Q': 1,
         'C': 2,
         '.': 1})
In [5]:
# Five most frequent characters of the upper-cased ciphertext.
# Note: the count is over all characters, so the space is included in the ranking.
Counter(ct.upper()).most_common(5)
Out[5]:
[(' ', 56), ('L', 30), ('P', 18), ('H', 17), ('A', 17)]

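The most frequent character (ignoring the space) is L, with 30 occurrences, so E (the most common letter in English) was most likely shifted to L. Numbering the alphabet from A = 1, L is the 12th letter and E is the 5th, so the shift is $12 - 5 = 7$.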

Frequency coincidence

In [6]:
# Reference letter distribution: 26 entries, one per letter in alphabetical order
# (index 0 = A ... index 25 = Z). The largest entry, .127, sits at index 4 (E).
# NOTE(review): presumably approximate English letter frequencies - source not given.
eng_freq = [ .082, .015, .028, .043, .127, .022, .020, .061, .070, .002, .008, .040, .024, .067, .075, .019, .001, .060, .063, .091, .028, .010, .023, .001, .020, .001]

from numpy import dot
# Dot product of the reference distribution with itself. This is the score a
# perfectly matching letter histogram would reach, used as the baseline below.
dot(eng_freq, eng_freq)
Out[6]:
0.065601
In [7]:
Counter(filter(str.isalpha, ct.upper()))
Out[7]:
Counter({'D': 8,
         'O': 16,
         'L': 30,
         'U': 12,
         'F': 6,
         'V': 16,
         'B': 5,
         'Y': 10,
         'H': 17,
         'A': 17,
         'K': 12,
         'W': 5,
         'S': 8,
         'P': 18,
         'Z': 9,
         'M': 8,
         'G': 2,
         'J': 3,
         'T': 5,
         'I': 4,
         'N': 3,
         'R': 1,
         'Q': 1,
         'C': 2})
In [8]:
sum(Counter(filter(str.isalpha, ct.upper())).values())
Out[8]:
218
In [50]:
from string import ascii_uppercase 

ascii_uppercase
Out[50]:
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
In [53]:
# Histogram of the ciphertext letters only: the text is upper-cased first and
# every non-alphabetic character (spaces, punctuation) is filtered out.
counter = Counter(filter(str.isalpha, ct.upper()))
# Total number of letters that were counted.
n = sum(counter.values())
# Relative frequency of each letter A..Z, in alphabetical order. A Counter returns 0
# for a letter that never occurs, so the list always has 26 entries.
# Despite the name, the values are fractions in [0, 1], not percentages.
freq_perc = [ counter[letter]/n for letter in ascii_uppercase]
print(freq_perc)
[0.02127659574468085, 0.019036954087346025, 0.03135498320268757, 0.030235162374020158, 0.043673012318029114, 0.023516237402015677, 0.029115341545352745, 0.027995520716685332, 0.0851063829787234, 0.043673012318029114, 0.029115341545352745, 0.0167973124300112, 0.060470324748040316, 0.020156774916013438, 0.03471444568868981, 0.03807390817469205, 0.03807390817469205, 0.05375139977603583, 0.03919372900335946, 0.03471444568868981, 0.01791713325867861, 0.0683090705487122, 0.043673012318029114, 0.06270996640537514, 0.023516237402015677, 0.06382978723404255]
In [54]:
dot(freq_perc, eng_freq)
Out[54]:
0.037318029115341544
In [58]:
print(freq_perc)
[0.02127659574468085, 0.019036954087346025, 0.03135498320268757, 0.030235162374020158, 0.043673012318029114, 0.023516237402015677, 0.029115341545352745, 0.027995520716685332, 0.0851063829787234, 0.043673012318029114, 0.029115341545352745, 0.0167973124300112, 0.060470324748040316, 0.020156774916013438, 0.03471444568868981, 0.03807390817469205, 0.03807390817469205, 0.05375139977603583, 0.03919372900335946, 0.03471444568868981, 0.01791713325867861, 0.0683090705487122, 0.043673012318029114, 0.06270996640537514, 0.023516237402015677, 0.06382978723404255]
In [13]:
from numpy import roll
roll(freq_perc, 2)
Out[13]:
array([0.04587156, 0.0412844 , 0.07798165, 0.02293578, 0.00917431,
       0.03669725, 0.        , 0.02752294, 0.00917431, 0.07798165,
       0.01834862, 0.01376147, 0.05504587, 0.13761468, 0.03669725,
       0.01376147, 0.0733945 , 0.08256881, 0.00458716, 0.00458716,
       0.03669725, 0.02293578, 0.05504587, 0.0733945 , 0.02293578,
       0.        ])
In [14]:
dot(roll(freq_perc, 2), eng_freq)
Out[14]:
0.03504128440366973
In [15]:
# Score every cyclic shift of the ciphertext histogram against the English one.
# roll(freq_perc, shift) moves the entry at index i to index (i + shift) % 26, so the
# dot product peaks when adding `shift` to each ciphertext letter yields English.
# The best-scoring shift is therefore the decryption shift; the encryption key is
# (26 - shift) % 26.
for shift in range(26):
    print(shift, dot(roll(freq_perc, shift), eng_freq))
0 0.03869266055045872
1 0.03429357798165138
2 0.03504128440366973
3 0.0399908256880734
4 0.04433944954128441
5 0.03754587155963303
6 0.040472477064220184
7 0.04020642201834863
8 0.0453256880733945
9 0.03659633027522936
10 0.03198165137614679
11 0.03464220183486239
12 0.038871559633027526
13 0.03320642201834863
14 0.033399082568807345
15 0.045055045871559646
16 0.036788990825688074
17 0.02987614678899083
18 0.04057339449541285
19 0.06451834862385321
20 0.03744036697247707
21 0.02943577981651376
22 0.03588073394495413
23 0.04618807339449542
24 0.034151376146788995
25 0.036486238532110096
In [16]:
[dot(roll(freq_perc, shift), eng_freq) for shift in range(26)]
Out[16]:
[0.03869266055045872,
 0.03429357798165138,
 0.03504128440366973,
 0.0399908256880734,
 0.04433944954128441,
 0.03754587155963303,
 0.040472477064220184,
 0.04020642201834863,
 0.0453256880733945,
 0.03659633027522936,
 0.03198165137614679,
 0.03464220183486239,
 0.038871559633027526,
 0.03320642201834863,
 0.033399082568807345,
 0.045055045871559646,
 0.036788990825688074,
 0.02987614678899083,
 0.04057339449541285,
 0.06451834862385321,
 0.03744036697247707,
 0.02943577981651376,
 0.03588073394495413,
 0.04618807339449542,
 0.034151376146788995,
 0.036486238532110096]
In [17]:
dot_products = [dot(roll(freq_perc, shift), eng_freq) for shift in range(26)]

def find_max(arr):
    """Return the index of the largest element of ``arr``.

    Accepts any non-empty sequence that supports ``len()`` and integer
    indexing (list, tuple, NumPy array, ...); it no longer requires the
    list-style ``.index`` method. If the maximum occurs more than once,
    the index of its first occurrence is returned.

    Raises:
        ValueError: if ``arr`` is empty.
    """
    if len(arr) == 0:
        raise ValueError("find_max() arg is an empty sequence")
    # max() keeps the first maximal candidate it meets, so ties resolve to the
    # lowest index - the same result as arr.index(max(arr)) in a single pass.
    return max(range(len(arr)), key=lambda i: arr[i])

find_max(dot_products)
Out[17]:
19

Vigenere

opkjcpcsrqtkhespzxxsjmuinieiovvryaaqaeicjnystxnemmbyzrvvvjo pwxzinqjibxzjdmjfsxnizvjiqidfzzymlxyeiljiqidfzzzaspwxcbnihe spzxxsjmuinbnijiehjzutsvrxdvmwmwkihaujharvzvgqsgfqhwtroqvsa bnijycrzzgfwpzxtxuxsrkmvtxmgorxopkxwqvsaxafzmteoquroruxcmic piirjbkwwwjyzqtavmtlopkzipeimihmzmkcvxviovvhnwlxkeiiqmxwwse wvzkmriexdnoirmwoiwcrhlzwdvlsfqrxdwtmgtiinmtxshfrgggwowlqhi xcqsdtgmzirikedtyefirzvqreppvjmwsxvijspziiwrumxizirmexcmkrr rfxzxriowvrjbkxvekiqmtmtxyihmzlchfjvbzeqoyenvuxpivrpbopwdvh jzgrsbgpjqzwqvztoqyrcxtymzkrhppadlkpmemedtgfzifropkmbxvvimz edskiiboezzlpimxepmcmognegfviiqjibxzjdmjydhrxzazswxvqnivtse imioordvvzdwawwwjyzaujqcsimvuxswrvztowhiumijuprrvadvlsfqrxd wtgcrkedvkhwrklzcvhoxvadtredtvemqtmhecmxqfirgfpjzkhhioxrpkv segtgqieppvxcmzeppvpdazwogmiicsfsvzrmmjavmtlxwxvswgsilyxcxy ixwsqcrmygvkvofzpdboigeehzfvsgyiinkbizmjxvkuqdmceoqurcjjxvv jefhzdzlteaijjjzbyzrvvvjopwxzinithcxyimqtjcvdeoqurgitymqzcs bgsncxig
In [34]:
orig="opkjcpcsrqtkhespzxxsjmuinieiovvryaaqaeicjnystxnemmbyzrvvvjo pwxzinqjibxzjdmjfsxnizvjiqidfzzymlxyeiljiqidfzzzaspwxcbnihe spzxxsjmuinbnijiehjzutsvrxdvmwmwkihaujharvzvgqsgfqhwtroqvsa bnijycrzzgfwpzxtxuxsrkmvtxmgorxopkxwqvsaxafzmteoquroruxcmic piirjbkwwwjyzqtavmtlopkzipeimihmzmkcvxviovvhnwlxkeiiqmxwwse wvzkmriexdnoirmwoiwcrhlzwdvlsfqrxdwtmgtiinmtxshfrgggwowlqhi xcqsdtgmzirikedtyefirzvqreppvjmwsxvijspziiwrumxizirmexcmkrr rfxzxriowvrjbkxvekiqmtmtxyihmzlchfjvbzeqoyenvuxpivrpbopwdvh jzgrsbgpjqzwqvztoqyrcxtymzkrhppadlkpmemedtgfzifropkmbxvvimz edskiiboezzlpimxepmcmognegfviiqjibxzjdmjydhrxzazswxvqnivtse imioordvvzdwawwwjyzaujqcsimvuxswrvztowhiumijuprrvadvlsfqrxd wtgcrkedvkhwrklzcvhoxvadtredtvemqtmhecmxqfirgfpjzkhhioxrpkv segtgqieppvxcmzeppvpdazwogmiicsfsvzrmmjavmtlxwxvswgsilyxcxy ixwsqcrmygvkvofzpdboigeehzfvsgyiinkbizmjxvkuqdmceoqurcjjxvv jefhzdzlteaijjjzbyzrvvvjopwxzinithcxyimqtjcvdeoqurgitymqzcs bgsncxig"
In [35]:
len(orig)
Out[35]:
908
In [36]:
"".join(filter(str.isalpha, orig))
Out[36]:
'opkjcpcsrqtkhespzxxsjmuinieiovvryaaqaeicjnystxnemmbyzrvvvjopwxzinqjibxzjdmjfsxnizvjiqidfzzymlxyeiljiqidfzzzaspwxcbnihespzxxsjmuinbnijiehjzutsvrxdvmwmwkihaujharvzvgqsgfqhwtroqvsabnijycrzzgfwpzxtxuxsrkmvtxmgorxopkxwqvsaxafzmteoquroruxcmicpiirjbkwwwjyzqtavmtlopkzipeimihmzmkcvxviovvhnwlxkeiiqmxwwsewvzkmriexdnoirmwoiwcrhlzwdvlsfqrxdwtmgtiinmtxshfrgggwowlqhixcqsdtgmzirikedtyefirzvqreppvjmwsxvijspziiwrumxizirmexcmkrrrfxzxriowvrjbkxvekiqmtmtxyihmzlchfjvbzeqoyenvuxpivrpbopwdvhjzgrsbgpjqzwqvztoqyrcxtymzkrhppadlkpmemedtgfzifropkmbxvvimzedskiiboezzlpimxepmcmognegfviiqjibxzjdmjydhrxzazswxvqnivtseimioordvvzdwawwwjyzaujqcsimvuxswrvztowhiumijuprrvadvlsfqrxdwtgcrkedvkhwrklzcvhoxvadtredtvemqtmhecmxqfirgfpjzkhhioxrpkvsegtgqieppvxcmzeppvpdazwogmiicsfsvzrmmjavmtlxwxvswgsilyxcxyixwsqcrmygvkvofzpdboigeehzfvsgyiinkbizmjxvkuqdmceoqurcjjxvvjefhzdzlteaijjjzbyzrvvvjopwxzinithcxyimqtjcvdeoqurgitymqzcsbgsncxig'
In [37]:
# Build the working ciphertext from the Vigenere text in `orig`, keeping letters only
# (the blanks between the blocks are dropped). Reading from `orig` rather than from
# the previous value of `ct` keeps this cell correct on a fresh Restart & Run All,
# where `ct` still holds the shift-cipher text from the first section.
ct = "".join(filter(str.isalpha, orig))
In [38]:
len(ct)
Out[38]:
893
In [39]:
ct
Out[39]:
'opkjcpcsrqtkhespzxxsjmuinieiovvryaaqaeicjnystxnemmbyzrvvvjopwxzinqjibxzjdmjfsxnizvjiqidfzzymlxyeiljiqidfzzzaspwxcbnihespzxxsjmuinbnijiehjzutsvrxdvmwmwkihaujharvzvgqsgfqhwtroqvsabnijycrzzgfwpzxtxuxsrkmvtxmgorxopkxwqvsaxafzmteoquroruxcmicpiirjbkwwwjyzqtavmtlopkzipeimihmzmkcvxviovvhnwlxkeiiqmxwwsewvzkmriexdnoirmwoiwcrhlzwdvlsfqrxdwtmgtiinmtxshfrgggwowlqhixcqsdtgmzirikedtyefirzvqreppvjmwsxvijspziiwrumxizirmexcmkrrrfxzxriowvrjbkxvekiqmtmtxyihmzlchfjvbzeqoyenvuxpivrpbopwdvhjzgrsbgpjqzwqvztoqyrcxtymzkrhppadlkpmemedtgfzifropkmbxvvimzedskiiboezzlpimxepmcmognegfviiqjibxzjdmjydhrxzazswxvqnivtseimioordvvzdwawwwjyzaujqcsimvuxswrvztowhiumijuprrvadvlsfqrxdwtgcrkedvkhwrklzcvhoxvadtredtvemqtmhecmxqfirgfpjzkhhioxrpkvsegtgqieppvxcmzeppvpdazwogmiicsfsvzrmmjavmtlxwxvswgsilyxcxyixwsqcrmygvkvofzpdboigeehzfvsgyiinkbizmjxvkuqdmceoqurcjjxvvjefhzdzlteaijjjzbyzrvvvjopwxzinithcxyimqtjcvdeoqurgitymqzcsbgsncxig'
In [21]:
" "+ct
Out[21]:
' opkjcpcsrqtkhespzxxsjmuinieiovvryaaqaeicjnystxnemmbyzrvvvjopwxzinqjibxzjdmjfsxnizvjiqidfzzymlxyeiljiqidfzzzaspwxcbnihespzxxsjmuinbnijiehjzutsvrxdvmwmwkihaujharvzvgqsgfqhwtroqvsabnijycrzzgfwpzxtxuxsrkmvtxmgorxopkxwqvsaxafzmteoquroruxcmicpiirjbkwwwjyzqtavmtlopkzipeimihmzmkcvxviovvhnwlxkeiiqmxwwsewvzkmriexdnoirmwoiwcrhlzwdvlsfqrxdwtmgtiinmtxshfrgggwowlqhixcqsdtgmzirikedtyefirzvqreppvjmwsxvijspziiwrumxizirmexcmkrrrfxzxriowvrjbkxvekiqmtmtxyihmzlchfjvbzeqoyenvuxpivrpbopwdvhjzgrsbgpjqzwqvztoqyrcxtymzkrhppadlkpmemedtgfzifropkmbxvvimzedskiiboezzlpimxepmcmognegfviiqjibxzjdmjydhrxzazswxvqnivtseimioordvvzdwawwwjyzaujqcsimvuxswrvztowhiumijuprrvadvlsfqrxdwtgcrkedvkhwrklzcvhoxvadtredtvemqtmhecmxqfirgfpjzkhhioxrpkvsegtgqieppvxcmzeppvpdazwogmiicsfsvzrmmjavmtlxwxvswgsilyxcxyixwsqcrmygvkvofzpdboigeehzfvsgyiinkbizmjxvkuqdmceoqurcjjxvvjefhzdzlteaijjjzbyzrvvvjopwxzinithcxyimqtjcvdeoqurgitymqzcsbgsncxig'
In [23]:
# Count the coincidences between the ciphertext and a copy of itself displaced by
# one position. Prepending one blank makes shifted[i] equal to ct[i-1].
shifted = " "+ct
s = 0  # number of positions where the text and its displaced copy agree
for i in range(len(ct)):
    # shifted is one character longer than ct, so index i is always valid
    if ct[i] == shifted[i]:
        s += 1
print(s)
47
In [41]:
# Pythonic: walk the text and its displaced copy in lockstep instead of indexing.
# zip stops at the shorter string (ct), so the extra trailing character is ignored.
shifted = " " + ct
sum(1 for current, previous in zip(ct, shifted) if current == previous)
Out[41]:
47
In [42]:
# Coincidences with the text displaced by 2: compare each letter with the one two
# positions earlier. Slicing once replaces rebuilding the blank-padded string for
# every index (quadratic), and no padding blank can ever match a real character.
sum(1 for current, earlier in zip(ct[2:], ct) if current == earlier)
Out[42]:
46
In [43]:
# Coincidences with the text displaced by 3: compare each letter with the one three
# positions earlier. Slicing once replaces rebuilding the blank-padded string for
# every index (quadratic), and no padding blank can ever match a real character.
sum(1 for current, earlier in zip(ct[3:], ct) if current == earlier)
Out[43]:
28
In [44]:
# (shift, coincidence count) for every displacement 0..49. For each shift the text is
# sliced once and compared pairwise with itself, instead of rebuilding the
# blank-padded copy for every single index. Shift 0 trivially matches everywhere.
[
    (shift, sum(1 for current, earlier in zip(ct[shift:], ct) if current == earlier))
    for shift in range(50)
]
Out[44]:
[(0, 893),
 (1, 47),
 (2, 46),
 (3, 28),
 (4, 49),
 (5, 34),
 (6, 33),
 (7, 21),
 (8, 57),
 (9, 27),
 (10, 41),
 (11, 32),
 (12, 47),
 (13, 24),
 (14, 41),
 (15, 33),
 (16, 66),
 (17, 29),
 (18, 38),
 (19, 34),
 (20, 47),
 (21, 37),
 (22, 52),
 (23, 38),
 (24, 52),
 (25, 42),
 (26, 43),
 (27, 29),
 (28, 52),
 (29, 32),
 (30, 52),
 (31, 35),
 (32, 58),
 (33, 29),
 (34, 40),
 (35, 30),
 (36, 56),
 (37, 34),
 (38, 41),
 (39, 37),
 (40, 58),
 (41, 35),
 (42, 43),
 (43, 22),
 (44, 42),
 (45, 17),
 (46, 42),
 (47, 27),
 (48, 61),
 (49, 28)]
In [45]:
# (shift, coincidence count) for displacements 0..49, ordered from the fewest matches
# to the most. The text is sliced once per shift rather than re-padded per index.
sorted(
    [
        (shift, sum(1 for current, earlier in zip(ct[shift:], ct) if current == earlier))
        for shift in range(50)
    ],
    key=lambda pair: pair[1],
)
Out[45]:
[(45, 17),
 (7, 21),
 (43, 22),
 (13, 24),
 (9, 27),
 (47, 27),
 (3, 28),
 (49, 28),
 (17, 29),
 (27, 29),
 (33, 29),
 (35, 30),
 (11, 32),
 (29, 32),
 (6, 33),
 (15, 33),
 (5, 34),
 (19, 34),
 (37, 34),
 (31, 35),
 (41, 35),
 (21, 37),
 (39, 37),
 (18, 38),
 (23, 38),
 (34, 40),
 (10, 41),
 (14, 41),
 (38, 41),
 (25, 42),
 (44, 42),
 (46, 42),
 (26, 43),
 (42, 43),
 (2, 46),
 (1, 47),
 (12, 47),
 (20, 47),
 (4, 49),
 (22, 52),
 (24, 52),
 (28, 52),
 (30, 52),
 (36, 56),
 (8, 57),
 (32, 58),
 (40, 58),
 (48, 61),
 (16, 66),
 (0, 893)]
In [64]:
# (shift, coincidence count) for displacements 0..49, ordered from the most matches
# to the fewest. The text is sliced once per shift rather than re-padded per index.
sorted(
    [
        (shift, sum(1 for current, earlier in zip(ct[shift:], ct) if current == earlier))
        for shift in range(50)
    ],
    key=lambda pair: pair[1],
    reverse=True,
)
Out[64]:
[(0, 893),
 (16, 66),
 (48, 61),
 (32, 58),
 (40, 58),
 (8, 57),
 (36, 56),
 (22, 52),
 (24, 52),
 (28, 52),
 (30, 52),
 (4, 49),
 (1, 47),
 (12, 47),
 (20, 47),
 (2, 46),
 (26, 43),
 (42, 43),
 (25, 42),
 (44, 42),
 (46, 42),
 (10, 41),
 (14, 41),
 (38, 41),
 (34, 40),
 (18, 38),
 (23, 38),
 (21, 37),
 (39, 37),
 (31, 35),
 (41, 35),
 (5, 34),
 (19, 34),
 (37, 34),
 (6, 33),
 (15, 33),
 (11, 32),
 (29, 32),
 (35, 30),
 (17, 29),
 (27, 29),
 (33, 29),
 (3, 28),
 (49, 28),
 (9, 27),
 (47, 27),
 (13, 24),
 (43, 22),
 (7, 21),
 (45, 17)]
In [46]:
# The ten displacements (1..49; the trivial shift 0 is excluded) with the most
# coincidences. Peaks at multiples of one number hint at the key length.
# The text is sliced once per shift rather than re-padded for every index.
sorted(
    [
        (shift, sum(1 for current, earlier in zip(ct[shift:], ct) if current == earlier))
        for shift in range(1, 50)
    ],
    key=lambda pair: pair[1],
    reverse=True,
)[:10]
Out[46]:
[(16, 66),
 (48, 61),
 (32, 58),
 (40, 58),
 (8, 57),
 (36, 56),
 (22, 52),
 (24, 52),
 (28, 52),
 (30, 52)]
In [47]:
ct
Out[47]:
'opkjcpcsrqtkhespzxxsjmuinieiovvryaaqaeicjnystxnemmbyzrvvvjopwxzinqjibxzjdmjfsxnizvjiqidfzzymlxyeiljiqidfzzzaspwxcbnihespzxxsjmuinbnijiehjzutsvrxdvmwmwkihaujharvzvgqsgfqhwtroqvsabnijycrzzgfwpzxtxuxsrkmvtxmgorxopkxwqvsaxafzmteoquroruxcmicpiirjbkwwwjyzqtavmtlopkzipeimihmzmkcvxviovvhnwlxkeiiqmxwwsewvzkmriexdnoirmwoiwcrhlzwdvlsfqrxdwtmgtiinmtxshfrgggwowlqhixcqsdtgmzirikedtyefirzvqreppvjmwsxvijspziiwrumxizirmexcmkrrrfxzxriowvrjbkxvekiqmtmtxyihmzlchfjvbzeqoyenvuxpivrpbopwdvhjzgrsbgpjqzwqvztoqyrcxtymzkrhppadlkpmemedtgfzifropkmbxvvimzedskiiboezzlpimxepmcmognegfviiqjibxzjdmjydhrxzazswxvqnivtseimioordvvzdwawwwjyzaujqcsimvuxswrvztowhiumijuprrvadvlsfqrxdwtgcrkedvkhwrklzcvhoxvadtredtvemqtmhecmxqfirgfpjzkhhioxrpkvsegtgqieppvxcmzeppvpdazwogmiicsfsvzrmmjavmtlxwxvswgsilyxcxyixwsqcrmygvkvofzpdboigeehzfvsgyiinkbizmjxvkuqdmceoqurcjjxvvjefhzdzlteaijjjzbyzrvvvjopwxzinithcxyimqtjcvdeoqurgitymqzcsbgsncxig'
In [48]:
# Every 8th letter starting at index 0, i.e. the first letter of each 8-letter block.
# These letters are to be deciphered together as one shift cipher.
# NOTE(review): 8 is presumably the assumed key length, suggested by the coincidence
# peaks at multiples of 8 (8, 16, 32, 40, 48) - confirm.
ct[::8] #decipher first letter as shift cipher
Out[48]:
'orznyjmvndzzizcznjdhzhaztvoaocjzomvnqvdiddnghgdvmpxczjqhvnpjjomddoiiioidznidzmzidddzdmxjrgcdimxixgdznvovzjvnmomn'
In [49]:
# Every 8th letter starting at index 1, i.e. the second letter of each 8-letter block.
ct[1::8]
Out[49]:
'pqxianmjqmvzlzbxbzvavwbzxtpxqmbqpixwmznwvwmgimtqwzimxbmmbvbzqqzltpmbmgqmaiowavtjvwvctqqzpqmacmwlwvbfkkqvlzjiqqqc'