開発環境
- OS X El Capitan - Apple (OS)
- Emacs (Text Editor)
- Python 3.5 (プログラミング言語)
Think Python (Allen B. Downey (著)、 O'Reilly Media)のChapter 13.(Case Study: Data Structure Selection)のExercises 13-9.(No. 3150)に取り組んでみる。
Exercises 13-9.(No. 3150)
コード(Emacs)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import string
import matplotlib.pyplot as plt
import math
def readFile(filename, prefix_length=2, header_end='*END*'):
    """Count how often each word occurs in a text file.

    Lines up to and including the first line that starts with
    ``header_end`` are treated as a header and skipped.  If no line
    starts with ``header_end`` the whole file is counted instead of
    silently returning an empty result.

    Every character in ``string.whitespace`` and ``string.punctuation``
    is treated as a word separator.  Counting is case-sensitive.

    Args:
        filename: Path of the text file to read.
        prefix_length: Unused; kept so existing callers keep working.
        header_end: Prefix of the line that terminates the header.

    Returns:
        A dict mapping each word to its number of occurrences.
    """
    # Map every separator character to a space in one pass per line.
    separators = string.whitespace + string.punctuation
    to_space = str.maketrans(separators, ' ' * len(separators))

    words = {}
    with open(filename) as f:
        for line in f:
            if line.startswith(header_end):
                break
        else:
            # No header marker was found: the loop consumed the whole
            # file, so rewind and count everything.
            f.seek(0)
        for line in f:
            # split() already discards surrounding whitespace.
            for word in line.translate(to_space).split():
                words[word] = words.get(word, 0) + 1
    return words
if __name__ == '__main__':
    # Build (frequency, word) pairs ordered from most to least frequent.
    filename = 'emma.txt'
    histogram = readFile(filename)
    ranked = sorted(((count, token) for token, count in histogram.items()),
                    key=lambda pair: pair[0], reverse=True)

    print('{0:^15} {1:^20} {2:^20}'.format('word', 'log f', 'log r'))

    # Only three windows of the table are printed: the first ten ranks,
    # ten ranks starting just past one fifth of the way down, and the
    # last ten ranks.
    total = len(ranked)
    middle_start = total // 5
    tail_start = total - 10
    row_format = '{0:<15} {1:>20.16f} {2:>20.16f}'

    log_ranks = []
    log_freqs = []
    for position, (count, token) in enumerate(ranked):
        log_freq = math.log(count)
        log_rank = math.log(position + 1)

        in_head = position < 10
        in_middle = middle_start < position <= middle_start + 10
        in_tail = position >= tail_start
        if in_head or in_middle or in_tail:
            print(row_format.format(token, log_freq, log_rank))

        # Every word contributes a point to the plot, printed or not.
        log_ranks.append(log_rank)
        log_freqs.append(log_freq)

    # Plot log f against log r and save the figure before showing it.
    plt.figure(figsize=(5, 5))
    plt.plot(log_ranks, log_freqs)
    plt.savefig('zipf_law.svg')
    plt.show()
入出力結果(Terminal, IPython)
$ ./sample9.py
     word              log f                 log r
to                8.5537179660986098   0.0000000000000000
the               8.4859089013764706   0.6931471805599453
and               8.4495565427004262   1.0986122886681098
of                8.3624089776153703   1.3862943611198906
I                 8.0684029585696990   1.6094379124341003
a                 8.0080328469693072   1.7917594692280550
her               7.7832240163360371   1.9459101490553132
was               7.7777926263388304   2.0794415416798357
it                7.6634076648934792   2.1972245773362196
in                7.6582275261613519   2.3025850929940459
dull              2.1972245773362196   7.3225104339973939
talent            2.1972245773362196   7.3231707179434693
supply            2.1972245773362196   7.3238305662023171
End               2.1972245773362196   7.3244899793485319
Don               2.1972245773362196   7.3251489579555749
receive           2.1972245773362196   7.3258075025957732
hesitation        2.1972245773362196   7.3264656138403224
forty             2.1972245773362196   7.3271232922592926
collected         2.1972245773362196   7.3277805384216315
resumed           2.1972245773362196   7.3284373528951621
physician         0.0000000000000000   8.9299648747068403
losses            0.0000000000000000   8.9300972286214009
palpable          0.0000000000000000   8.9302295650207206
movements         0.0000000000000000   8.9303618839094341
enjoyable         0.0000000000000000   8.9304941852921775
cheese            0.0000000000000000   8.9306264691735784
facing            0.0000000000000000   8.9307587355582694
liveliest         0.0000000000000000   8.9308909844508779
Astonished        0.0000000000000000   8.9310232158560297
dispensed         0.0000000000000000   8.9311554297783484
$
0 コメント:
コメントを投稿