2016年8月25日木曜日

開発環境

Think Python (Allen B. Downey (著)、O'Reilly Media)のChapter 13.(Case Study: Data Structure Selection)のExercises 13-6.(No. 3021)に取り組んでみる。

Exercises 13-6.(No. 3021)

コード(Emacs)

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import string


def get_words(filename, header=True):
    """Count how often each word occurs in a text file.

    Every character in ``string.punctuation`` is treated as a word
    separator, and words are lower-cased before they are counted.

    Args:
        filename: Path of the text file to read.
        header: When true, skip every line up to and including the first
            line that starts with ``'***'``; counting begins on the line
            after it.  When false, count from the first line.

    Returns:
        A dict mapping each lower-cased word to its number of occurrences.
        The dict is empty when ``header`` is true and no line starts with
        ``'***'``.
    """
    # One translation table turns every punctuation character into a space
    # in a single pass, instead of one str.replace() call per punctuation
    # character on every line.
    punctuation_to_space = str.maketrans(
        string.punctuation, ' ' * len(string.punctuation))
    words = {}
    with open(filename) as f:
        skipping = header
        for line in f:
            if skipping:
                # The marker line itself still belongs to the header, so
                # it is skipped as well.
                if line.startswith('***'):
                    skipping = False
                continue
            # split() without arguments never yields empty or
            # whitespace-padded tokens, so no extra stripping is needed.
            for word in line.translate(punctuation_to_space).split():
                word = word.lower()
                words[word] = words.get(word, 0) + 1
    return words

if __name__ == '__main__':
    # Distinct words of the book; get_words skips everything up to and
    # including the first line starting with '***'.
    book_vocabulary = set(get_words(
        'History_of_a_Six_Weeks_Tour_by_Shelley_and_Shelley.txt'))
    # Distinct words of the reference list, read from its very first line.
    known_words = set(get_words('words.txt', header=False))

    # Words that occur in the book but not in the reference list.
    unknown_words = book_vocabulary.difference(known_words)
    first_ten = sorted(unknown_words)[:10]

    # Report, one item per line: number of distinct words in the book,
    # number of those missing from the list, and the first ten missing
    # words in sorted order.
    print(len(book_vocabulary), len(unknown_words), first_ten, sep='\n')

入出力結果(Terminal, IPython)

$ ./sample6.py
4266
502
['0', '000', '1', '12', '12th', '13th', '15', '1500', '1560', '1670']
$

0 コメント:

コメントを投稿