2017年1月14日土曜日

開発環境

行列プログラマー(Philip N. Klein (著)、 松田 晃一 (翻訳)、 弓林 司 (翻訳)、 脇本 佑紀 (翻訳)、 中田 洋 (翻訳)、 齋藤 大吾 (翻訳)、オライリージャパン)の0章(関数(とその他の数学とコンピュータに関する予備知識))、0.5(ラボ: Python 入門 - 集合、リスト、辞書、内包表記)、0.6.2(ミニ検索エンジン)、課題 0.6.6、0.6.7、0.6.8に取り組んでみる。

課題 0.6.6、0.6.7、0.6.8

コード(Emacs)

#!/usr/bin/env python3
# -*- coding: utf-8 -*-


filename1 = 'stories_small.txt'
filename2 = 'stories_big.txt'

# 6.6


def make_inverse_index(str_list):
    """Build an inverse index mapping each word to the documents containing it.

    Each element of ``str_list`` is treated as one document, numbered by its
    position in the iteration (starting at 0), and is split into words on
    whitespace.

    Args:
        str_list: Iterable of strings, one string per document.

    Returns:
        A dict mapping every word to the set of document numbers in which
        that word appears.
    """
    index = {}
    for doc_number, document in enumerate(str_list):
        for token in document.split():
            # setdefault creates the posting set the first time a word is seen.
            index.setdefault(token, set()).add(doc_number)
    return index

# 6.7


def or_search(inverse_index, query):
    """Return the documents that contain at least one of the query words.

    Args:
        inverse_index: Dict mapping a word to the set of document numbers
            containing it.
        query: Iterable of words to look up.

    Returns:
        A new set holding the union of the document sets of all query words.
        Words absent from the index contribute nothing; an empty query
        yields an empty set.
    """
    postings = (inverse_index.get(term, set()) for term in query)
    # Starting from a fresh empty set keeps the index's own sets untouched.
    return set().union(*postings)


# 6.8
def and_search(inverse_index, query):
    """Return the documents that contain every one of the query words.

    The result is always a newly created set, so the sets stored in
    ``inverse_index`` are never modified and the index can be reused for
    further searches.

    Args:
        inverse_index: Dict mapping a word to the set of document numbers
            containing it.
        query: Sequence of words that must all be present.

    Returns:
        A set with the document numbers common to all query words. If any
        word is missing from the index, or the query is empty, the result
        is an empty set.
    """
    if not query:
        return set()
    # Copy the first posting set: intersecting in place on the index's own
    # set would permanently shrink the entry stored in the index.
    result = set(inverse_index.get(query[0], ()))
    for word in query[1:]:
        result.intersection_update(inverse_index.get(word, ()))
    return result

# Run both search functions for each sample query against each story file
# and print the matching document numbers in ascending order.
for story_path in (filename1, filename2):
    print(story_path)
    for terms in (['world', 'final'], ['world', 'cup']):
        print(terms)
        for strategy in (or_search, and_search):
            print(strategy.__name__)
            # A fresh index is built from the file for every search call;
            # each line of the file is one document.
            with open(story_path) as story_file:
                index = make_inverse_index(story_file)
            hits = strategy(index, terms)
            print(sorted(hits))

入出力結果(Terminal, IPython)

$ ./sample6_6.py
stories_small.txt
['world', 'final']
or_search
[0, 1, 10, 11, 14, 15, 16, 17, 18, 20, 21, 26, 41, 42, 44, 47, 48]
and_search
[11]
['world', 'cup']
or_search
[1, 10, 11, 14, 15, 16, 20, 21, 26, 41, 42, 44, 47, 48]
and_search
[]
stories_big.txt
['world', 'final']
or_search
[0, 1, 10, 11, 14, 15, 16, 17, 18, 20, 21, 26, 41, 42, 44, 47, 48, 60, 65, 66, 67, 75, 78, 87, 89, 96, 101, 107, 109, 110, 119, 120, 127, 128, 131, 134, 137, 140, 141, 142, 143, 144, 148, 149, 152, 153, 154, 156, 160, 162, 164, 169, 172, 175, 176, 177, 178, 179, 183, 187, 191, 198, 199, 207, 215, 221, 228, 230, 232, 233, 235, 238, 239, 241, 247, 248, 255, 257, 258, 259, 265, 269, 270, 271, 272, 280, 284, 297, 298, 305, 308, 310, 314, 317, 319, 322, 323, 329, 335, 345, 347, 351, 352, 353, 354, 355, 358, 361, 365, 367, 368, 374, 379, 380, 384, 386, 388, 389, 390, 391, 405, 406, 410, 412, 415, 416, 417, 421, 423, 424, 426, 428, 429, 434, 436, 437, 438, 440, 446, 447, 449, 450, 453, 454, 455, 459, 463, 464, 465, 466, 467, 470, 472, 473, 477, 481, 485, 490, 492, 494, 495, 496, 503, 508, 516, 522, 526, 527, 530, 531, 537, 550, 553, 554, 555, 557, 559, 569, 575, 579, 586, 588, 593, 600, 601, 602, 605, 607, 608, 613, 616, 618, 623, 625, 636, 638, 645, 649, 653, 654, 661, 662, 664, 671, 673, 676, 677, 679, 681, 683, 687, 694, 700, 701, 704, 708, 709, 710, 711, 714, 715, 717, 718, 719, 720, 727, 731, 734, 739, 740, 743, 747, 751, 755, 756, 759, 760, 763, 768, 770, 772, 780, 782, 783, 788, 790, 791, 793, 794, 796, 798, 799, 802, 805, 811, 812, 815, 819, 823, 824, 825, 832, 836, 838, 840, 842, 844, 846, 848, 852, 853, 862, 864, 865, 866, 868, 870, 877, 881, 883, 884, 886, 890, 894, 897, 899, 910, 912, 913, 923, 925, 927, 928, 930, 932, 935, 937, 938, 939, 941, 945, 952, 953, 954, 955, 956, 959, 960, 963, 966, 967, 968, 971, 974, 980, 981, 985, 987, 989, 993, 996, 1017, 1019, 1024, 1025, 1027, 1030, 1031, 1033, 1035, 1036, 1038, 1039, 1041, 1045, 1046, 1047, 1050, 1051, 1053, 1062, 1064, 1065, 1074, 1075, 1077, 1081, 1085, 1086, 1091, 1096, 1097, 1098]
and_search
[11, 65, 110, 149, 198, 238, 298, 308, 314, 361, 368, 374, 406, 412, 467, 530, 554, 557, 559, 601, 616, 683, 710, 714, 780, 825, 941, 952, 953, 967, 981, 1027, 1033]
['world', 'cup']
or_search
[1, 10, 11, 14, 15, 16, 20, 21, 26, 41, 42, 44, 47, 48, 65, 66, 67, 78, 87, 89, 96, 101, 107, 109, 110, 119, 120, 127, 128, 131, 134, 137, 140, 141, 142, 143, 144, 148, 149, 152, 153, 156, 160, 162, 164, 169, 172, 177, 178, 179, 183, 187, 191, 198, 199, 207, 215, 228, 230, 231, 232, 233, 235, 238, 239, 241, 247, 248, 255, 257, 258, 259, 265, 269, 270, 272, 280, 297, 298, 305, 308, 310, 314, 317, 319, 322, 323, 335, 345, 352, 353, 355, 358, 361, 365, 367, 368, 374, 379, 380, 384, 386, 388, 389, 391, 406, 410, 412, 415, 416, 417, 421, 423, 424, 426, 428, 434, 436, 437, 438, 440, 446, 447, 449, 453, 454, 455, 459, 463, 466, 467, 470, 472, 473, 477, 481, 485, 490, 492, 494, 495, 496, 503, 516, 522, 526, 527, 530, 531, 537, 554, 555, 557, 559, 570, 572, 575, 576, 577, 579, 585, 586, 588, 590, 593, 600, 601, 602, 605, 607, 608, 613, 616, 618, 623, 625, 636, 638, 640, 645, 649, 653, 654, 662, 664, 671, 676, 683, 694, 700, 701, 704, 708, 709, 710, 711, 714, 715, 717, 718, 719, 720, 731, 739, 740, 743, 747, 755, 756, 760, 763, 768, 770, 772, 780, 783, 790, 791, 794, 796, 798, 811, 812, 815, 819, 823, 824, 825, 832, 836, 838, 840, 842, 844, 846, 862, 864, 865, 866, 868, 870, 881, 883, 884, 886, 890, 894, 897, 910, 912, 913, 923, 925, 927, 928, 930, 932, 935, 937, 938, 939, 941, 945, 952, 953, 954, 955, 956, 960, 963, 966, 967, 968, 971, 974, 980, 981, 985, 987, 989, 993, 996, 1017, 1019, 1024, 1025, 1027, 1031, 1033, 1035, 1036, 1038, 1039, 1041, 1045, 1046, 1047, 1050, 1051, 1053, 1062, 1064, 1065, 1074, 1075, 1077, 1081, 1083, 1085, 1086, 1089, 1091, 1096, 1097, 1098]
and_search
[819, 1025]
$

0 コメント:

コメントを投稿