開発環境
- macOS Sierra - Apple (OS)
- Emacs (Text Editor)
- Python 3.6 (プログラミング言語)
行列プログラマー(Philip N. Klein (著)、 松田 晃一 (翻訳)、 弓林 司 (翻訳)、 脇本 佑紀 (翻訳)、 中田 洋 (翻訳)、 齋藤 大吾 (翻訳)、オライリージャパン)の0章(関数(とその他の数学とコンピュータに関する予備知識))、0.5(ラボ: Python 入門 - 集合、リスト、辞書、内包表記)、0.6.2(ミニ検索エンジン)、課題 0.6.6、0.6.7、0.6.8に取り組んでみる。
課題 0.6.6、0.6.7、0.6.8
コード(Emacs)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Mini search engine: build an inverse index and run OR / AND queries.

Each document is one string (one line of the input file) and is identified
by its zero-based position in the input.
"""

filename1 = 'stories_small.txt'
filename2 = 'stories_big.txt'


# 6.6
def make_inverse_index(str_list):
    """Return a dict mapping each word to the set of document numbers.

    Args:
        str_list: An iterable of strings (for example a list of lines or an
            open text file); each item is one document.

    Returns:
        A dict whose keys are the whitespace-separated words found in the
        documents and whose values are sets of the zero-based indices of
        the documents containing that word.
    """
    d = {}
    for i, line in enumerate(str_list):
        for word in line.split():
            d.setdefault(word, set()).add(i)
    return d


# 6.7
def or_search(inverse_index, query):
    """Return the documents that contain at least one of the query words.

    Args:
        inverse_index: A dict as produced by make_inverse_index.
        query: An iterable of words.

    Returns:
        A new set of document numbers; empty when no word is found or the
        query is empty. The index itself is never modified.
    """
    s = set()
    for q in query:
        # An empty tuple stands in for words missing from the index.
        s.update(inverse_index.get(q, ()))
    return s


# 6.8
def and_search(inverse_index, query):
    """Return the documents that contain every one of the query words.

    Args:
        inverse_index: A dict as produced by make_inverse_index.
        query: A sequence of words.

    Returns:
        A new set of document numbers; empty when any word is missing from
        the index or when the query itself is empty. The index itself is
        never modified.
    """
    if not query:
        return set()
    # Copy the first posting set: intersection_update works in place, and
    # operating on the stored set directly would corrupt the index.
    s = set(inverse_index.get(query[0], ()))
    for q in query[1:]:
        s.intersection_update(inverse_index.get(q, ()))
    return s


def main():
    """Run the sample queries against both story files and print results."""
    for filename in [filename1, filename2]:
        print(filename)
        # Build the index once per file; it is the same for every query.
        with open(filename) as f:
            inverse_index = make_inverse_index(f)
        for query in [['world', 'final'], ['world', 'cup']]:
            print(query)
            for search in [or_search, and_search]:
                print(search.__name__)
                print(sorted(search(inverse_index, query)))


if __name__ == '__main__':
    main()
入出力結果(Terminal, IPython)
$ ./sample6_6.py stories_small.txt ['world', 'final'] or_search [0, 1, 10, 11, 14, 15, 16, 17, 18, 20, 21, 26, 41, 42, 44, 47, 48] and_search [11] ['world', 'cup'] or_search [1, 10, 11, 14, 15, 16, 20, 21, 26, 41, 42, 44, 47, 48] and_search [] stories_big.txt ['world', 'final'] or_search [0, 1, 10, 11, 14, 15, 16, 17, 18, 20, 21, 26, 41, 42, 44, 47, 48, 60, 65, 66, 67, 75, 78, 87, 89, 96, 101, 107, 109, 110, 119, 120, 127, 128, 131, 134, 137, 140, 141, 142, 143, 144, 148, 149, 152, 153, 154, 156, 160, 162, 164, 169, 172, 175, 176, 177, 178, 179, 183, 187, 191, 198, 199, 207, 215, 221, 228, 230, 232, 233, 235, 238, 239, 241, 247, 248, 255, 257, 258, 259, 265, 269, 270, 271, 272, 280, 284, 297, 298, 305, 308, 310, 314, 317, 319, 322, 323, 329, 335, 345, 347, 351, 352, 353, 354, 355, 358, 361, 365, 367, 368, 374, 379, 380, 384, 386, 388, 389, 390, 391, 405, 406, 410, 412, 415, 416, 417, 421, 423, 424, 426, 428, 429, 434, 436, 437, 438, 440, 446, 447, 449, 450, 453, 454, 455, 459, 463, 464, 465, 466, 467, 470, 472, 473, 477, 481, 485, 490, 492, 494, 495, 496, 503, 508, 516, 522, 526, 527, 530, 531, 537, 550, 553, 554, 555, 557, 559, 569, 575, 579, 586, 588, 593, 600, 601, 602, 605, 607, 608, 613, 616, 618, 623, 625, 636, 638, 645, 649, 653, 654, 661, 662, 664, 671, 673, 676, 677, 679, 681, 683, 687, 694, 700, 701, 704, 708, 709, 710, 711, 714, 715, 717, 718, 719, 720, 727, 731, 734, 739, 740, 743, 747, 751, 755, 756, 759, 760, 763, 768, 770, 772, 780, 782, 783, 788, 790, 791, 793, 794, 796, 798, 799, 802, 805, 811, 812, 815, 819, 823, 824, 825, 832, 836, 838, 840, 842, 844, 846, 848, 852, 853, 862, 864, 865, 866, 868, 870, 877, 881, 883, 884, 886, 890, 894, 897, 899, 910, 912, 913, 923, 925, 927, 928, 930, 932, 935, 937, 938, 939, 941, 945, 952, 953, 954, 955, 956, 959, 960, 963, 966, 967, 968, 971, 974, 980, 981, 985, 987, 989, 993, 996, 1017, 1019, 1024, 1025, 1027, 1030, 1031, 1033, 1035, 1036, 1038, 1039, 1041, 1045, 1046, 1047, 1050, 1051, 1053, 1062, 1064, 1065, 
1074, 1075, 1077, 1081, 1085, 1086, 1091, 1096, 1097, 1098] and_search [11, 65, 110, 149, 198, 238, 298, 308, 314, 361, 368, 374, 406, 412, 467, 530, 554, 557, 559, 601, 616, 683, 710, 714, 780, 825, 941, 952, 953, 967, 981, 1027, 1033] ['world', 'cup'] or_search [1, 10, 11, 14, 15, 16, 20, 21, 26, 41, 42, 44, 47, 48, 65, 66, 67, 78, 87, 89, 96, 101, 107, 109, 110, 119, 120, 127, 128, 131, 134, 137, 140, 141, 142, 143, 144, 148, 149, 152, 153, 156, 160, 162, 164, 169, 172, 177, 178, 179, 183, 187, 191, 198, 199, 207, 215, 228, 230, 231, 232, 233, 235, 238, 239, 241, 247, 248, 255, 257, 258, 259, 265, 269, 270, 272, 280, 297, 298, 305, 308, 310, 314, 317, 319, 322, 323, 335, 345, 352, 353, 355, 358, 361, 365, 367, 368, 374, 379, 380, 384, 386, 388, 389, 391, 406, 410, 412, 415, 416, 417, 421, 423, 424, 426, 428, 434, 436, 437, 438, 440, 446, 447, 449, 453, 454, 455, 459, 463, 466, 467, 470, 472, 473, 477, 481, 485, 490, 492, 494, 495, 496, 503, 516, 522, 526, 527, 530, 531, 537, 554, 555, 557, 559, 570, 572, 575, 576, 577, 579, 585, 586, 588, 590, 593, 600, 601, 602, 605, 607, 608, 613, 616, 618, 623, 625, 636, 638, 640, 645, 649, 653, 654, 662, 664, 671, 676, 683, 694, 700, 701, 704, 708, 709, 710, 711, 714, 715, 717, 718, 719, 720, 731, 739, 740, 743, 747, 755, 756, 760, 763, 768, 770, 772, 780, 783, 790, 791, 794, 796, 798, 811, 812, 815, 819, 823, 824, 825, 832, 836, 838, 840, 842, 844, 846, 862, 864, 865, 866, 868, 870, 881, 883, 884, 886, 890, 894, 897, 910, 912, 913, 923, 925, 927, 928, 930, 932, 935, 937, 938, 939, 941, 945, 952, 953, 954, 955, 956, 960, 963, 966, 967, 968, 971, 974, 980, 981, 985, 987, 989, 993, 996, 1017, 1019, 1024, 1025, 1027, 1031, 1033, 1035, 1036, 1038, 1039, 1041, 1045, 1046, 1047, 1050, 1051, 1053, 1062, 1064, 1065, 1074, 1075, 1077, 1081, 1083, 1085, 1086, 1089, 1091, 1096, 1097, 1098] and_search [819, 1025] $
0 コメント:
コメントを投稿