2016年8月15日月曜日

開発環境

Pythonからはじめる数学入門 (Amit Saha (著)、黒川 利明 (翻訳)、オライリージャパン)の3章(データを統計量で記述する)、3.9(プログラミングチャレンジ)、問題3-2(統計電卓)に取り組んでみる。

問題3-2(統計電卓)

コード(Emacs)

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sys
import random
import matplotlib.pyplot as plt
from collections import Counter


def calculate_mean(numbers):
    """Return the arithmetic mean of ``numbers``.

    The result is the sum of the values divided by how many there are.
    """
    return sum(numbers) / len(numbers)


def calculate_median(numbers):
    """Return the median of ``numbers``.

    The values are sorted into a new list (the argument is not modified).
    For an odd count the middle element is returned as-is; for an even
    count the result is the average of the two middle elements.
    """
    count = len(numbers)
    ordered = sorted(numbers)
    middle = count // 2

    if count % 2:
        # Odd count: exactly one element sits in the middle.
        return ordered[middle]

    # Even count: average the two elements straddling the middle.
    return (ordered[middle - 1] + ordered[middle]) / 2


def calculate_mode(numbers):
    """Return every mode of ``numbers`` as a list.

    All values that share the highest occurrence count are returned, so a
    multimodal input yields several entries.  An empty input has no mode
    and yields an empty list.
    """
    counts = Counter(numbers)
    if not counts:
        return []

    max_count = max(counts.values())
    # Ties are emitted in the Counter's own iteration order (first-seen
    # order on Python 3.7+), the same order most_common() gives for ties.
    return [num for num, count in counts.items() if count == max_count]


def find_differences(numbers):
    """Return the deviation of each value in ``numbers`` from their mean.

    The result has one entry per input value, in the same order.
    """
    center = calculate_mean(numbers)
    return [value - center for value in numbers]


def calculate_variance(numbers):
    """Return the variance of ``numbers``.

    This is the sum of the squared deviations from the mean divided by the
    number of values (i.e. it divides by ``n``, not ``n - 1``).
    """
    deviations = find_differences(numbers)
    total_squared = sum(dev ** 2 for dev in deviations)
    return total_squared / len(numbers)


def calculate_standard_deviation(numbers):
    """Return the standard deviation of ``numbers``.

    Computed as the square root of the value returned by
    ``calculate_variance``.
    """
    return calculate_variance(numbers) ** 0.5


def draw_graph(numbers):
    """Plot ``numbers`` with mean/median lines and print summary statistics.

    The values are plotted against their index, followed by one horizontal
    line at the mean and one at the median.  The mean, median, mode(s),
    variance and standard deviation are printed to standard output, and
    finally the figure is displayed with ``plt.show()``.
    """
    count = len(numbers)
    xs = range(count)

    # The data series is drawn first so it lines up with the first legend
    # entry below.
    plt.plot(xs, numbers)
    plt.ylabel('値')

    mean = calculate_mean(numbers)
    median = calculate_median(numbers)
    modes = calculate_mode(numbers)
    variance = calculate_variance(numbers)
    std_deviation = calculate_standard_deviation(numbers)

    # Horizontal reference lines, in the same order as the legend labels.
    plt.plot(xs, [mean] * count)
    plt.plot(xs, [median] * count)
    plt.legend(['値', '平均値', '中央値'])

    # Several modes are shown as one comma-separated string.
    mode_text = ', '.join(str(m) for m in modes)
    summary = (
        ('平均', mean),
        ('中央値', median),
        ('最頻値', mode_text),
        ('分散', variance),
        ('標準偏差', std_deviation),
    )
    for label, value in summary:
        print('{0}: {1}'.format(label, value))

    plt.show()

if __name__ == '__main__':
    # With exactly one command-line argument, read one integer per line
    # from that file; otherwise fall back to 100 random integers drawn
    # from 1..100 inclusive.
    if len(sys.argv) == 2:
        filename = sys.argv[1]
        with open(filename) as f:
            # Skip blank lines (e.g. a trailing empty line at the end of
            # the file), which int() would reject with ValueError.  int()
            # itself ignores surrounding whitespace, including the newline.
            numbers = [int(line) for line in f if line.strip()]
    else:
        numbers = [random.randrange(1, 101) for _ in range(100)]

    draw_graph(numbers)

入出力結果(Terminal, IPython)

$ ./calculate.py mydata.txt 
平均: 477.75
中央値: 500.0
最頻値: 100, 500
分散: 141047.35416666666
標準偏差: 375.5627166887931
$ ./calculate.py
平均: 49.2
中央値: 45.5
最頻値: 26, 51, 71
分散: 867.82
標準偏差: 29.45878476787527
$ ./calculate.py
平均: 53.76
中央値: 55.0
最頻値: 38
分散: 749.3624000000003
標準偏差: 27.374484470031582
$

0 コメント:

コメントを投稿