2018年7月30日月曜日

開発環境

stream(load関数、dump関数)だけではなく、関数の引数に文字列を渡しての読み込みと、文字列としての書き出しも出来るようにしてみた。(loads関数、dumps関数)

test.py

#!/usr/bin/env python3
# Copyright © 2018 kamimura. All rights reserved.
import unittest
from sion import loads


class LoadsDictTest(unittest.TestCase):
    """Checks how ``loads`` maps unhashable SION dictionary keys to tuples."""

    def setUp(self):
        """No per-test fixture is required."""

    def tearDown(self):
        """Nothing to clean up after a test."""

    def test_list_keys(self):
        """Array keys are expected to come back as tuples of their items."""
        parsed = loads('[[]:nil, [nil]: nil, [nil, true]: false]')
        expected = {
            (): None,
            (None,): None,
            (None, True): False,
        }
        self.assertEqual(parsed, expected)

    def test_dict_keys(self):
        """Dictionary keys are expected to come back as tuples of pairs."""
        parsed = loads('[[:]:nil, [true:nil]:nil, [true:nil, false:nil]:nil]')
        expected = {
            (): None,
            ((True, None),): None,
            ((True, None), (False, None)): None,
        }
        self.assertEqual(parsed, expected)


# Run the test cases defined above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()

SIONVisitor.py

# Created by kamimura on 2018/07/21.
# Copyright © 2018 kamimura. All rights reserved.
# Generated from SION.g4 by ANTLR 4.7.1
from antlr4 import *
if __name__ is not None and "." in __name__:
    from .SIONParser import SIONParser
else:
    from SIONParser import SIONParser

import datetime


def num_rem_under(n):
    """Return the numeric literal text ``n`` with every underscore removed."""
    digits_only = ''.join(n.split('_'))
    return digits_only


def str_esc(s):
    """Escape ``s`` for use inside a double-quoted string literal.

    Backslash, double quote, newline and carriage return are replaced by
    their backslash escape sequences.  The backslash is handled first so
    that the backslashes introduced by the other replacements are not
    escaped a second time.

    Args:
        s: The text to escape.

    Returns:
        The escaped text (without surrounding quotes).
    """
    # Order matters: the backslash pair must stay first in this list.
    for o, n in [('\\', '\\\\'), ('"', '\\"'), ('\n', '\\n'), ('\r', '\\r')]:
        s = s.replace(o, n)
    return s

# This class defines a complete generic visitor for a parse tree produced by SIONParser.


class SIONVisitor(ParseTreeVisitor):
    """Visitor that turns a parse tree produced by SIONParser into Python objects.

    Arrays become lists, dictionaries become dicts, nil becomes ``None``,
    booleans become ``bool``, integers ``int``, doubles ``float``, data
    literals ``bytes``, date literals ``datetime.datetime`` and string
    literals ``str``.  Dictionary keys that would be unhashable (lists and
    dicts) are converted to tuples; see ``_freeze_key``.
    """

    def _freeze_key(self, key):
        """Return a hashable stand-in for a dictionary key.

        Lists (and tuples) become tuples of frozen items and dicts become
        tuples of ``(key, value)`` pairs, recursively, so that nested
        containers inside a key do not make the key unhashable.  Any other
        value is returned unchanged.
        """
        if isinstance(key, (list, tuple)):
            return tuple(self._freeze_key(item) for item in key)
        if isinstance(key, dict):
            return tuple((self._freeze_key(k), self._freeze_key(v))
                         for k, v in key.items())
        return key

    def visitSi_self(self, ctx: SIONParser.Si_selfContext):
        """Visit a parse tree produced by SIONParser#si_self."""
        return self.visitChildren(ctx)

    def visitSi_array(self, ctx: SIONParser.Si_arrayContext):
        """Visit SIONParser#si_array; return a list (empty when no items)."""
        items = ctx.si_array_items()
        if items:
            return self.visit(items)
        return []

    def visitSi_array_items(self, ctx: SIONParser.Si_array_itemsContext):
        """Visit SIONParser#si_array_items; return the visited items as a list."""
        return [self.visit(t) for t in ctx.si_self()]

    def visitSi_dict(self, ctx: SIONParser.Si_dictContext):
        """Visit SIONParser#si_dict; return a dict (empty when no pairs)."""
        pairs = ctx.si_dict_pairs()
        if pairs:
            return self.visit(pairs)
        return {}

    def visitSi_dict_pairs(self, ctx: SIONParser.Si_dict_pairsContext):
        """Visit SIONParser#si_dict_pairs; build a dict from the visited pairs.

        When the same key occurs more than once the last pair wins.
        """
        return dict(self.visit(t) for t in ctx.si_dict_pair())

    def visitSi_dict_pair(self, ctx: SIONParser.Si_dict_pairContext):
        """Visit SIONParser#si_dict_pair; return a ``(key, value)`` tuple.

        The key is passed through ``_freeze_key`` so that list and dict
        keys, including ones nested inside other keys, become hashable
        tuples.  The value is returned as visited.
        """
        k, v = [self.visit(t) for t in ctx.si_self()]
        return (self._freeze_key(k), v)

    def visitSi_literal(self, ctx: SIONParser.Si_literalContext):
        """Visit a parse tree produced by SIONParser#si_literal."""
        return self.visitChildren(ctx)

    def visitSi_ints(self, ctx: SIONParser.Si_intsContext):
        """Visit SIONParser#si_ints; apply an optional leading minus sign.

        NOTE(review): relies on ``visitChildren`` returning the value of
        the integer child -- confirm against the grammar.
        """
        sign = -1 if ctx.SI_minus() else 1
        return sign * self.visitChildren(ctx)

    def visitSi_doubles(self, ctx: SIONParser.Si_doublesContext):
        """Visit SIONParser#si_doubles; return a float.

        Underscores are removed from the literal; text starting with
        ``0x`` is parsed with ``float.fromhex``, anything else with
        ``float``.  An optional leading minus sign negates the result.
        """
        sign = -1 if ctx.SI_minus() else 1
        text = num_rem_under(ctx.SI_double().getText())
        if text.startswith('0x'):
            n = float.fromhex(text)
        else:
            n = float(text)
        return sign * n

    def visitSi_bool(self, ctx: SIONParser.Si_boolContext):
        """Visit a parse tree produced by SIONParser#si_bool."""
        return self.visitChildren(ctx)

    def visitSi_true(self, ctx: SIONParser.Si_trueContext):
        """Visit SIONParser#si_true; always returns ``True``."""
        return True

    def visitSi_false(self, ctx: SIONParser.Si_falseContext):
        """Visit SIONParser#si_false; always returns ``False``."""
        return False

    def visitSi_nil(self, ctx: SIONParser.Si_nilContext):
        """Visit SIONParser#si_nil; always returns ``None``."""
        return None

    def visitSi_int(self, ctx: SIONParser.Si_intContext):
        """Visit SIONParser#si_int; return the literal as an ``int``.

        The token kind selects the base (2, 8, 10 or 16); underscores are
        removed before conversion.

        Raises:
            ValueError: If none of the known integer tokens is present.
        """
        if ctx.SI_bin():
            return int(num_rem_under(ctx.SI_bin().getText()), 2)
        if ctx.SI_oct():
            return int(num_rem_under(ctx.SI_oct().getText()), 8)
        if ctx.SI_decimal():
            return int(num_rem_under(ctx.SI_decimal().getText()), 10)
        if ctx.SI_hex():
            return int(num_rem_under(ctx.SI_hex().getText()), 16)
        # Previously this path failed with an unbound local variable.
        raise ValueError(f'unsupported integer literal: {ctx.getText()!r}')

    def visitSi_data(self, ctx: SIONParser.Si_dataContext):
        """Visit SIONParser#si_data; return the payload text as ASCII bytes.

        The first 7 and last 2 characters of the token text are dropped
        (presumably the ``.Data("`` prefix and ``")`` suffix).  The
        payload is not decoded any further.
        """
        return ctx.SI_data().getText()[7:-2].encode('ascii')

    def visitSi_date(self, ctx: SIONParser.Si_dateContext):
        """Visit SIONParser#si_date; return a ``datetime.datetime``.

        The numeric argument (double or int) is passed to
        ``datetime.datetime.fromtimestamp`` without a ``tz`` argument.
        """
        if ctx.si_doubles():
            t = self.visit(ctx.si_doubles())
        else:
            t = self.visit(ctx.si_ints())
        return datetime.datetime.fromtimestamp(t)

    def visitSi_string(self, ctx: SIONParser.Si_stringContext):
        """Visit SIONParser#si_string; return the token text without its
        first and last character (the quotes).  Escapes are not processed.
        """
        return ctx.SI_string_literal().getText()[1:-1]


# Remove the SIONParser name from this module's namespace; in this module it
# is only referenced by the annotations of the class above.
del SIONParser

sion.py

# Created by kamimura on 2018/07/21.
# Copyright © 2018 kamimura. All rights reserved.
import sys
import datetime
from antlr4 import *
from SIONLexer import SIONLexer
from SIONParser import SIONParser
from SIONVisitor import SIONVisitor


def load(file, encoding: str='utf-8', errors: str='strict') -> object:
    """Read SION text from ``file`` and return the parsed Python object.

    Args:
        file: Object with a ``read()`` method returning ``str`` or ``bytes``.
        encoding: Codec used to decode the content when it is ``bytes``.
        errors: Error handling scheme passed to ``bytes.decode``.

    Returns:
        Whatever ``SIONVisitor`` produces for the parsed ``si_self`` rule.
    """
    text = file.read()
    if type(text) is bytes:
        text = text.decode(encoding, errors)
    # Lexer -> token stream -> parser, then walk the tree with the visitor.
    token_stream = CommonTokenStream(SIONLexer(InputStream(text)))
    parse_tree = SIONParser(token_stream).si_self()
    return SIONVisitor().visit(parse_tree)


def loads(s):
    """Parse the SION text ``s`` and return the resulting Python object.

    Args:
        s: SION source as ``str``, or as ``bytes`` which are decoded with
            the default arguments of ``bytes.decode``.

    Returns:
        Whatever ``SIONVisitor`` produces for the parsed ``si_self`` rule.
    """
    text = s.decode() if type(s) is bytes else s
    # Lexer -> token stream -> parser, then walk the tree with the visitor.
    token_stream = CommonTokenStream(SIONLexer(InputStream(text)))
    parse_tree = SIONParser(token_stream).si_self()
    return SIONVisitor().visit(parse_tree)


def str_esc(s):
    """Escape ``s`` for use inside a double-quoted SION string literal.

    Backslash, double quote, newline and carriage return are replaced by
    their backslash escape sequences.

    Args:
        s: The text to escape.

    Returns:
        The escaped text (without surrounding quotes).
    """
    # The backslash must be escaped first; doing it last would double the
    # backslashes that the other replacements have just inserted.
    for o, n in [('\\', '\\\\'), ('"', '\\"'), ('\n', '\\n'), ('\r', '\\r')]:
        s = s.replace(o, n)
    return s


def dump(obj, file):
    """Write the SION representation of ``obj`` to ``file``.

    Supported types (matched on the exact type, not subclasses):
    ``None`` -> ``nil``, ``bool`` -> ``true``/``false``, ``int`` and
    ``float`` -> their ``str()`` form, ``str`` -> double-quoted escaped
    string, ``bytes`` -> ``.Data("...")`` using the text of the bytes
    repr, ``datetime.datetime`` -> ``.Date(<timestamp>)``, ``list`` and
    ``tuple`` -> ``[a,b,...]``, ``dict`` -> ``[k:v,...]`` (``[:]`` when
    empty).  Containers are written recursively.

    Args:
        obj: The object to serialize.
        file: Target passed as ``file=`` to ``print``; nothing is written
            after the value (no trailing newline).

    Raises:
        TypeError: If ``obj`` (or a nested element) has an unsupported type.
    """
    def emit(text):
        # Single place that fixes the target stream and suppresses newlines.
        print(text, file=file, end='')

    t = type(obj)
    if obj is None:
        emit('nil')
    elif t == bool:
        emit('true' if obj else 'false')
    elif t in {int, float}:
        emit(obj)
    elif t == str:
        emit(f'"{str_esc(obj)}"')
    elif t == bytes:
        # str(b'abc') is "b'abc'"; strip the leading b' and trailing '.
        emit(f'.Data("{str(obj)[2:-1]}")')
    elif t == datetime.datetime:
        emit(f'.Date({obj.timestamp()})')
    elif t in {list, tuple}:
        emit('[')
        for i, item in enumerate(obj):
            if i:
                emit(',')
            dump(item, file)
        emit(']')
    elif t == dict:
        emit('[')
        if not obj:
            # An empty dictionary is written as [:] to tell it from [].
            emit(':')
        for i, (k, v) in enumerate(obj.items()):
            if i:
                emit(',')
            dump(k, file)
            emit(':')
            dump(v, file)
        emit(']')
    else:
        raise TypeError(
            f"Object of type '{obj.__class__.__name__}' is not SION serializable")


def dumps(obj: object) -> str:
    """Return the SION representation of ``obj`` as a string.

    Supported types (matched on the exact type, not subclasses):
    ``None`` -> ``nil``, ``bool`` -> ``true``/``false``, ``int`` and
    ``float`` -> their ``str()`` form, ``str`` -> double-quoted escaped
    string, ``bytes`` -> ``.Data("...")`` using the text of the bytes
    repr, ``datetime.datetime`` -> ``.Date(<timestamp>)``, ``list`` and
    ``tuple`` -> ``[a,b,...]``, ``dict`` -> ``[k:v,...]`` (``[:]`` when
    empty).  Containers are serialized recursively.

    Args:
        obj: The object to serialize.

    Returns:
        The SION text for ``obj``.

    Raises:
        TypeError: If ``obj`` (or a nested element) has an unsupported type.
    """
    t = type(obj)
    if obj is None:
        return 'nil'
    if t == bool:
        return 'true' if obj else 'false'
    if t in {int, float}:
        return str(obj)
    if t == str:
        return f'"{str_esc(obj)}"'
    if t == bytes:
        # str(b'abc') is "b'abc'"; strip the leading b' and trailing '.
        return f'.Data("{str(obj)[2:-1]}")'
    if t == datetime.datetime:
        return f'.Date({obj.timestamp()})'
    if t in {list, tuple}:
        return '[' + ','.join(dumps(o) for o in obj) + ']'
    if t == dict:
        if not obj:
            # An empty dictionary is written as [:] to tell it from [].
            return '[:]'
        # Every value goes through dumps(); using str() here would emit
        # Python reprs such as None/True instead of SION.
        return '[' + ','.join(
            f'{dumps(k)}:{dumps(v)}' for k, v in obj.items()) + ']'
    raise TypeError(
        f"Object of type '{obj.__class__.__name__}' is not SION serializable")


# Script entry: parse the SION file named on the command line (or the
# default sample), print the resulting object, then write it back out.
if __name__ == '__main__':
    filename = sys.argv[1] if len(sys.argv) > 1 else '../test/t.sion'
    with open(filename) as src:
        obj = load(src)
    print(obj)
    with open('../test/output.sion', 'w') as dst:
        dump(obj, dst)

入出力結果(Terminal, Jupyter(IPython))

$ ./test.py
..
----------------------------------------------------------------------
Ran 2 tests in 0.013s

OK
$

SIONのディクショナリのキーがunhashable typeの場合、とりあえず文字列に変換してたのを、よりデータ型が近い(?)タプル(リストの場合)、タプルのタプル(dictionaryの場合)に変更。

タプルで済ませるのではなく、SION classを作成、抽象化して、情報を失わないようにするか迷ったり。今のところ、情報が失われる事を許容してタプルで済ませた方が、Pythonのオブジェクトとして気軽に扱えるからいいかなぁと考えてたり。

0 コメント:

コメントを投稿