# const.py 9.92 KB
# Single-character substitution table.  Every target is a Chinese financial
# ("capital") numeral or unit: 壹 贰 叁 肆 柒 捌 玖 拾 佰 仟 万 圆.
# The keys look like visually similar characters -- presumably OCR misreads;
# confirm against the 'replace_whole' fix method, which receives this map as
# 'replace_map' for the '价税合计大写' field.
# Insertion order is exactly the order in which the entries were first written.
# Mappings that were tried and are currently disabled: '零角' -> '零', '@整' -> '叁'.
REPLACE_DICT_1 = {
    '元': '圆',
    '柴': '柒', '染': '柒',
    '查': '壹',
    '武': '贰', '家': '贰', '就': '贰',
    '登': '叁', '鑫': '叁', '垂': '叁',
    '捆': '捌', '搁': '捌', '级': '捌', '测': '捌', '拥': '捌', '损': '捌',
    '盒': '叁',
    '摄': '捌', '报': '捌',
    '会': '叁',
    '索': '壹',
    '任': '仟', '杆': '仟', '仔': '仟', '什': '仟', '付': '仟', '伴': '仟',
    '宿': '佰', '信': '佰', '情': '佰', '值': '佰',
    '荣': '柒', '渠': '柒', '类': '柒', '案': '柒', '集': '柒',
    '方': '万',
    '抬': '拾', '给': '拾',
    '樟': '肆', '单': '肆', '邮': '肆',
    '政': '玖',
    '拐': '捌',
}


# Dictionary keys under which a constants bundle (see INVOICE_CONST) stores
# its key list and its per-field value rules, respectively.
ARG_KEY_KEY_LIST = "keys_list"
ARG_KEY_VALUE_DICT = "values_dict"

# Ordered list of key labels looked for on the invoice.
#
# Each entry is a tuple: the canonical label first, then zero or more
# alternative spellings (they look like OCR misreadings of the label --
# confirm), and finally a boolean flag.  The meaning of the flag is not
# visible in this file; NOTE(review): confirm with the code that consumes
# this list.
#
# The position of an entry matters: the numbers in the trailing comments are
# the list indices, and the location tuples in INVOICE_VALUE_DICT use numbers
# in this same 0..38 range.  Do not reorder or insert entries without updating
# those references.
INVOICE_KEY_LIST = [
    ("纳税人识别号", False),  # [0] closely resembles other keys
    ("增值税", False),  # [1] closely resembles other keys

    ("地", False),  # [2] single-character key
    ("址", False),  # [3] single-character key

    ("开票日期", "开票曰期", "开票日", True),  # [4]
    ("发票代码", "发票代鸡", True),  # [5]
    ("发票号码", "发票号瑞", "发要号瑞", True),  # [6]
    ("机打代码", False),  # [7]
    ("机打号码", "机打号玛", False),  # [8]
    ("机器编号", False),  # [9]
    ("购买方名称", "购买方名称及", False),  # [10]
    ("纳税人识别号/", False),  # [11]
    ("统一社会信用代码/", False),  # [12]
    ("身份证号码", "身份证号码/", False),  # [13]
    ("车辆类型", True),  # [14]
    ("厂牌型号", "广牌型号", "厂胖型号", "广牌型考", True),  # [15]
    ("产地", "严地", True),  # [16]
    ("合格证号", False),  # [17]
    ("进口证明书号", True),  # [18]
    ("商检单号", True),  # [19]
    ("发动机号码", False),  # [20]
    ("车辆识别代号/车架号码", True),  # [21]
    ("价税合计", "价现合计", "价“税合计", False),  # [22]
    ("小写", True),  # [23] TODO: decide which value to take when several match
    ("销货单位名称", False),  # [24]
    ("电话", True),  # [25]
    ("账号", "账考", "帐号", "帐考", "张号", "陈号", "昨号", True),  # [26]
    ("开户银行", True),  # [27]
    ("增值税税率", True),  # [28] original note: "value false"
    ("或征收税", False),  # [29]
    ("税额", False),  # [30]
    ("主管税务", True),  # [31] original note: "value False"
    ("机关及代码", True),  # [32]
    ("不含税价", True),  # [33] original note: "value False"
    ("完税凭证号码", False),  # [34]
    ("开票人", True),  # [35]
    ("吨位", True),  # [36]
    ("限乘人数", "跟乘人数", True),  # [37] TODO: coordinate splitting when only '人数' is seen
    ("备注", True),  # [38]
]

# Per-field extraction rules, keyed by output field name.
#
# Each rule is a dict with:
#   "length"      - an int or None (presumably the expected length of the
#                   value, None meaning "not fixed" -- confirm with consumer).
#   "str_type"    - one of "date", "int", "float", "str".
#   "location"    - list of search tuples laid out, per the original legend, as
#                   (idx, location, top, bottom, left, (idx, scope), choice,
#                    if_startswith).
#                   The idx numbers all fall in INVOICE_KEY_LIST's index range
#                   and appear to select anchor keys from that list.
#   "fix_methods" - optional list of (method_name, kwargs) pairs; the methods
#                   themselves are defined outside this file.
#
# Original notes on key/value layout modes:
#   split  - key and value are in one piece
#   append - key + value suffix; requires coordinate-based splitting
#   insert - key + value prefix; requires coordinate-based splitting
# Only "split" or None occurs as the last tuple element below.
#
# Trailing tags such as "cn", "alnum", "int + -", "13%" are the original
# author's hints about the expected content; nothing in this file reads them.
INVOICE_VALUE_DICT = {
    "开票日期": {
        "length": 10,
        "str_type": "date",
        "location": [
            (4, "right", 0.3, 0.5, 0, (2,), "xmin", "split"),
        ],
        # NOTE(review): ':' is listed twice exactly as in the original; one of
        # them may have been meant as the full-width colon -- confirm.
        "fix_methods": [
            ("prune_first_char", {"char_set": {":", ":", ";"}}),
        ],
    },
    "发票代码": {
        "length": 12,
        "str_type": "int",
        "location": [
            (5, "right", 0.2, 0.2, 0, (2,), "xmin", "split"),
        ],
    },
    "发票号码": {
        "length": 8,
        "str_type": "int",
        "location": [
            (6, "right", 0.2, 0.5, 0, (2,), "length", "split"),
        ],
        "fix_methods": [
            ("prune_first_char", {"char_set": {"-"}}),
        ],
    },
    "机打代码": {
        "length": 12,
        "str_type": "int",
        "location": [
            (7, "right", 0.5, 1, 0, (2,), "ymin", None),
        ],
    },
    "机器编号": {
        "length": 12,
        "str_type": "int",
        "location": [
            (9, "right", 0.5, 1, 0, (2,), "ymax", None),
        ],
    },
    "机打号码": {
        "length": 8,
        "str_type": "int",
        "location": [
            (8, "right", 0.5, 0.5, 0, (2,), "length", None),
        ],
    },
    "购买方名称": {
        "length": None,
        "str_type": "str",  # cn
        "location": [
            (10, "right", 0.5, 0.5, 0, (11, 12, 13, 2), "xmin", None),
        ],
    },
    "纳税人识别号/统一社会信用代码/身份证号码": {
        "length": 18,
        "str_type": "str",  # alnum
        "location": [
            (11, "right", 0, 2, 0, (2.5,), "length", None),
            (12, "right", 1, 1, 0, (2,), "length", None),
            (13, "right", 2, 0, 0.5, (3,), "length", None),
        ],
    },
    "车辆类型": {
        "length": None,
        "str_type": "str",
        "location": [
            (14, "right", 0.2, 0.2, 0, (15, 1.5), "xmin", "split"),
            (15, "left", 0.2, 0.2, 0, (14, 2.5), "xmax", None),
        ],
    },
    "厂牌型号": {
        "length": None,
        "str_type": "str",
        "location": [
            (15, "right", 0.2, 0.2, 0, (16, 3.5), "xmin", "split"),
            (16, "left", 0.2, 0.2, 0, (15, 2.5), "xmax", None),
        ],
    },
    "产地": {
        "length": None,
        "str_type": "str",  # cn
        "location": [
            (16, "right", 0.2, 0.2, 0, (2.5,), "xmin", "split"),
        ],
    },
    "合格证号": {
        "length": None,  # original note: 15
        "str_type": "str",  # alnum
        "location": [
            (17, "right", 0.2, 0.2, 0, (18, 1.5), "xmin", None),
            (18, "left", 0.2, 0.2, 0, (17, 1.5), "xmax", None),
        ],
    },
    "进口证明书号": {
        "length": None,
        "str_type": "str",  # alnum
        "location": [
            (18, "right", 0.3, 0.3, 0, (19, 1.5), "xmin", "split"),
            (19, "left", 0.2, 0.2, 0, (18, 3), "xmax", None),
        ],
    },
    "商检单号": {
        "length": None,
        "str_type": "str",
        "location": [
            (19, "right", 0.2, 0.2, 0, (1.5,), "xmin", "split"),
        ],
    },
    "发动机号码": {
        "length": None,
        "str_type": "str",  # alnum
        "location": [
            (20, "right", 0.2, 0.2, 0, (21, 2), "xmin", None),
            (21, "left", 0.2, 0.2, 0, (20, 1.4), "xmax", None),
        ],
    },
    "车辆识别代号/车架号码": {
        "length": 17,
        "str_type": "str",  # alnum
        "location": [
            (21, "right", 0.3, 0.3, 0, (1.2,), "xmin", "split"),
        ],
    },
    "价税合计大写": {
        "length": None,
        "str_type": "str",  # cn
        "location": [
            (22, "right", 0.2, 0.2, 0, (23, 3), "xmin", None),
            (23, "left", 0.2, 0.2, 0, (22, 15), "xmax", None),
        ],
        "fix_methods": [
            ("prune_no_cn", {}),
            ("replace_whole", {"replace_map": REPLACE_DICT_1}),
        ],
    },
    "价税合计小写": {
        "length": None,
        "str_type": "float",
        "location": [
            (23, "right", 0.4, 0.4, 0, (4,), "xmin", "split"),
        ],
        "fix_methods": [
            ("prune_amount", {}),
        ],
    },
    "销货单位名称": {
        "length": None,
        "str_type": "str",  # cn
        "location": [
            (24, "right", 0.2, 0.2, 0, (25, 3), "xmin", None),
            (25, "left", 0.3, 0.3, 0, (24, 15), "xmax", None),
        ],
    },
    "电话": {
        "length": None,
        "str_type": "str",  # int + -
        "location": [
            (25, "right", 0.3, 0.3, 0, (5,), "xmin", "split"),
        ],
    },
    "纳税人识别号": {
        "length": None,
        "str_type": "str",  # cn
        "location": [
            (0, "right", 0.3, 0.3, 0, (26, 2.5), "xmin", None),
            (26, "left", 0.3, 0.3, 0, (0, 15), "xmax", None),
        ],
    },
    "账号": {
        "length": None,
        "str_type": "str",
        "location": [
            (26, "right", 0.3, 0.3, 0, (6,), "xmin", "split"),
        ],
    },
    "地址": {
        "length": None,
        "str_type": "str",  # cn
        "location": [
            (27, "left", 0.3, 0.3, 0, (3, 4), "merge", None),
            (3, "right", 0.3, 0.3, 0, (27, 20), "xmin", None),
        ],
    },
    "开户银行": {
        "length": None,
        "str_type": "str",  # cn
        "location": [
            (27, "right", 0.3, 0.3, 0, (3,), "xmin", "split"),
        ],
    },
    "增值税税率或征收率": {
        "length": 3,
        "str_type": "str",  # 13%
        "location": [
            (28, "right", 0, 1, 0, (1, 30, 1), "xmin", None),
            (29, "right", 1, 0, 0, (1, 30, 1), "xmin", None),
            (1, "left", 0, 1, 0, (28, 29, 2), "xmax", None),
            (30, "left", 1, 0, 0, (28, 29, 2), "xmax", None),
        ],
        "fix_methods": [
            ("replace_last_char", {"char_set": {"8", "9", "号"}, "target_char": "%"}),
        ],
    },
    "增值税税额": {
        "length": None,
        "str_type": "float",
        "location": [
            (1, "right", 0, 1, 0, (31, 32, 2.5), "xmin", None),
            (30, "right", 1, 0, 0, (31, 32, 2.5), "xmin", None),
            (31, "left", 0, 1, 0, (1, 30, 2), "xmax", None),
            (32, "left", 1, 0, 0, (1, 30, 2), "xmax", None),
        ],
        "fix_methods": [
            ("prune_amount", {}),
        ],
    },
    "主管税务机关及代码": {
        "length": None,
        "str_type": "str",
        "location": [
            (31, "right", 0, 1.5, 0, (2,), "merge", None),
            (32, "right", 1, 0.5, 0, (2,), "merge", None),
        ],
    },
    "不含税价-小写": {
        "length": None,
        "str_type": "float",  # cn
        "location": [
            (34, "left", 0.3, 0.3, 0, (33, 1.5), "xmax", None),
            (33, "right", 0.2, 0.2, 0, (34, 1.5), "xmin", None),
        ],
        "fix_methods": [
            ("prune_amount", {}),
        ],
    },
    "完税凭证号码": {
        "length": None,
        "str_type": "str",
        "location": [
            (34, "right", 0.2, 0.2, 0, (36, 1.5), "xmin", None),
            (36, "left", 0.2, 0.2, 0, (34, 6), "xmax", None),
        ],
    },
    "吨位": {
        "length": None,
        "str_type": "str",
        "location": [
            (36, "right", 0.2, 0.2, 0, (37, 1), "xmin", "split"),
            (37, "left", 0.2, 0.2, 0, (36, 0.5), "xmax", None),
        ],
    },
    "限乘人数": {
        "length": None,
        "str_type": "int",
        "location": [
            (37, "right", 0.2, 0.2, 0, (0.5,), "xmin", "split"),
        ],
    },
    "开票人": {
        "length": None,
        "str_type": "str",
        "location": [
            (35, "right", 0, 0.5, 0, (1.5,), "xmin", "split"),
        ],
    },
    "备注": {
        "length": None,
        "str_type": "str",
        "location": [
            (38, "right", 0.2, 0.2, 0, (2,), "xmin", "split"),
        ],
        # NOTE(review): ':' is listed twice exactly as in the original; one of
        # them may have been meant as the full-width colon -- confirm.
        "fix_methods": [
            ("prune_first_char", {"char_set": {";", ":", ":"}}),
        ],
    },
}

# Bundle of the invoice constants: the key list and the per-field value
# rules, stored under the ARG_KEY_* names defined earlier in this module.
INVOICE_CONST = {
    ARG_KEY_KEY_LIST: INVOICE_KEY_LIST,
    ARG_KEY_VALUE_DICT: INVOICE_VALUE_DICT,
}