1c6d880f by 周伟奇

add license

1 parent c1c49a8e
......@@ -95,7 +95,7 @@ HEADERS_MAPPING.update(
HEADERS_MAPPING.update(
{
'交易日期': BASE_HEADERS_MAPPING['记账日期'],
'存入': BASE_HEADERS_MAPPING['金额'],
# '存入': BASE_HEADERS_MAPPING['金额'],
'对方账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
'对方名称': BASE_HEADERS_MAPPING['对方账户名'],
'摘要': BASE_HEADERS_MAPPING['附言'],
......@@ -160,6 +160,12 @@ HEADERS_MAPPING.update(
'交易地点/对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# Agricultural Bank of China, narrow-page layout: this layout labels the
# counterparty account column '交易对手账号', so map that header onto the same
# base header that '对方卡号/账号' uses.
HEADERS_MAPPING.update(
{
'交易对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
}
)
# 竖版-特殊-农商行
HEADERS_MAPPING.update(
{
......@@ -299,17 +305,27 @@ HEADERS_MAPPING.update(
# ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)), # 横版-表格-中国银行(不规则)
#
# # 农业银行:交易日期 交易网点 存入 支出 余额 对方账号 对方名称 摘要 渠道 附言
# ('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)), # 横版-表格-农业银行-中国农业银行个人账户明细
# ('农业银行-10', (1, None, None, 5, None, 8, 7, 6, None, None, None, 3, 4)), # 横版-表格-农业银行-中国农业银行个人账户明细
#
# # 农业银行:序号 日期 摘要 交易金额 余额 对方账号 对方名称 交易地点 渠道 附言
# ('农业银行-10-1', (2, None, 4, 5, None, 3, 7, 6, None, None, None, None, None)),
#
# # 农业银行:交易日期 摘要 交易金额 余额 交易渠道 交易网点 对方账号 对方名称 附言
# ('农业银行-9', (1, None, 3, 4, None, 2, 8, 7, None, None, None, None, None)),
#
# # 北京银行:交易日期 业务摘要 收/支 发生额 余额 对方户名 对方账号 交易渠道
# ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)), # 横版-表格-北京银行
#
# # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 [对方户名 对方账号] 渠道
# # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 渠道
# ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
#
# # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 对方户名 对方账号 渠道
# ('工商银行-电子账单', (1, None, 9, 10, None, 7, 11, 12, None, None, None, None, None)),
#
# # 建设银行:空 摘要 交易日期 交易金额 账户余额 商户/网点号及其名称 对方账号与户名 --> 竖版-表格-建设银行
# # 序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名 --> 横版-表格-建设银行
# ('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)),
# ('建设银行-竖版', (3, None, 4, 5, None, 2, None, 7, None, None, None, None, None)),
# ('建设银行-横版', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)),
#
# # 微信:交易单号 交易时间 交易类型 收/支/其他 交易方式 金额(元) 交易对方 商户单号
# ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
......@@ -320,7 +336,13 @@ HEADERS_MAPPING.update(
# # -----------------普通打印:部分格线--------------------------------
#
# # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('农业银行-5', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
#
# # 农业银行:日期 地点 摘要 存入 支出 余额 对方账号 对方户名
# ('农业银行-8', (1, None, None, 6, None, 3, 8, 7, None, None, None, 4, 5)),
# # 农业银行:日期 摘要 交易金额 余额 地点 交易对手账号 对方户名
# ('农业银行-窄页', (1, None, 3, 4, None, 2, 7, 6, None, None, None, None, None)),
#
# # 农商行:交易日期 交易发生额 账户余额 对方账号 对方户名 摘要 备注
# ('农商行', (1, None, 2, 3, None, 6, 5, 4, None, None, None, None, None)),
......@@ -330,6 +352,9 @@ HEADERS_MAPPING.update(
#
# # 平安电子账单:序号 交易日期 交易网点 摘要 借贷发生额(借:-贷:+) 账户余额
# ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
# # 建设银行:序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名
# ('建设银行-电子账单', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)),
#
# # -----------------普通打印:无格线-------------------------------------
#
......@@ -338,7 +363,8 @@ HEADERS_MAPPING.update(
#
# # 邮储银行:交易日期、交易类型 交易币种 交易金额(元) 账户余额(元) [对手方户名 对手方账户 收支类型] --> 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户
# # 交易日期、交易类型 交易金额(元) 账户余额(元) 操作柜员 --> 竖版-无表格-邮储银行 账户对账单
# ('邮储银行', (1, None, None, None, None, 2, None, None, None, None, None, None, None)),
# ('邮储银行-8', (1, None, 4, 5, None, 2, 6, 7, None, None, None, None, None)),
# ('邮储银行-5', (1, None, 3, 4, None, 2, None, None, None, None, None, None, None)),
#
# # 工商银行电子版:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 [对方户名 对方账号] 渠道
# ('工商银行电子版', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
......@@ -351,13 +377,15 @@ HEADERS_MAPPING.update(
# ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
#
# # 农业银行:交易日期 摘要/附言 交易金额 对方账号和户名
# ('农业银行', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
# ('农业银行-整数', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
#
# # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
#
# # 农业银行:日期、时间、[日志号]、短摘要、交易金额、本次余额、交易网点、渠道、附言
# ('农业银行', (1, 2, None, None, None, None, None, None, None, None, None, None, None)),
# # 农业银行:日期、时间、短摘要、交易金额、本次余额、交易网点、渠道、附言
# # 农业银行:日期、时间、日志号、短摘要、交易金额、本次余额、交易网点、渠道、附言
# ('农业银行', (1, 2, 4, 5, None, 3, None, None, None, None, None, None, None)),
# ('农业银行-扩张缩进', (1, 2, 5, 6, None, 4, None, None, None, None, None, None, None)),
#
# # 交通银行:交易日期 记账日期、交易地点、交易类型、借贷状态、交易金额、余额
# ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
......@@ -374,11 +402,10 @@ HEADERS_MAPPING.update(
#
# # 邮储银行:序号、交易日期、交易渠道、摘要、交易金额、账户余额、对方账号/卡号/汇票号、原子账号、交易机构名称
# ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
#
# # 建设银行:序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名
# ('建设银行', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)),
# ]
# Column mapping with all 13 positions unset; shared by every CLASSIFY_LIST
# entry that has no statement layout of its own.
OTHER_TUPLE = (None,) * 13
# {
# "0":"其他",
# "1":"普通打印-全表格-中国农业银行",
......@@ -408,67 +435,163 @@ HEADERS_MAPPING.update(
# "22":"针式打印-部分格线-邮储银行一本通绿卡"
# }
# CLASSIFY_LIST = [
# ('其他', OTHER_TUPLE),
# ('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)),
# ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)),
# ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)),
# ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
# ('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)),
# ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
# ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),
#
# ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
# ('农业银行', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
# ('农业银行', (1, 2, None, None, None, None, None, None, None, None, None, None, None)),
# ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),
# ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),
# ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
#
# ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
#
# ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)),
# ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
# ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
# ]
# "4":"普通打印-全表格-中国银行",
# "5":"普通打印-全表格-农业银行-10列",
# "6":"普通打印-全表格-农业银行-10列-1",
# "7":"普通打印-全表格-农业银行-9列",
# "8":"普通打印-全表格-北京银行",
# "9":"普通打印-全表格-工商银行",
# "10":"普通打印-全表格-工商银行-电子账单",
# "11":"普通打印-全表格-建设银行",
# "12":"普通打印-全表格-微信账单",
# "13":"普通打印-全表格-支付宝账单",
# "14":"普通打印-无格线-交通银行",
# "15":"普通打印-无格线-储蓄银行-5列",
# "16":"普通打印-无格线-储蓄银行-8列",
# "17":"普通打印-无格线-农业银行-扩张缩进",
# "18":"普通打印-无格线-农业银行-整数",
# "19":"普通打印-无格线-招商银行",
# "20":"普通打印-无格线-招商银行-电子账单",
# "21":"普通打印-无格线-民生银行",
# "22":"普通打印-部分格线-横版-中信银行",
# "23":"普通打印-部分格线-竖版-农业银行-5列",
# "24":"普通打印-部分格线-竖版-农业银行-8列",
# "25":"普通打印-部分格线-竖版-农业银行-窄页",
# "26":"普通打印-部分格线-竖版-平安电子账单",
# "27":"普通打印-部分格线-竖版-建设银行-电子账单",
# "34":"针式打印-全格线-建设银行",
# "35":"针式打印-部分格线-竖版-邮储银行",
# "36":"针式打印-部分格线-竖版-邮储银行-绿卡",
CLASSIFY_LIST = [
('其他', (None, None, None, None, None, None, None, None, None, None, None, None, None)),
('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)),
('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)),
('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)),
('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)),
('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),
('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
('农业银行', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
('农业银行', (1, 2, None, None, None, None, None, None, None, None, None, None, None)),
('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),
('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),
('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)),
('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('普通打印-全表格-中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)),
('普通打印-全表格-农业银行-10列', (1, None, None, 5, None, 8, 7, 6, None, None, None, 3, 4)),
('普通打印-全表格-农业银行-10列-1', (2, None, 4, 5, None, 3, 7, 6, None, None, None, None, None)),
('普通打印-全表格-农业银行-9列', (1, None, 3, 4, None, 2, 8, 7, None, None, None, None, None)),
('普通打印-全表格-北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)),
('普通打印-全表格-工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
('普通打印-全表格-工商银行-电子账单', (1, None, 9, 10, None, 7, 11, 12, None, None, None, None, None)),
('普通打印-全表格-建设银行', (3, None, 4, 5, None, 2, None, 7, None, None, None, None, None)),
('普通打印-全表格-微信账单', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
('普通打印-全表格-支付宝账单', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),
('普通打印-无格线-交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
('普通打印-无格线-储蓄银行-5列', (1, None, 3, 4, None, 2, None, None, None, None, None, None, None)),
('普通打印-无格线-储蓄银行-8列', (1, None, 4, 5, None, 2, 6, 7, None, None, None, None, None)),
('普通打印-无格线-农业银行-扩张缩进', (1, 2, 5, 6, None, 4, None, None, None, None, None, None, None)),
('普通打印-无格线-农业银行-整数', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
('普通打印-无格线-招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),
('普通打印-无格线-招商银行-电子账单', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),
('普通打印-无格线-民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
('普通打印-部分格线-横版-中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
('普通打印-部分格线-竖版-农业银行-5列', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
('普通打印-部分格线-竖版-农业银行-8列', (1, None, None, 6, None, 3, 8, 7, None, None, None, 4, 5)),
('普通打印-部分格线-竖版-农业银行-窄页', (1, None, 3, 4, None, 2, 7, 6, None, None, None, None, None)),
('普通打印-部分格线-竖版-平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
('普通打印-部分格线-竖版-建设银行-电子账单', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('针式打印-全格线-建设银行', OTHER_TUPLE),
('针式打印-部分格线-竖版-邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
('针式打印-部分格线-竖版-邮储银行-绿卡', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
('其他', OTHER_TUPLE),
]
# ----------license相关------------------------------------------------------------------------------------------------
# "0":"AVT Invioce",
# "1":"二手车发票",
# "2":"其他",
# "3":"护照",
# "28":"机动车登记证",
# "29":"机动车销售统一发票",
# "30":"港澳通行证",
# "31":"营业执照",
# "32":"行驶证",
# "33":"身份证",
# "37":"银行卡"
# Per-license-type constants. For each type:
#   *_CLASSIFY is the class id returned by the first OCR service,
#   *_CN_NAME  is the Chinese display name,
#   *_PID      is the product id sent as "pid" to the second OCR service,
#   *_KEY      is a short English key for the type.

# Other (not a license type that gets processed)
OTHER_CLASSIFY = 2
# ID card
IC_CN_NAME = '身份证'
IC_CLASSIFY = 33
# VAT invoice
VAT_CN_NAME = '增值税发票'
VAT_CLASSIFY = 0
# Motor vehicle registration certificate
MVC_CN_NAME = '机动车登记证书'
MVC_CLASSIFY = 28
# Motor vehicle sales unified invoice
MVI_CN_NAME = '机动车销售统一发票'
MVI_CLASSIFY = 29
# These four types have no pid; they are the members of LICENSE_CLASSIFY_SET_1.
IC_PID = VAT_PID = MVC_PID = MVI_PID = None
# Business license
BL_KEY = 'bl'
BL_CN_NAME = '营业执照'
BL_CLASSIFY = 31
BL_PID = 41
# Used-car invoice
UCI_KEY = 'uci'
UCI_CN_NAME = '二手车发票'
UCI_CLASSIFY = 1
UCI_PID = 60
# Hong Kong / Macao / Taiwan travel permit
EEP_KEY = 'eep'
EEP_CN_NAME = '港澳台通行证'
EEP_CLASSIFY = 30
EEP_PID = 1018
# Vehicle license
DL_KEY = 'dl'
DL_CN_NAME = '行驶证'
DL_CLASSIFY = 32
DL_PID = 5
# Passport
PP_KEY = 'pp'
PP_CN_NAME = '护照'
PP_CLASSIFY = 3
PP_PID = 8
# 银行卡
BC_KEY = 'bc'
# 身份证
IC_KEY = 'ic'
# 机动车登记证书
MVC_KEY = 'mvc'
# 机动车销售统一发票
MVI_KEY = 'mvi'
# 增值税发票
VAT_KEY = 'vat'
LICENSE_ORDER = ((MVI_KEY, '机动车销售统一发票'),
(IC_KEY, '身份证'),
(BC_KEY, '银行卡'),
(BL_KEY, '营业执照'),
(UCI_KEY, '二手车发票'),
(EEP_KEY, '港澳台通行证'),
(DL_KEY, '行驶证'),
(PP_KEY, '护照'),
(MVC_KEY, '机动车登记证书'),
(VAT_KEY, '增值税发票'))
BC_CN_NAME = '银行卡'
BC_CLASSIFY = 37
BC_PID = 4
BC_FIELD = (('CardNum', '银行卡号'),
('BankName', '发卡行名称'),
('CardName', '银行卡名称'),
......@@ -478,14 +601,19 @@ BC_FIELD = (('CardNum', '银行卡号'),
SUCCESS_CODE_SET = {'0', 0}
BC_PID = 4
OTHER_SET = {0, 1, 2}
BS_SET = {10, 11, 12}
LICENSE_SET_1 = {110, 111, 112}
LICENSE_SET_2 = {1110, 1111, 1112}
CLASSIFY_PID_DICT = {
0: (4, BC_KEY) # 银行卡
}
# Ordered (classify id, (pid, Chinese name)) pairs. license_rebuild walks this
# tuple in order and names each license sheet with the Chinese name.
LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME)),
(IC_CLASSIFY, (IC_PID, IC_CN_NAME)),
(BC_CLASSIFY, (BC_PID, BC_CN_NAME)),
(BL_CLASSIFY, (BL_PID, BL_CN_NAME)),
(UCI_CLASSIFY, (UCI_PID, UCI_CN_NAME)),
(EEP_CLASSIFY, (EEP_PID, EEP_CN_NAME)),
(DL_CLASSIFY, (DL_PID, DL_CN_NAME)),
(PP_CLASSIFY, (PP_PID, PP_CN_NAME)),
(MVC_CLASSIFY, (MVC_PID, MVC_CN_NAME)),
(VAT_CLASSIFY, (VAT_PID, VAT_CN_NAME)))
# Lookup form of LICENSE_ORDER: classify id -> (pid, Chinese name).
LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER)
# Classify ids that img_2_ocr_2_wb skips without further processing.
OTHER_CLASSIFY_SET = {OTHER_CLASSIFY}
# License types passed to license1_process with the first OCR response.
LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY}
# License types that need a second OCR request carrying the type's pid.
LICENSE_CLASSIFY_SET_2 = {BL_CLASSIFY, UCI_CLASSIFY, EEP_CLASSIFY, DL_CLASSIFY, PP_CLASSIFY, BC_CLASSIFY}
......
......@@ -81,9 +81,14 @@ class Command(BaseCommand, LoggerMixin):
return doc_data_path, excel_path, src_excel_path, pdf_path
@staticmethod
def bs_process(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence):
def bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify):
sheets = ocr_data.get('data', [])
if not sheets:
return
confidence = ocr_data.get('confidence', 1)
img_name, _ = os.path.splitext(os.path.basename(img_path))
for i, sheet in enumerate(sheets):
sheet_name = 'page_{0}_img_{1}_{2}'.format(pno, img_idx, i)
sheet_name = '{0}_{1}'.format(img_name, i)
# ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间']
summary = sheet.get('summary')
card = summary[1]
......@@ -139,38 +144,52 @@ class Command(BaseCommand, LoggerMixin):
words = cell.get('words')
ws.cell(row=r1+1, column=c1+1, value=words)
def license2_process(self, img_path, license_summary, pid, license_key):
with open(img_path, 'rb') as f:
base64_data = base64.b64encode(f.read())
# 获取解码后的base64值
filedata = base64_data.decode()
# pid 产品的pid, key, secret 登录之后能够查看到
datas = {
"pid": str(pid),
"key": conf.OCR_KEY,
"secret": conf.OCR_SECRET,
"file": filedata
}
r = requests.post(self.ocr_url_2, data=datas)
if r.status_code == 200:
# 识别结果
response = r.json()
if response.get('ErrorCode') in consts.SUCCESS_CODE_SET:
if pid == consts.BC_PID:
# 银行卡
res_list = []
for en_key, chn_key in consts.BC_FIELD:
res_list.append((chn_key, response.get(en_key, '')))
license_summary.setdefault(license_key, []).append(res_list)
else:
# 营业执照、行驶证等
for result_dict in response.get('ResultList', []):
res_list = []
for field_dict in result_dict.get('FieldList', []):
res_list.append((field_dict.get('chn_key', ''), field_dict.get('value', '')))
license_summary.setdefault(license_key, []).append(res_list)
def ocr_2_wb(self, res, wb, pno, img_idx, bs_summary, unknown_summary, license_summary):
@staticmethod
def license1_process(ocr_data, license_summary, classify):
    """Collect license fields from the first OCR response.

    Every dict in ``ocr_data['data']`` is turned into a list of
    ``(field, value)`` tuples and appended to ``license_summary[classify]``.

    Args:
        ocr_data: the ``data`` payload of the first OCR response; its own
            ``data`` entry is expected to be a list of field dicts.
        license_summary: dict mutated in place, classify id -> list of
            per-license field lists.
        classify: classify id reported by the OCR service for this image.

    Returns:
        None. Nothing is recorded when ``ocr_data`` has no license data.
    """
    license_data = ocr_data.get('data', [])
    if not license_data:
        return
    # Key by classify id, as license2_process does and as license_rebuild
    # expects when it reads the summary. The previous lookup through
    # consts.CLASSIFY_PID_DICT returned None for these ids and failed on
    # unpacking.
    summary_rows = license_summary.setdefault(classify, [])
    for license_dict in license_data:
        summary_rows.append(list(license_dict.items()))
@staticmethod
def license2_process(ocr_res_2, license_summary, pid, classify):
    """Record the fields of a second-stage OCR response in ``license_summary``.

    Nothing is recorded unless the response's ``ErrorCode`` is in
    ``consts.SUCCESS_CODE_SET``. Results are appended to
    ``license_summary[classify]`` as lists of ``(Chinese field name, value)``
    tuples.

    Args:
        ocr_res_2: decoded JSON body of the second OCR response.
        license_summary: dict mutated in place, classify id -> list of
            per-license field lists.
        pid: product id that was used for the request.
        classify: classify id of the image.
    """
    if ocr_res_2.get('ErrorCode') not in consts.SUCCESS_CODE_SET:
        return

    if pid == consts.BC_PID:
        # Bank card: fields sit at the top level of the response and are
        # selected and renamed through consts.BC_FIELD.
        card_fields = [
            (chn_key, ocr_res_2.get(en_key, ''))
            for en_key, chn_key in consts.BC_FIELD
        ]
        license_summary.setdefault(classify, []).append(card_fields)
        return

    # Business license, vehicle license, etc.: one entry per ResultList item.
    for result_dict in ocr_res_2.get('ResultList', []):
        result_fields = [
            (field_dict.get('chn_key', ''), field_dict.get('value', ''))
            for field_dict in result_dict.get('FieldList', [])
        ]
        license_summary.setdefault(classify, []).append(result_fields)
# async def fetch_ocr_result(self, img_path):
# async with aiohttp.ClientSession(
# headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False)
# ) as session:
# json_data = self.get_ocr_json(img_path)
# async with session.post(self.ocr_url, json=json_data) as response:
# return await response.json()
#
# async def img_2_ocr_2_wb(self, wb, img_path, summary):
# res = await self.fetch_ocr_result(img_path)
# self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res))
# sheets_list = res.get('result').get('res')
# img_name = os.path.basename(img_path)
# self.append_sheet(wb, sheets_list, img_name, summary)
def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary):
# # 流水
# res = {
# 'code': 1,
......@@ -178,7 +197,7 @@ class Command(BaseCommand, LoggerMixin):
# 'data': {
# 'classify': 0,
# 'confidence': 0.999,
# 'sheets': [
# 'data': [
# {
# 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'],
# 'cells': []
......@@ -220,55 +239,52 @@ class Command(BaseCommand, LoggerMixin):
# 'confidence': 0.999,
# }
# }
with open(img_path, 'rb') as f:
base64_data = base64.b64encode(f.read())
# 获取解码后的base64值
file_data = base64_data.decode()
json_data_1 = {
"file": file_data
}
response_1 = requests.post(self.ocr_url_1, data=json_data_1)
if response_1.status_code == 200:
ocr_res_1 = response_1.json()
self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format(
self.log_base, img_path, ocr_res_1))
if ocr_res_1.get('code') == 1:
ocr_data = ocr_res_1.get('data', {})
classify = ocr_data.get('classify')
if classify is None:
return
elif classify in consts.OTHER_CLASSIFY_SET: # 其他类
return
elif classify in consts.LICENSE_CLASSIFY_SET_1: # 证件1
self.license1_process(ocr_data, license_summary, classify)
elif classify in consts.LICENSE_CLASSIFY_SET_2: # 证件2
pid, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify)
json_data_2 = {
"pid": str(pid),
"key": conf.OCR_KEY,
"secret": conf.OCR_SECRET,
"file": file_data
}
response_2 = requests.post(self.ocr_url_2, data=json_data_2)
if response_2.status_code == 200:
# 识别结果
ocr_res_2 = response_2.json()
self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format(
self.log_base, img_path, ocr_res_2))
self.license2_process(ocr_res_2, license_summary, pid, classify)
else:
raise Exception('ocr 2 error, img_path={0}'.format(img_path))
else: # 流水处理
self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify)
else:
pass
else:
raise Exception('ocr 1 error, img_path={0}'.format(img_path))
data = res.get('data', {})
classify = data.get('classify')
if classify is None:
return
elif classify in consts.OTHER_SET: # 其他类
return
elif classify in consts.BS_SET: # 流水处理
sheets = data.get('sheets', [])
if not sheets:
return
confidence = data.get('confidence', 1)
self.bs_process(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence)
elif classify in consts.LICENSE_SET_1: # 证件1
# self.license1_process() # TODO license1
pass
elif classify in consts.LICENSE_SET_2: # 证件2
pid, license_key = consts.CLASSIFY_PID_DICT.get(classify)
self.license2_process(license_summary, pid, license_key) # TODO reuse img data?
# async def fetch_ocr_result(self, img_path):
# async with aiohttp.ClientSession(
# headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False)
# ) as session:
# json_data = self.get_ocr_json(img_path)
# async with session.post(self.ocr_url, json=json_data) as response:
# return await response.json()
#
# async def img_2_ocr_2_wb(self, wb, img_path, summary):
# res = await self.fetch_ocr_result(img_path)
# self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res))
# sheets_list = res.get('result').get('res')
# img_name = os.path.basename(img_path)
# self.append_sheet(wb, sheets_list, img_name, summary)
def fetch_ocr_result(self, img_path):
files = [
('img', open(img_path, 'rb'))
]
response = requests.request("POST", self.ocr_url_1, files=files)
if response.status_code == 200:
return response.json()
def img_2_ocr_2_wb(self, wb, img_info, bs_summary, unknown_summary, license_summary):
res = self.fetch_ocr_result(img_info[0])
self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(
self.log_base, img_info[0], res))
if res.get('code') == 1:
self.ocr_2_wb(res, wb, img_info[1], img_info[2], bs_summary, unknown_summary, license_summary)
@staticmethod
def get_most(value_list):
......@@ -414,7 +430,6 @@ class Command(BaseCommand, LoggerMixin):
# EDMS异常:下载异常-->回队列-->邮件;上传异常-->重新上传队列-->邮件
# 算法异常:第一道异常-->识别失败-->邮件;第二道异常-->识别失败-->邮件
# TODO 数据库断联问题
# TODO 非流水证件处理
# TODO EDMS API GATEWAY
def handle(self, *args, **kwargs):
sleep_second = int(conf.SLEEP_SECOND)
......@@ -466,8 +481,8 @@ class Command(BaseCommand, LoggerMixin):
# loop.run_until_complete(asyncio.wait(tasks))
# loop.close()
for img_info in pdf_handler.img_info_list:
self.img_2_ocr_2_wb(wb, img_info, bs_summary, unknown_summary, license_summary)
for img_path in pdf_handler.img_path_list:
self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary)
self.cronjob_log.info('{0} [bs_summary={1}] [unknown_summary={2}] [license_summary={3}]'.format(
self.log_base, bs_summary, unknown_summary, license_summary))
......
......@@ -168,7 +168,7 @@ class BSWorkbook(Workbook):
month_mapping.setdefault(month_list[-1], []).insert(
0, (ws.title, idx_list[-1] + min_row, ws.max_row, 0))
def build_metadata_rows(self, classify, confidence, role, code, print_time, start_date, end_date):
def build_metadata_rows(self, confidence, code, print_time, start_date, end_date):
if start_date is None or end_date is None:
timedelta = None
else:
......@@ -176,10 +176,6 @@ class BSWorkbook(Workbook):
metadata_rows = [
('流水识别置信度', confidence),
self.blank_row,
('分类结果', classify),
self.blank_row,
('户名', role),
self.blank_row,
self.code_header,
]
metadata_rows.extend(code)
......@@ -200,19 +196,19 @@ class BSWorkbook(Workbook):
ms = self.create_sheet('{0}({1})'.format(self.meta_sheet_title, card))
return ms
def build_meta_sheet(self, card, classify, confidence, role, code, print_time, start_date, end_date):
metadata_rows = self.build_metadata_rows(classify, confidence, role, code, print_time, start_date, end_date)
def build_meta_sheet(self, card, confidence, code, print_time, start_date, end_date):
metadata_rows = self.build_metadata_rows(confidence, code, print_time, start_date, end_date)
ms = self.create_meta_sheet(card)
for row in metadata_rows:
ms.append(row)
return ms
def build_month_sheet(self, role, month_mapping, ms, is_reverse):
def build_month_sheet(self, card, month_mapping, ms, is_reverse):
tmp_ws = self.create_sheet('tmp_ws')
for month in sorted(month_mapping.keys()):
# 3.1.拷贝数据
parts = month_mapping.get(month)
new_ws = self.create_sheet('{0}({1})'.format(month, role))
new_ws = self.create_sheet('{0}({1})'.format(month, card[-6:]))
new_ws.append(consts.FIXED_HEADERS)
for part in parts:
ws = self.get_sheet_by_name(part[0])
......@@ -338,9 +334,7 @@ class BSWorkbook(Workbook):
# 2.元信息提取表
ms = self.build_meta_sheet(card,
summary['classify'],
summary['confidence'],
summary['role'],
summary['code'],
summary['print_time'],
start_date,
......@@ -359,13 +353,16 @@ class BSWorkbook(Workbook):
self.remove(self.get_sheet_by_name(sheet))
def license_rebuild(self, license_summary):
for en_key, cn_key in consts.LICENSE_ORDER:
ws = self.create_sheet(cn_key)
for bl in license_summary.get(en_key, []):
for classify, (_, name) in consts.LICENSE_ORDER:
res = license_summary.get(classify)
if res is None:
continue
ws = self.create_sheet(name)
for bl in res:
for bl_field in bl:
ws.append(bl_field)
ws.append((None, ))
def rebuild(self, bs_summary, license_summary):
self.bs_rebuild(bs_summary)
# self.license_rebuild(license_summary)
self.license_rebuild(license_summary)
......
......@@ -25,7 +25,7 @@ class PDFHandler:
def __init__(self, path, img_dir_path):
self.path = path
self.img_dir_path = img_dir_path
self.img_info_list = []
self.img_path_list = []
self.xref_set = set()
def get_img_save_path(self, pno, img_index=0, ext='png'):
......@@ -38,7 +38,7 @@ class PDFHandler:
pm = page.getPixmap(matrix=trans_2, alpha=False)
img_save_path = self.get_img_save_path(page.number)
pm.writePNG(img_save_path)
self.img_info_list.append((img_save_path, page.number, 0))
self.img_path_list.append(img_save_path)
@staticmethod
def getimage(pix):
......@@ -88,7 +88,7 @@ class PDFHandler:
with open(img_save_path, "wb") as f:
f.write(img_data)
self.xref_set.add(xref)
self.img_info_list.append((img_save_path, pno, img_index))
self.img_path_list.append(img_save_path)
@staticmethod
def split_il(il):
......@@ -179,7 +179,7 @@ class PDFHandler:
img_save_path = self.get_img_save_path(pno, img_index, im_list[0][2])
new_img.save(img_save_path)
page_to_png = False
self.img_info_list.append((img_save_path, pno, img_index))
self.img_path_list.append(img_save_path)
# 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片
if page_to_png:
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!