c1c49a8e by 周伟奇

license part 1

1 parent 96b67222
......@@ -478,3 +478,14 @@ BC_FIELD = (('CardNum', '银行卡号'),
# Values of the OCR response's 'ErrorCode' field that are treated as success
# (both the string and the integer form of zero are accepted).
SUCCESS_CODE_SET = {'0', 0}
# Product id (pid) for which the OCR response is read with the bank-card field layout.
BC_PID = 4
# Classification codes grouped by how the OCR command's ocr_2_wb routes them:
# "other" documents, bank statements, license type 1 and license type 2.
OTHER_SET = {0, 1, 2}
BS_SET = {10, 11, 12}
LICENSE_SET_1 = {110, 111, 112}
LICENSE_SET_2 = {1110, 1111, 1112}
# Maps a classification code to a (pid, summary key) pair.
# NOTE(review): the only key here is 0, which is not a member of LICENSE_SET_2,
# while ocr_2_wb looks codes from LICENSE_SET_2 up in this dict and unpacks the
# result -- confirm the intended keys.
CLASSIFY_PID_DICT = {
0: (4, BC_KEY) # bank card
}
......
......@@ -4,6 +4,7 @@ import signal
import asyncio
import aiohttp
import difflib
import base64
import requests
from datetime import datetime, date
from collections import Counter
......@@ -30,7 +31,8 @@ class Command(BaseCommand, LoggerMixin):
# 数据目录
self.data_dir = conf.DATA_DIR
# ocr相关
self.ocr_url = conf.OCR_URL
self.ocr_url_1 = conf.OCR_URL_1
self.ocr_url_2 = conf.OCR_URL_2
# EDMS web_service_api
self.edms = EDMS(conf.EDMS_USER, conf.EDMS_PWD)
# 优雅退出信号:15
......@@ -79,7 +81,7 @@ class Command(BaseCommand, LoggerMixin):
return doc_data_path, excel_path, src_excel_path, pdf_path
@staticmethod
def append_bs_sheet(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence):
def bs_process(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence):
for i, sheet in enumerate(sheets):
sheet_name = 'page_{0}_img_{1}_{2}'.format(pno, img_idx, i)
# ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间']
......@@ -137,7 +139,39 @@ class Command(BaseCommand, LoggerMixin):
words = cell.get('words')
ws.cell(row=r1+1, column=c1+1, value=words)
def license2_process(self, img_path, license_summary, pid, license_key):
    """Run the second OCR service on one image and record the recognized fields.

    The image at ``img_path`` is base64-encoded and posted to ``self.ocr_url_2``
    together with the product id and the configured credentials. When the
    service answers with HTTP 200 and a success ``ErrorCode``, one list of
    ``(chinese_key, value)`` tuples per recognized document is appended to
    ``license_summary[license_key]``. Any other answer leaves
    ``license_summary`` untouched.

    Args:
        img_path: path of the image file to recognize.
        license_summary: dict mutated in place; maps ``license_key`` to a
            list of per-document field lists.
        pid: product id sent to the OCR service (converted to ``str``).
        license_key: key under which results are stored in ``license_summary``.

    Returns:
        None.
    """
    with open(img_path, 'rb') as img_file:
        raw_bytes = img_file.read()
    # The service expects the image as base64 text inside the form payload.
    encoded_img = base64.b64encode(raw_bytes).decode()

    # pid is the product id; key and secret are the account credentials
    # (visible after logging in to the OCR provider).
    payload = {
        "pid": str(pid),
        "key": conf.OCR_KEY,
        "secret": conf.OCR_SECRET,
        "file": encoded_img,
    }
    ocr_resp = requests.post(self.ocr_url_2, data=payload)
    if ocr_resp.status_code != 200:
        return

    # Recognition result.
    result = ocr_resp.json()
    if result.get('ErrorCode') not in consts.SUCCESS_CODE_SET:
        return

    if pid == consts.BC_PID:
        # Bank card: the fields sit at the top level of the response.
        card_fields = [
            (cn_name, result.get(en_name, ''))
            for en_name, cn_name in consts.BC_FIELD
        ]
        license_summary.setdefault(license_key, []).append(card_fields)
        return

    # Business license, vehicle license, etc.: one entry per item in ResultList.
    for item in result.get('ResultList', []):
        item_fields = [
            (field.get('chn_key', ''), field.get('value', ''))
            for field in item.get('FieldList', [])
        ]
        license_summary.setdefault(license_key, []).append(item_fields)
def ocr_2_wb(self, res, wb, pno, img_idx, bs_summary, unknown_summary, license_summary):
# # 流水
# res = {
# 'code': 1,
# 'msg': 'success',
......@@ -156,18 +190,55 @@ class Command(BaseCommand, LoggerMixin):
# ]
# }
# }
#
# # 证件-1
# res = {
# 'code': 1,
# 'msg': 'success',
# 'data': {
# 'classify': 0,
# 'confidence': 0.999,
# 'data': [
# {
# 'cn_key': 'value',
# 'cn_key': 'value',
# },
# {
# 'cn_key': 'value',
# 'cn_key': 'value',
# },
# ]
# }
# }
#
# # 证件-2 or 其他类
# res = {
# 'code': 1,
# 'msg': 'success',
# 'data': {
# 'classify': 0,
# 'confidence': 0.999,
# }
# }
data = res.get('data', {})
classify = data.get('classify')
if classify is None:
return
# if classify in
sheets = data.get('sheets', [])
if not sheets:
elif classify in consts.OTHER_SET: # 其他类
return
confidence = data.get('confidence', 1)
self.append_bs_sheet(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence)
# else:
# pass
elif classify in consts.BS_SET: # 流水处理
sheets = data.get('sheets', [])
if not sheets:
return
confidence = data.get('confidence', 1)
self.bs_process(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence)
elif classify in consts.LICENSE_SET_1: # 证件1
# self.license1_process() # TODO license1
pass
elif classify in consts.LICENSE_SET_2: # 证件2
pid, license_key = consts.CLASSIFY_PID_DICT.get(classify)
self.license2_process(license_summary, pid, license_key) # TODO reuse img data?
# async def fetch_ocr_result(self, img_path):
# async with aiohttp.ClientSession(
......@@ -188,8 +259,9 @@ class Command(BaseCommand, LoggerMixin):
files = [
('img', open(img_path, 'rb'))
]
response = requests.request("POST", self.ocr_url, files=files)
return response.json()
response = requests.request("POST", self.ocr_url_1, files=files)
if response.status_code == 200:
return response.json()
def img_2_ocr_2_wb(self, wb, img_info, bs_summary, unknown_summary, license_summary):
res = self.fetch_ocr_result(img_info[0])
......@@ -255,7 +327,6 @@ class Command(BaseCommand, LoggerMixin):
summary['role'] = self.get_most(summary['role'])
return bs_summary
def rebuild_bs_summary(self, bs_summary, unknown_summary):
# bs_summary = {
# '卡号': {
......@@ -336,9 +407,12 @@ class Command(BaseCommand, LoggerMixin):
return merged_bs_summary
# TODO 细化文件状态,不同异常状态,归还队列,重试时采取不同的处理
# TODO 调用接口重试
# TODO 协程异步发送OCR请求
# TODO 调用接口重试
# TODO 异常邮件通知
# 识别失败:普通异常,如PDF异常、构建过程异常
# EDMS异常:下载异常-->回队列-->邮件;上传异常-->重新上传队列-->邮件
# 算法异常:第一道异常-->识别失败-->邮件;第二道异常-->识别失败-->邮件
# TODO 数据库断联问题
# TODO 非流水证件处理
# TODO EDMS API GATEWAY
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!