c1c49a8e by 周伟奇

license part 1

1 parent 96b67222
...@@ -478,3 +478,14 @@ BC_FIELD = (('CardNum', '银行卡号'), ...@@ -478,3 +478,14 @@ BC_FIELD = (('CardNum', '银行卡号'),
478 478
479 SUCCESS_CODE_SET = {'0', 0} 479 SUCCESS_CODE_SET = {'0', 0}
480 480
# Product id of the bank-card OCR model.
BC_PID = 4

# Classification codes returned by the first OCR stage, grouped by how the
# result is handled afterwards.
OTHER_SET = {0, 1, 2}               # other: nothing further is done
BS_SET = {10, 11, 12}               # bank statements
LICENSE_SET_1 = {110, 111, 112}     # licences, type 1
LICENSE_SET_2 = {1110, 1111, 1112}  # licences, type 2

# Maps a classification code to (product id, summary key).
# NOTE(review): the only key, 0, is also a member of OTHER_SET, and no member
# of LICENSE_SET_2 has an entry here -- confirm the intended classify codes.
CLASSIFY_PID_DICT = {
    0: (BC_PID, BC_KEY),  # bank card
}
491
......
...@@ -4,6 +4,7 @@ import signal ...@@ -4,6 +4,7 @@ import signal
4 import asyncio 4 import asyncio
5 import aiohttp 5 import aiohttp
6 import difflib 6 import difflib
7 import base64
7 import requests 8 import requests
8 from datetime import datetime, date 9 from datetime import datetime, date
9 from collections import Counter 10 from collections import Counter
...@@ -30,7 +31,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -30,7 +31,8 @@ class Command(BaseCommand, LoggerMixin):
30 # 数据目录 31 # 数据目录
31 self.data_dir = conf.DATA_DIR 32 self.data_dir = conf.DATA_DIR
32 # ocr相关 33 # ocr相关
33 self.ocr_url = conf.OCR_URL 34 self.ocr_url_1 = conf.OCR_URL_1
35 self.ocr_url_2 = conf.OCR_URL_2
34 # EDMS web_service_api 36 # EDMS web_service_api
35 self.edms = EDMS(conf.EDMS_USER, conf.EDMS_PWD) 37 self.edms = EDMS(conf.EDMS_USER, conf.EDMS_PWD)
36 # 优雅退出信号:15 38 # 优雅退出信号:15
...@@ -79,7 +81,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -79,7 +81,7 @@ class Command(BaseCommand, LoggerMixin):
79 return doc_data_path, excel_path, src_excel_path, pdf_path 81 return doc_data_path, excel_path, src_excel_path, pdf_path
80 82
81 @staticmethod 83 @staticmethod
82 def append_bs_sheet(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence): 84 def bs_process(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence):
83 for i, sheet in enumerate(sheets): 85 for i, sheet in enumerate(sheets):
84 sheet_name = 'page_{0}_img_{1}_{2}'.format(pno, img_idx, i) 86 sheet_name = 'page_{0}_img_{1}_{2}'.format(pno, img_idx, i)
85 # ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'] 87 # ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间']
...@@ -137,7 +139,39 @@ class Command(BaseCommand, LoggerMixin): ...@@ -137,7 +139,39 @@ class Command(BaseCommand, LoggerMixin):
137 words = cell.get('words') 139 words = cell.get('words')
138 ws.cell(row=r1+1, column=c1+1, value=words) 140 ws.cell(row=r1+1, column=c1+1, value=words)
139 141
def license2_process(self, img_path, license_summary, pid, license_key):
    """Run the second OCR service on one image and record the recognised fields.

    The image is read from disk, base64-encoded and POSTed as form data to
    ``self.ocr_url_2`` together with the product id and the credentials
    taken from ``conf``. Nothing is recorded unless the HTTP status is 200
    and the response's ``ErrorCode`` is in ``consts.SUCCESS_CODE_SET``.

    Args:
        img_path: Path of the image file to send.
        license_summary: Dict updated in place. Every recognised item is
            appended under ``license_key`` as a list of
            ``(chinese_field_name, value)`` tuples.
        pid: Product id of the OCR model; sent as a string.
        license_key: Key of ``license_summary`` that receives the results.

    Returns:
        None. Results are delivered through ``license_summary``.
    """
    with open(img_path, 'rb') as img_file:
        # The service expects the base64 text, not the raw bytes.
        encoded_img = base64.b64encode(img_file.read()).decode()

    # pid is the product's pid; key and secret can be looked up after
    # logging in to the OCR provider.
    payload = {
        "pid": str(pid),
        "key": conf.OCR_KEY,
        "secret": conf.OCR_SECRET,
        "file": encoded_img,
    }
    # NOTE(review): no timeout is passed, so this call can block for as long
    # as the server keeps the connection open.
    resp = requests.post(self.ocr_url_2, data=payload)
    if resp.status_code != 200:
        return

    # Recognition result.
    result = resp.json()
    if result.get('ErrorCode') not in consts.SUCCESS_CODE_SET:
        return

    if pid == consts.BC_PID:
        # Bank card: the fields are top-level keys of the response.
        card_fields = [
            (chn_key, result.get(en_key, ''))
            for en_key, chn_key in consts.BC_FIELD
        ]
        license_summary.setdefault(license_key, []).append(card_fields)
        return

    # Business licence, vehicle licence, etc.: one entry per recognised item.
    for item in result.get('ResultList', []):
        item_fields = [
            (field.get('chn_key', ''), field.get('value', ''))
            for field in item.get('FieldList', [])
        ]
        license_summary.setdefault(license_key, []).append(item_fields)
172
140 def ocr_2_wb(self, res, wb, pno, img_idx, bs_summary, unknown_summary, license_summary): 173 def ocr_2_wb(self, res, wb, pno, img_idx, bs_summary, unknown_summary, license_summary):
174 # # 流水
141 # res = { 175 # res = {
142 # 'code': 1, 176 # 'code': 1,
143 # 'msg': 'success', 177 # 'msg': 'success',
...@@ -156,18 +190,55 @@ class Command(BaseCommand, LoggerMixin): ...@@ -156,18 +190,55 @@ class Command(BaseCommand, LoggerMixin):
156 # ] 190 # ]
157 # } 191 # }
158 # } 192 # }
193 #
194 # # 证件-1
195 # res = {
196 # 'code': 1,
197 # 'msg': 'success',
198 # 'data': {
199 # 'classify': 0,
200 # 'confidence': 0.999,
201 # 'data': [
202 # {
203 # 'cn_key': 'value',
204 # 'cn_key': 'value',
205 # },
206 # {
207 # 'cn_key': 'value',
208 # 'cn_key': 'value',
209 # },
210 # ]
211 # }
212 # }
213 #
214 # # 证件-2 or 其他类
215 # res = {
216 # 'code': 1,
217 # 'msg': 'success',
218 # 'data': {
219 # 'classify': 0,
220 # 'confidence': 0.999,
221 # }
222 # }
223
159 data = res.get('data', {}) 224 data = res.get('data', {})
160 classify = data.get('classify') 225 classify = data.get('classify')
161 if classify is None: 226 if classify is None:
162 return 227 return
163 # if classify in 228 elif classify in consts.OTHER_SET: # 其他类
164 sheets = data.get('sheets', [])
165 if not sheets:
166 return 229 return
167 confidence = data.get('confidence', 1) 230 elif classify in consts.BS_SET: # 流水处理
168 self.append_bs_sheet(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence) 231 sheets = data.get('sheets', [])
169 # else: 232 if not sheets:
170 # pass 233 return
234 confidence = data.get('confidence', 1)
235 self.bs_process(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence)
236 elif classify in consts.LICENSE_SET_1: # 证件1
237 # self.license1_process() # TODO license1
238 pass
239 elif classify in consts.LICENSE_SET_2: # 证件2
240 pid, license_key = consts.CLASSIFY_PID_DICT.get(classify)
241 self.license2_process(license_summary, pid, license_key) # TODO reuse img data?
171 242
172 # async def fetch_ocr_result(self, img_path): 243 # async def fetch_ocr_result(self, img_path):
173 # async with aiohttp.ClientSession( 244 # async with aiohttp.ClientSession(
...@@ -188,8 +259,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -188,8 +259,9 @@ class Command(BaseCommand, LoggerMixin):
188 files = [ 259 files = [
189 ('img', open(img_path, 'rb')) 260 ('img', open(img_path, 'rb'))
190 ] 261 ]
191 response = requests.request("POST", self.ocr_url, files=files) 262 response = requests.request("POST", self.ocr_url_1, files=files)
192 return response.json() 263 if response.status_code == 200:
264 return response.json()
193 265
194 def img_2_ocr_2_wb(self, wb, img_info, bs_summary, unknown_summary, license_summary): 266 def img_2_ocr_2_wb(self, wb, img_info, bs_summary, unknown_summary, license_summary):
195 res = self.fetch_ocr_result(img_info[0]) 267 res = self.fetch_ocr_result(img_info[0])
...@@ -255,7 +327,6 @@ class Command(BaseCommand, LoggerMixin): ...@@ -255,7 +327,6 @@ class Command(BaseCommand, LoggerMixin):
255 summary['role'] = self.get_most(summary['role']) 327 summary['role'] = self.get_most(summary['role'])
256 return bs_summary 328 return bs_summary
257 329
258
259 def rebuild_bs_summary(self, bs_summary, unknown_summary): 330 def rebuild_bs_summary(self, bs_summary, unknown_summary):
260 # bs_summary = { 331 # bs_summary = {
261 # '卡号': { 332 # '卡号': {
...@@ -336,9 +407,12 @@ class Command(BaseCommand, LoggerMixin): ...@@ -336,9 +407,12 @@ class Command(BaseCommand, LoggerMixin):
336 return merged_bs_summary 407 return merged_bs_summary
337 408
338 # TODO 细化文件状态,不同异常状态,归还队列,重试时采取不同的处理 409 # TODO 细化文件状态,不同异常状态,归还队列,重试时采取不同的处理
339 # TODO 调用接口重试
340 # TODO 协程异步发送OCR请求 410 # TODO 协程异步发送OCR请求
411 # TODO 调用接口重试
341 # TODO 异常邮件通知 412 # TODO 异常邮件通知
413 # 识别失败:普通异常,如PDF异常、构建过程异常
414 # EDMS异常:下载异常-->回队列-->邮件;上传异常-->重新上传队列-->邮件
415 # 算法异常:第一道异常-->识别失败-->邮件;第二道异常-->识别失败-->邮件
342 # TODO 数据库断联问题 416 # TODO 数据库断联问题
343 # TODO 非流水证件处理 417 # TODO 非流水证件处理
344 # TODO EDMS API GATEWAY 418 # TODO EDMS API GATEWAY
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!