license part 1
Showing
2 changed files
with
98 additions
and
13 deletions
... | @@ -478,3 +478,14 @@ BC_FIELD = (('CardNum', '银行卡号'), | ... | @@ -478,3 +478,14 @@ BC_FIELD = (('CardNum', '银行卡号'), |
478 | 478 | ||
479 | SUCCESS_CODE_SET = {'0', 0} | 479 | SUCCESS_CODE_SET = {'0', 0} |
480 | 480 | ||
# Product id (pid) that the second-stage OCR service uses for bank cards.
BC_PID = 4

# Classification codes returned by the first-stage OCR service, grouped by
# how the result is handled downstream.
OTHER_SET = {0, 1, 2}  # other / unclassified: nothing to extract
BS_SET = {10, 11, 12}  # bank statements
LICENSE_SET_1 = {110, 111, 112}  # licence type 1
LICENSE_SET_2 = {1110, 1111, 1112}  # licence type 2

# Maps a classification code to (pid, summary key) for the second-stage OCR.
# NOTE(review): the only key here is 0, which belongs to OTHER_SET, while the
# lookup is performed with codes from LICENSE_SET_2. With the current values
# `CLASSIFY_PID_DICT.get(classify)` returns None for every LICENSE_SET_2 code
# and tuple-unpacking that result raises TypeError -- confirm the real codes.
CLASSIFY_PID_DICT = {
    0: (BC_PID, BC_KEY),  # bank card
}
491 | ... | ... |
... | @@ -4,6 +4,7 @@ import signal | ... | @@ -4,6 +4,7 @@ import signal |
4 | import asyncio | 4 | import asyncio |
5 | import aiohttp | 5 | import aiohttp |
6 | import difflib | 6 | import difflib |
7 | import base64 | ||
7 | import requests | 8 | import requests |
8 | from datetime import datetime, date | 9 | from datetime import datetime, date |
9 | from collections import Counter | 10 | from collections import Counter |
... | @@ -30,7 +31,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -30,7 +31,8 @@ class Command(BaseCommand, LoggerMixin): |
30 | # 数据目录 | 31 | # 数据目录 |
31 | self.data_dir = conf.DATA_DIR | 32 | self.data_dir = conf.DATA_DIR |
32 | # ocr相关 | 33 | # ocr相关 |
33 | self.ocr_url = conf.OCR_URL | 34 | self.ocr_url_1 = conf.OCR_URL_1 |
35 | self.ocr_url_2 = conf.OCR_URL_2 | ||
34 | # EDMS web_service_api | 36 | # EDMS web_service_api |
35 | self.edms = EDMS(conf.EDMS_USER, conf.EDMS_PWD) | 37 | self.edms = EDMS(conf.EDMS_USER, conf.EDMS_PWD) |
36 | # 优雅退出信号:15 | 38 | # 优雅退出信号:15 |
... | @@ -79,7 +81,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -79,7 +81,7 @@ class Command(BaseCommand, LoggerMixin): |
79 | return doc_data_path, excel_path, src_excel_path, pdf_path | 81 | return doc_data_path, excel_path, src_excel_path, pdf_path |
80 | 82 | ||
81 | @staticmethod | 83 | @staticmethod |
82 | def append_bs_sheet(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence): | 84 | def bs_process(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence): |
83 | for i, sheet in enumerate(sheets): | 85 | for i, sheet in enumerate(sheets): |
84 | sheet_name = 'page_{0}_img_{1}_{2}'.format(pno, img_idx, i) | 86 | sheet_name = 'page_{0}_img_{1}_{2}'.format(pno, img_idx, i) |
85 | # ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'] | 87 | # ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'] |
... | @@ -137,7 +139,39 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -137,7 +139,39 @@ class Command(BaseCommand, LoggerMixin): |
137 | words = cell.get('words') | 139 | words = cell.get('words') |
138 | ws.cell(row=r1+1, column=c1+1, value=words) | 140 | ws.cell(row=r1+1, column=c1+1, value=words) |
139 | 141 | ||
def license2_process(self, img_path, license_summary, pid, license_key):
    """Send one image to the second OCR endpoint and collect its fields.

    The image at ``img_path`` is read, base64-encoded and posted as form
    data to ``self.ocr_url_2``. When the HTTP status is 200 and the
    response's ``ErrorCode`` is in ``consts.SUCCESS_CODE_SET``, the
    recognised fields are appended to ``license_summary[license_key]`` as
    a list of ``(chinese_field_name, value)`` tuples. Any other outcome
    leaves ``license_summary`` untouched.

    Args:
        img_path: Path of the image file to recognise.
        license_summary: Dict mutated in place; maps ``license_key`` to a
            list of per-document field lists.
        pid: Product id selecting the OCR product; sent as a string.
        license_key: Key under which results are stored.

    Returns:
        None.

    NOTE(review): the visible call in ``ocr_2_wb`` passes only
    ``(license_summary, pid, license_key)`` and omits ``img_path``.
    """
    with open(img_path, 'rb') as img_file:
        # The service expects the image as base64 text, not raw bytes.
        encoded_img = base64.b64encode(img_file.read()).decode()

    # pid is the product's pid; key and secret can be looked up after
    # logging in to the OCR provider.
    payload = {
        "pid": str(pid),
        "key": conf.OCR_KEY,
        "secret": conf.OCR_SECRET,
        "file": encoded_img,
    }
    reply = requests.post(self.ocr_url_2, data=payload)
    if reply.status_code != 200:
        return

    # Recognition result.
    result = reply.json()
    if result.get('ErrorCode') not in consts.SUCCESS_CODE_SET:
        return

    if pid == consts.BC_PID:
        # Bank card: fields sit at the top level of the response and are
        # picked out by the (english_key, chinese_key) pairs in BC_FIELD.
        card_fields = [
            (chn_key, result.get(en_key, ''))
            for en_key, chn_key in consts.BC_FIELD
        ]
        license_summary.setdefault(license_key, []).append(card_fields)
        return

    # Business licence, vehicle licence, etc.: one entry per recognised
    # document, each carrying its own field list.
    for entry in result.get('ResultList', []):
        doc_fields = [
            (field.get('chn_key', ''), field.get('value', ''))
            for field in entry.get('FieldList', [])
        ]
        license_summary.setdefault(license_key, []).append(doc_fields)
172 | |||
140 | def ocr_2_wb(self, res, wb, pno, img_idx, bs_summary, unknown_summary, license_summary): | 173 | def ocr_2_wb(self, res, wb, pno, img_idx, bs_summary, unknown_summary, license_summary): |
174 | # # 流水 | ||
141 | # res = { | 175 | # res = { |
142 | # 'code': 1, | 176 | # 'code': 1, |
143 | # 'msg': 'success', | 177 | # 'msg': 'success', |
... | @@ -156,18 +190,55 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -156,18 +190,55 @@ class Command(BaseCommand, LoggerMixin): |
156 | # ] | 190 | # ] |
157 | # } | 191 | # } |
158 | # } | 192 | # } |
193 | # | ||
194 | # # 证件-1 | ||
195 | # res = { | ||
196 | # 'code': 1, | ||
197 | # 'msg': 'success', | ||
198 | # 'data': { | ||
199 | # 'classify': 0, | ||
200 | # 'confidence': 0.999, | ||
201 | # 'data': [ | ||
202 | # { | ||
203 | # 'cn_key': 'value', | ||
204 | # 'cn_key': 'value', | ||
205 | # }, | ||
206 | # { | ||
207 | # 'cn_key': 'value', | ||
208 | # 'cn_key': 'value', | ||
209 | # }, | ||
210 | # ] | ||
211 | # } | ||
212 | # } | ||
213 | # | ||
214 | # # 证件-2 or 其他类 | ||
215 | # res = { | ||
216 | # 'code': 1, | ||
217 | # 'msg': 'success', | ||
218 | # 'data': { | ||
219 | # 'classify': 0, | ||
220 | # 'confidence': 0.999, | ||
221 | # } | ||
222 | # } | ||
223 | |||
159 | data = res.get('data', {}) | 224 | data = res.get('data', {}) |
160 | classify = data.get('classify') | 225 | classify = data.get('classify') |
161 | if classify is None: | 226 | if classify is None: |
162 | return | 227 | return |
163 | # if classify in | 228 | elif classify in consts.OTHER_SET: # 其他类 |
164 | sheets = data.get('sheets', []) | ||
165 | if not sheets: | ||
166 | return | 229 | return |
167 | confidence = data.get('confidence', 1) | 230 | elif classify in consts.BS_SET: # 流水处理 |
168 | self.append_bs_sheet(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence) | 231 | sheets = data.get('sheets', []) |
169 | # else: | 232 | if not sheets: |
170 | # pass | 233 | return |
234 | confidence = data.get('confidence', 1) | ||
235 | self.bs_process(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence) | ||
236 | elif classify in consts.LICENSE_SET_1: # 证件1 | ||
237 | # self.license1_process() # TODO license1 | ||
238 | pass | ||
239 | elif classify in consts.LICENSE_SET_2: # 证件2 | ||
240 | pid, license_key = consts.CLASSIFY_PID_DICT.get(classify) | ||
241 | self.license2_process(license_summary, pid, license_key) # TODO reuse img data? | ||
171 | 242 | ||
172 | # async def fetch_ocr_result(self, img_path): | 243 | # async def fetch_ocr_result(self, img_path): |
173 | # async with aiohttp.ClientSession( | 244 | # async with aiohttp.ClientSession( |
... | @@ -188,8 +259,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -188,8 +259,9 @@ class Command(BaseCommand, LoggerMixin): |
188 | files = [ | 259 | files = [ |
189 | ('img', open(img_path, 'rb')) | 260 | ('img', open(img_path, 'rb')) |
190 | ] | 261 | ] |
191 | response = requests.request("POST", self.ocr_url, files=files) | 262 | response = requests.request("POST", self.ocr_url_1, files=files) |
192 | return response.json() | 263 | if response.status_code == 200: |
264 | return response.json() | ||
193 | 265 | ||
194 | def img_2_ocr_2_wb(self, wb, img_info, bs_summary, unknown_summary, license_summary): | 266 | def img_2_ocr_2_wb(self, wb, img_info, bs_summary, unknown_summary, license_summary): |
195 | res = self.fetch_ocr_result(img_info[0]) | 267 | res = self.fetch_ocr_result(img_info[0]) |
... | @@ -255,7 +327,6 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -255,7 +327,6 @@ class Command(BaseCommand, LoggerMixin): |
255 | summary['role'] = self.get_most(summary['role']) | 327 | summary['role'] = self.get_most(summary['role']) |
256 | return bs_summary | 328 | return bs_summary |
257 | 329 | ||
258 | |||
259 | def rebuild_bs_summary(self, bs_summary, unknown_summary): | 330 | def rebuild_bs_summary(self, bs_summary, unknown_summary): |
260 | # bs_summary = { | 331 | # bs_summary = { |
261 | # '卡号': { | 332 | # '卡号': { |
... | @@ -336,9 +407,12 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -336,9 +407,12 @@ class Command(BaseCommand, LoggerMixin): |
336 | return merged_bs_summary | 407 | return merged_bs_summary |
337 | 408 | ||
338 | # TODO 细化文件状态,不同异常状态,归还队列,重试时采取不同的处理 | 409 | # TODO 细化文件状态,不同异常状态,归还队列,重试时采取不同的处理 |
339 | # TODO 调用接口重试 | ||
340 | # TODO 协程异步发送OCR请求 | 410 | # TODO 协程异步发送OCR请求 |
411 | # TODO 调用接口重试 | ||
341 | # TODO 异常邮件通知 | 412 | # TODO 异常邮件通知 |
413 | # 识别失败:普通异常,如PDF异常、构建过程异常 | ||
414 | # EDMS异常:下载异常-->回队列-->邮件;上传异常-->重新上传队列-->邮件 | ||
415 | # 算法异常:第一道异常-->识别失败-->邮件;第二道异常-->识别失败-->邮件 | ||
342 | # TODO 数据库断联问题 | 416 | # TODO 数据库断联问题 |
343 | # TODO 非流水证件处理 | 417 | # TODO 非流水证件处理 |
344 | # TODO EDMS API GATEWAY | 418 | # TODO EDMS API GATEWAY | ... | ... |
-
Please register or sign in to post a comment