add asyncio
Showing
2 changed files
with
117 additions
and
78 deletions
... | @@ -175,14 +175,12 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -175,14 +175,12 @@ class Command(BaseCommand, LoggerMixin): |
175 | (field_dict.get('chn_key', ''), field_dict.get('value', ''))) | 175 | (field_dict.get('chn_key', ''), field_dict.get('value', ''))) |
176 | license_summary.setdefault(classify, []).append(res_list) | 176 | license_summary.setdefault(classify, []).append(res_list) |
177 | 177 | ||
178 | # async def fetch_ocr_result(self, img_path): | 178 | async def fetch_ocr_result(self, url, json_data): |
179 | # async with aiohttp.ClientSession( | 179 | async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session: |
180 | # headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False) | 180 | async with session.post(url, json=json_data) as response: |
181 | # ) as session: | 181 | if response.status == 200: |
182 | # json_data = self.get_ocr_json(img_path) | 182 | return await response.json() |
183 | # async with session.post(self.ocr_url, json=json_data) as response: | 183 | |
184 | # return await response.json() | ||
185 | # | ||
186 | # async def img_2_ocr_2_wb(self, wb, img_path, summary): | 184 | # async def img_2_ocr_2_wb(self, wb, img_path, summary): |
187 | # res = await self.fetch_ocr_result(img_path) | 185 | # res = await self.fetch_ocr_result(img_path) |
188 | # self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) | 186 | # self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) |
... | @@ -190,56 +188,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -190,56 +188,7 @@ class Command(BaseCommand, LoggerMixin): |
190 | # img_name = os.path.basename(img_path) | 188 | # img_name = os.path.basename(img_path) |
191 | # self.append_sheet(wb, sheets_list, img_name, summary) | 189 | # self.append_sheet(wb, sheets_list, img_name, summary) |
192 | 190 | ||
193 | def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary): | 191 | async def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary): |
194 | # # 流水 | ||
195 | # res = { | ||
196 | # 'code': 1, | ||
197 | # 'msg': 'success', | ||
198 | # 'data': { | ||
199 | # 'classify': 0, | ||
200 | # 'confidence': 0.999, | ||
201 | # 'data': [ | ||
202 | # { | ||
203 | # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], | ||
204 | # 'cells': [] | ||
205 | # }, | ||
206 | # { | ||
207 | # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], | ||
208 | # 'cells': [] | ||
209 | # } | ||
210 | # ] | ||
211 | # } | ||
212 | # } | ||
213 | # | ||
214 | # # 证件-1 | ||
215 | # res = { | ||
216 | # 'code': 1, | ||
217 | # 'msg': 'success', | ||
218 | # 'data': { | ||
219 | # 'classify': 0, | ||
220 | # 'confidence': 0.999, | ||
221 | # 'data': [ | ||
222 | # { | ||
223 | # 'cn_key': 'value', | ||
224 | # 'cn_key': 'value', | ||
225 | # }, | ||
226 | # { | ||
227 | # 'cn_key': 'value', | ||
228 | # 'cn_key': 'value', | ||
229 | # }, | ||
230 | # ] | ||
231 | # } | ||
232 | # } | ||
233 | # | ||
234 | # # 证件-2 or 其他类 | ||
235 | # res = { | ||
236 | # 'code': 1, | ||
237 | # 'msg': 'success', | ||
238 | # 'data': { | ||
239 | # 'classify': 0, | ||
240 | # 'confidence': 0.999, | ||
241 | # } | ||
242 | # } | ||
243 | with open(img_path, 'rb') as f: | 192 | with open(img_path, 'rb') as f: |
244 | base64_data = base64.b64encode(f.read()) | 193 | base64_data = base64.b64encode(f.read()) |
245 | # 获取解码后的base64值 | 194 | # 获取解码后的base64值 |
... | @@ -247,9 +196,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -247,9 +196,10 @@ class Command(BaseCommand, LoggerMixin): |
247 | json_data_1 = { | 196 | json_data_1 = { |
248 | "file": file_data | 197 | "file": file_data |
249 | } | 198 | } |
250 | response_1 = requests.post(self.ocr_url_1, json=json_data_1) | 199 | ocr_res_1 = await self.fetch_ocr_result(self.ocr_url_1, json_data_1) |
251 | if response_1.status_code == 200: | 200 | if ocr_res_1 is None: |
252 | ocr_res_1 = response_1.json() | 201 | raise Exception('ocr 1 error, img_path={0}'.format(img_path)) |
202 | else: | ||
253 | self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format( | 203 | self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format( |
254 | self.log_base, img_path, ocr_res_1)) | 204 | self.log_base, img_path, ocr_res_1)) |
255 | 205 | ||
... | @@ -270,22 +220,112 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -270,22 +220,112 @@ class Command(BaseCommand, LoggerMixin): |
270 | "secret": conf.OCR_SECRET, | 220 | "secret": conf.OCR_SECRET, |
271 | "file": file_data | 221 | "file": file_data |
272 | } | 222 | } |
273 | response_2 = requests.post(self.ocr_url_2, data=json_data_2) | 223 | ocr_res_2 = await self.fetch_ocr_result(self.ocr_url_2, json_data_2) |
274 | if response_2.status_code == 200: | 224 | if ocr_res_2 is None: |
225 | raise Exception('ocr 2 error, img_path={0}'.format(img_path)) | ||
226 | else: | ||
275 | # 识别结果 | 227 | # 识别结果 |
276 | ocr_res_2 = response_2.json() | ||
277 | self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format( | 228 | self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format( |
278 | self.log_base, img_path, ocr_res_2)) | 229 | self.log_base, img_path, ocr_res_2)) |
279 | self.license2_process(ocr_res_2, license_summary, pid, classify) | 230 | self.license2_process(ocr_res_2, license_summary, pid, classify) |
280 | else: | ||
281 | raise Exception('ocr 2 error, img_path={0}'.format(img_path)) | ||
282 | else: # 流水处理 | 231 | else: # 流水处理 |
283 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify) | 232 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify) |
284 | else: | ||
285 | pass | ||
286 | else: | ||
287 | raise Exception('ocr 1 error, img_path={0}'.format(img_path)) | ||
288 | 233 | ||
234 | # def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary): | ||
235 | # # # 流水 | ||
236 | # # res = { | ||
237 | # # 'code': 1, | ||
238 | # # 'msg': 'success', | ||
239 | # # 'data': { | ||
240 | # # 'classify': 0, | ||
241 | # # 'confidence': 0.999, | ||
242 | # # 'data': [ | ||
243 | # # { | ||
244 | # # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], | ||
245 | # # 'cells': [] | ||
246 | # # }, | ||
247 | # # { | ||
248 | # # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], | ||
249 | # # 'cells': [] | ||
250 | # # } | ||
251 | # # ] | ||
252 | # # } | ||
253 | # # } | ||
254 | # # | ||
255 | # # # 证件-1 | ||
256 | # # res = { | ||
257 | # # 'code': 1, | ||
258 | # # 'msg': 'success', | ||
259 | # # 'data': { | ||
260 | # # 'classify': 0, | ||
261 | # # 'confidence': 0.999, | ||
262 | # # 'data': [ | ||
263 | # # { | ||
264 | # # 'cn_key': 'value', | ||
265 | # # 'cn_key': 'value', | ||
266 | # # }, | ||
267 | # # { | ||
268 | # # 'cn_key': 'value', | ||
269 | # # 'cn_key': 'value', | ||
270 | # # }, | ||
271 | # # ] | ||
272 | # # } | ||
273 | # # } | ||
274 | # # | ||
275 | # # # 证件-2 or 其他类 | ||
276 | # # res = { | ||
277 | # # 'code': 1, | ||
278 | # # 'msg': 'success', | ||
279 | # # 'data': { | ||
280 | # # 'classify': 0, | ||
281 | # # 'confidence': 0.999, | ||
282 | # # } | ||
283 | # # } | ||
284 | # with open(img_path, 'rb') as f: | ||
285 | # base64_data = base64.b64encode(f.read()) | ||
286 | # # 获取解码后的base64值 | ||
287 | # file_data = base64_data.decode() | ||
288 | # json_data_1 = { | ||
289 | # "file": file_data | ||
290 | # } | ||
291 | # response_1 = requests.post(self.ocr_url_1, json=json_data_1) | ||
292 | # if response_1.status_code == 200: | ||
293 | # ocr_res_1 = response_1.json() | ||
294 | # self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format( | ||
295 | # self.log_base, img_path, ocr_res_1)) | ||
296 | # | ||
297 | # if ocr_res_1.get('code') == 1: | ||
298 | # ocr_data = ocr_res_1.get('data', {}) | ||
299 | # classify = ocr_data.get('classify') | ||
300 | # if classify is None: | ||
301 | # return | ||
302 | # elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 | ||
303 | # return | ||
304 | # elif classify in consts.LICENSE_CLASSIFY_SET_1: # 证件1 | ||
305 | # self.license1_process(ocr_data, license_summary, classify) | ||
306 | # elif classify in consts.LICENSE_CLASSIFY_SET_2: # 证件2 | ||
307 | # pid, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify) | ||
308 | # json_data_2 = { | ||
309 | # "pid": str(pid), | ||
310 | # "key": conf.OCR_KEY, | ||
311 | # "secret": conf.OCR_SECRET, | ||
312 | # "file": file_data | ||
313 | # } | ||
314 | # response_2 = requests.post(self.ocr_url_2, data=json_data_2) | ||
315 | # if response_2.status_code == 200: | ||
316 | # # 识别结果 | ||
317 | # ocr_res_2 = response_2.json() | ||
318 | # self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format( | ||
319 | # self.log_base, img_path, ocr_res_2)) | ||
320 | # self.license2_process(ocr_res_2, license_summary, pid, classify) | ||
321 | # else: | ||
322 | # raise Exception('ocr 2 error, img_path={0}'.format(img_path)) | ||
323 | # else: # 流水处理 | ||
324 | # self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify) | ||
325 | # else: | ||
326 | # pass | ||
327 | # else: | ||
328 | # raise Exception('ocr 1 error, img_path={0}'.format(img_path)) | ||
289 | 329 | ||
290 | @staticmethod | 330 | @staticmethod |
291 | def get_most(value_list): | 331 | def get_most(value_list): |
... | @@ -475,13 +515,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -475,13 +515,14 @@ class Command(BaseCommand, LoggerMixin): |
475 | # wb = Workbook() | 515 | # wb = Workbook() |
476 | 516 | ||
477 | # 4.1 获取OCR结果 | 517 | # 4.1 获取OCR结果 |
478 | # loop = asyncio.get_event_loop() | 518 | loop = asyncio.get_event_loop() |
479 | # tasks = [self.img_2_ocr_2_wb(wb, img_path, summary) for img_path in pdf_handler.img_path_list] | 519 | tasks = [self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary) |
480 | # loop.run_until_complete(asyncio.wait(tasks)) | 520 | for img_path in pdf_handler.img_path_list] |
521 | loop.run_until_complete(asyncio.wait(tasks)) | ||
481 | # loop.close() | 522 | # loop.close() |
482 | 523 | ||
483 | for img_path in pdf_handler.img_path_list: | 524 | # for img_path in pdf_handler.img_path_list: |
484 | self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary) | 525 | # self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary) |
485 | 526 | ||
486 | self.cronjob_log.info('{0} [bs_summary={1}] [unknown_summary={2}] [license_summary={3}]'.format( | 527 | self.cronjob_log.info('{0} [bs_summary={1}] [unknown_summary={2}] [license_summary={3}]'.format( |
487 | self.log_base, bs_summary, unknown_summary, license_summary)) | 528 | self.log_base, bs_summary, unknown_summary, license_summary)) | ... | ... |
-
Please register or sign in to post a comment