add asyncio
Showing 2 changed files with 117 additions and 78 deletions
| ... | @@ -175,14 +175,12 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -175,14 +175,12 @@ class Command(BaseCommand, LoggerMixin): |
| 175 | (field_dict.get('chn_key', ''), field_dict.get('value', ''))) | 175 | (field_dict.get('chn_key', ''), field_dict.get('value', ''))) |
| 176 | license_summary.setdefault(classify, []).append(res_list) | 176 | license_summary.setdefault(classify, []).append(res_list) |
| 177 | 177 | ||
| 178 | # async def fetch_ocr_result(self, img_path): | 178 | async def fetch_ocr_result(self, url, json_data): |
| 179 | # async with aiohttp.ClientSession( | 179 | async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session: |
| 180 | # headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False) | 180 | async with session.post(url, json=json_data) as response: |
| 181 | # ) as session: | 181 | if response.status == 200: |
| 182 | # json_data = self.get_ocr_json(img_path) | 182 | return await response.json() |
| 183 | # async with session.post(self.ocr_url, json=json_data) as response: | 183 | |
| 184 | # return await response.json() | ||
| 185 | # | ||
| 186 | # async def img_2_ocr_2_wb(self, wb, img_path, summary): | 184 | # async def img_2_ocr_2_wb(self, wb, img_path, summary): |
| 187 | # res = await self.fetch_ocr_result(img_path) | 185 | # res = await self.fetch_ocr_result(img_path) |
| 188 | # self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) | 186 | # self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) |
| ... | @@ -190,56 +188,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -190,56 +188,7 @@ class Command(BaseCommand, LoggerMixin): |
| 190 | # img_name = os.path.basename(img_path) | 188 | # img_name = os.path.basename(img_path) |
| 191 | # self.append_sheet(wb, sheets_list, img_name, summary) | 189 | # self.append_sheet(wb, sheets_list, img_name, summary) |
| 192 | 190 | ||
| 193 | def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary): | 191 | async def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary): |
| 194 | # # 流水 | ||
| 195 | # res = { | ||
| 196 | # 'code': 1, | ||
| 197 | # 'msg': 'success', | ||
| 198 | # 'data': { | ||
| 199 | # 'classify': 0, | ||
| 200 | # 'confidence': 0.999, | ||
| 201 | # 'data': [ | ||
| 202 | # { | ||
| 203 | # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], | ||
| 204 | # 'cells': [] | ||
| 205 | # }, | ||
| 206 | # { | ||
| 207 | # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], | ||
| 208 | # 'cells': [] | ||
| 209 | # } | ||
| 210 | # ] | ||
| 211 | # } | ||
| 212 | # } | ||
| 213 | # | ||
| 214 | # # 证件-1 | ||
| 215 | # res = { | ||
| 216 | # 'code': 1, | ||
| 217 | # 'msg': 'success', | ||
| 218 | # 'data': { | ||
| 219 | # 'classify': 0, | ||
| 220 | # 'confidence': 0.999, | ||
| 221 | # 'data': [ | ||
| 222 | # { | ||
| 223 | # 'cn_key': 'value', | ||
| 224 | # 'cn_key': 'value', | ||
| 225 | # }, | ||
| 226 | # { | ||
| 227 | # 'cn_key': 'value', | ||
| 228 | # 'cn_key': 'value', | ||
| 229 | # }, | ||
| 230 | # ] | ||
| 231 | # } | ||
| 232 | # } | ||
| 233 | # | ||
| 234 | # # 证件-2 or 其他类 | ||
| 235 | # res = { | ||
| 236 | # 'code': 1, | ||
| 237 | # 'msg': 'success', | ||
| 238 | # 'data': { | ||
| 239 | # 'classify': 0, | ||
| 240 | # 'confidence': 0.999, | ||
| 241 | # } | ||
| 242 | # } | ||
| 243 | with open(img_path, 'rb') as f: | 192 | with open(img_path, 'rb') as f: |
| 244 | base64_data = base64.b64encode(f.read()) | 193 | base64_data = base64.b64encode(f.read()) |
| 245 | # 获取解码后的base64值 | 194 | # 获取解码后的base64值 |
| ... | @@ -247,9 +196,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -247,9 +196,10 @@ class Command(BaseCommand, LoggerMixin): |
| 247 | json_data_1 = { | 196 | json_data_1 = { |
| 248 | "file": file_data | 197 | "file": file_data |
| 249 | } | 198 | } |
| 250 | response_1 = requests.post(self.ocr_url_1, json=json_data_1) | 199 | ocr_res_1 = await self.fetch_ocr_result(self.ocr_url_1, json_data_1) |
| 251 | if response_1.status_code == 200: | 200 | if ocr_res_1 is None: |
| 252 | ocr_res_1 = response_1.json() | 201 | raise Exception('ocr 1 error, img_path={0}'.format(img_path)) |
| 202 | else: | ||
| 253 | self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format( | 203 | self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format( |
| 254 | self.log_base, img_path, ocr_res_1)) | 204 | self.log_base, img_path, ocr_res_1)) |
| 255 | 205 | ||
| ... | @@ -270,22 +220,112 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -270,22 +220,112 @@ class Command(BaseCommand, LoggerMixin): |
| 270 | "secret": conf.OCR_SECRET, | 220 | "secret": conf.OCR_SECRET, |
| 271 | "file": file_data | 221 | "file": file_data |
| 272 | } | 222 | } |
| 273 | response_2 = requests.post(self.ocr_url_2, data=json_data_2) | 223 | ocr_res_2 = await self.fetch_ocr_result(self.ocr_url_2, json_data_2) |
| 274 | if response_2.status_code == 200: | 224 | if ocr_res_2 is None: |
| 225 | raise Exception('ocr 2 error, img_path={0}'.format(img_path)) | ||
| 226 | else: | ||
| 275 | # 识别结果 | 227 | # 识别结果 |
| 276 | ocr_res_2 = response_2.json() | ||
| 277 | self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format( | 228 | self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format( |
| 278 | self.log_base, img_path, ocr_res_2)) | 229 | self.log_base, img_path, ocr_res_2)) |
| 279 | self.license2_process(ocr_res_2, license_summary, pid, classify) | 230 | self.license2_process(ocr_res_2, license_summary, pid, classify) |
| 280 | else: | ||
| 281 | raise Exception('ocr 2 error, img_path={0}'.format(img_path)) | ||
| 282 | else: # 流水处理 | 231 | else: # 流水处理 |
| 283 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify) | 232 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify) |
| 284 | else: | ||
| 285 | pass | ||
| 286 | else: | ||
| 287 | raise Exception('ocr 1 error, img_path={0}'.format(img_path)) | ||
| 288 | 233 | ||
| 234 | # def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary): | ||
| 235 | # # # 流水 | ||
| 236 | # # res = { | ||
| 237 | # # 'code': 1, | ||
| 238 | # # 'msg': 'success', | ||
| 239 | # # 'data': { | ||
| 240 | # # 'classify': 0, | ||
| 241 | # # 'confidence': 0.999, | ||
| 242 | # # 'data': [ | ||
| 243 | # # { | ||
| 244 | # # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], | ||
| 245 | # # 'cells': [] | ||
| 246 | # # }, | ||
| 247 | # # { | ||
| 248 | # # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], | ||
| 249 | # # 'cells': [] | ||
| 250 | # # } | ||
| 251 | # # ] | ||
| 252 | # # } | ||
| 253 | # # } | ||
| 254 | # # | ||
| 255 | # # # 证件-1 | ||
| 256 | # # res = { | ||
| 257 | # # 'code': 1, | ||
| 258 | # # 'msg': 'success', | ||
| 259 | # # 'data': { | ||
| 260 | # # 'classify': 0, | ||
| 261 | # # 'confidence': 0.999, | ||
| 262 | # # 'data': [ | ||
| 263 | # # { | ||
| 264 | # # 'cn_key': 'value', | ||
| 265 | # # 'cn_key': 'value', | ||
| 266 | # # }, | ||
| 267 | # # { | ||
| 268 | # # 'cn_key': 'value', | ||
| 269 | # # 'cn_key': 'value', | ||
| 270 | # # }, | ||
| 271 | # # ] | ||
| 272 | # # } | ||
| 273 | # # } | ||
| 274 | # # | ||
| 275 | # # # 证件-2 or 其他类 | ||
| 276 | # # res = { | ||
| 277 | # # 'code': 1, | ||
| 278 | # # 'msg': 'success', | ||
| 279 | # # 'data': { | ||
| 280 | # # 'classify': 0, | ||
| 281 | # # 'confidence': 0.999, | ||
| 282 | # # } | ||
| 283 | # # } | ||
| 284 | # with open(img_path, 'rb') as f: | ||
| 285 | # base64_data = base64.b64encode(f.read()) | ||
| 286 | # # 获取解码后的base64值 | ||
| 287 | # file_data = base64_data.decode() | ||
| 288 | # json_data_1 = { | ||
| 289 | # "file": file_data | ||
| 290 | # } | ||
| 291 | # response_1 = requests.post(self.ocr_url_1, json=json_data_1) | ||
| 292 | # if response_1.status_code == 200: | ||
| 293 | # ocr_res_1 = response_1.json() | ||
| 294 | # self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format( | ||
| 295 | # self.log_base, img_path, ocr_res_1)) | ||
| 296 | # | ||
| 297 | # if ocr_res_1.get('code') == 1: | ||
| 298 | # ocr_data = ocr_res_1.get('data', {}) | ||
| 299 | # classify = ocr_data.get('classify') | ||
| 300 | # if classify is None: | ||
| 301 | # return | ||
| 302 | # elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 | ||
| 303 | # return | ||
| 304 | # elif classify in consts.LICENSE_CLASSIFY_SET_1: # 证件1 | ||
| 305 | # self.license1_process(ocr_data, license_summary, classify) | ||
| 306 | # elif classify in consts.LICENSE_CLASSIFY_SET_2: # 证件2 | ||
| 307 | # pid, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify) | ||
| 308 | # json_data_2 = { | ||
| 309 | # "pid": str(pid), | ||
| 310 | # "key": conf.OCR_KEY, | ||
| 311 | # "secret": conf.OCR_SECRET, | ||
| 312 | # "file": file_data | ||
| 313 | # } | ||
| 314 | # response_2 = requests.post(self.ocr_url_2, data=json_data_2) | ||
| 315 | # if response_2.status_code == 200: | ||
| 316 | # # 识别结果 | ||
| 317 | # ocr_res_2 = response_2.json() | ||
| 318 | # self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format( | ||
| 319 | # self.log_base, img_path, ocr_res_2)) | ||
| 320 | # self.license2_process(ocr_res_2, license_summary, pid, classify) | ||
| 321 | # else: | ||
| 322 | # raise Exception('ocr 2 error, img_path={0}'.format(img_path)) | ||
| 323 | # else: # 流水处理 | ||
| 324 | # self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify) | ||
| 325 | # else: | ||
| 326 | # pass | ||
| 327 | # else: | ||
| 328 | # raise Exception('ocr 1 error, img_path={0}'.format(img_path)) | ||
| 289 | 329 | ||
| 290 | @staticmethod | 330 | @staticmethod |
| 291 | def get_most(value_list): | 331 | def get_most(value_list): |
| ... | @@ -475,13 +515,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -475,13 +515,14 @@ class Command(BaseCommand, LoggerMixin): |
| 475 | # wb = Workbook() | 515 | # wb = Workbook() |
| 476 | 516 | ||
| 477 | # 4.1 获取OCR结果 | 517 | # 4.1 获取OCR结果 |
| 478 | # loop = asyncio.get_event_loop() | 518 | loop = asyncio.get_event_loop() |
| 479 | # tasks = [self.img_2_ocr_2_wb(wb, img_path, summary) for img_path in pdf_handler.img_path_list] | 519 | tasks = [self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary) |
| 480 | # loop.run_until_complete(asyncio.wait(tasks)) | 520 | for img_path in pdf_handler.img_path_list] |
| 521 | loop.run_until_complete(asyncio.wait(tasks)) | ||
| 481 | # loop.close() | 522 | # loop.close() |
| 482 | 523 | ||
| 483 | for img_path in pdf_handler.img_path_list: | 524 | # for img_path in pdf_handler.img_path_list: |
| 484 | self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary) | 525 | # self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary) |
| 485 | 526 | ||
| 486 | self.cronjob_log.info('{0} [bs_summary={1}] [unknown_summary={2}] [license_summary={3}]'.format( | 527 | self.cronjob_log.info('{0} [bs_summary={1}] [unknown_summary={2}] [license_summary={3}]'.format( |
| 487 | self.log_base, bs_summary, unknown_summary, license_summary)) | 528 | self.log_base, bs_summary, unknown_summary, license_summary)) | ... | ... |
-
Please register or sign in to post a comment