6a5899fa by 周伟奇

add asyncio

1 parent f682cf20
......@@ -33,6 +33,4 @@ data/*
# 脚本
src/*.sh
test.py
ocr_test.py
ocr_test_2.py
\ No newline at end of file
test*
\ No newline at end of file
......
......@@ -175,14 +175,12 @@ class Command(BaseCommand, LoggerMixin):
(field_dict.get('chn_key', ''), field_dict.get('value', '')))
license_summary.setdefault(classify, []).append(res_list)
# async def fetch_ocr_result(self, img_path):
# async with aiohttp.ClientSession(
# headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False)
# ) as session:
# json_data = self.get_ocr_json(img_path)
# async with session.post(self.ocr_url, json=json_data) as response:
# return await response.json()
#
async def fetch_ocr_result(self, url, json_data):
async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
async with session.post(url, json=json_data) as response:
if response.status == 200:
return await response.json()
# async def img_2_ocr_2_wb(self, wb, img_path, summary):
# res = await self.fetch_ocr_result(img_path)
# self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res))
......@@ -190,56 +188,7 @@ class Command(BaseCommand, LoggerMixin):
# img_name = os.path.basename(img_path)
# self.append_sheet(wb, sheets_list, img_name, summary)
def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary):
# # 流水
# res = {
# 'code': 1,
# 'msg': 'success',
# 'data': {
# 'classify': 0,
# 'confidence': 0.999,
# 'data': [
# {
# 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'],
# 'cells': []
# },
# {
# 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'],
# 'cells': []
# }
# ]
# }
# }
#
# # 证件-1
# res = {
# 'code': 1,
# 'msg': 'success',
# 'data': {
# 'classify': 0,
# 'confidence': 0.999,
# 'data': [
# {
# 'cn_key': 'value',
# 'cn_key': 'value',
# },
# {
# 'cn_key': 'value',
# 'cn_key': 'value',
# },
# ]
# }
# }
#
# # 证件-2 or 其他类
# res = {
# 'code': 1,
# 'msg': 'success',
# 'data': {
# 'classify': 0,
# 'confidence': 0.999,
# }
# }
async def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary):
with open(img_path, 'rb') as f:
base64_data = base64.b64encode(f.read())
# 获取解码后的base64值
......@@ -247,9 +196,10 @@ class Command(BaseCommand, LoggerMixin):
json_data_1 = {
"file": file_data
}
response_1 = requests.post(self.ocr_url_1, json=json_data_1)
if response_1.status_code == 200:
ocr_res_1 = response_1.json()
ocr_res_1 = await self.fetch_ocr_result(self.ocr_url_1, json_data_1)
if ocr_res_1 is None:
raise Exception('ocr 1 error, img_path={0}'.format(img_path))
else:
self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format(
self.log_base, img_path, ocr_res_1))
......@@ -270,22 +220,112 @@ class Command(BaseCommand, LoggerMixin):
"secret": conf.OCR_SECRET,
"file": file_data
}
response_2 = requests.post(self.ocr_url_2, data=json_data_2)
if response_2.status_code == 200:
ocr_res_2 = await self.fetch_ocr_result(self.ocr_url_2, json_data_2)
if ocr_res_2 is None:
raise Exception('ocr 2 error, img_path={0}'.format(img_path))
else:
# 识别结果
ocr_res_2 = response_2.json()
self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format(
self.log_base, img_path, ocr_res_2))
self.license2_process(ocr_res_2, license_summary, pid, classify)
else:
raise Exception('ocr 2 error, img_path={0}'.format(img_path))
else: # 流水处理
self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify)
else:
pass
else:
raise Exception('ocr 1 error, img_path={0}'.format(img_path))
# def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary):
# # # 流水
# # res = {
# # 'code': 1,
# # 'msg': 'success',
# # 'data': {
# # 'classify': 0,
# # 'confidence': 0.999,
# # 'data': [
# # {
# # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'],
# # 'cells': []
# # },
# # {
# # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'],
# # 'cells': []
# # }
# # ]
# # }
# # }
# #
# # # 证件-1
# # res = {
# # 'code': 1,
# # 'msg': 'success',
# # 'data': {
# # 'classify': 0,
# # 'confidence': 0.999,
# # 'data': [
# # {
# # 'cn_key': 'value',
# # 'cn_key': 'value',
# # },
# # {
# # 'cn_key': 'value',
# # 'cn_key': 'value',
# # },
# # ]
# # }
# # }
# #
# # # 证件-2 or 其他类
# # res = {
# # 'code': 1,
# # 'msg': 'success',
# # 'data': {
# # 'classify': 0,
# # 'confidence': 0.999,
# # }
# # }
# with open(img_path, 'rb') as f:
# base64_data = base64.b64encode(f.read())
# # 获取解码后的base64值
# file_data = base64_data.decode()
# json_data_1 = {
# "file": file_data
# }
# response_1 = requests.post(self.ocr_url_1, json=json_data_1)
# if response_1.status_code == 200:
# ocr_res_1 = response_1.json()
# self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format(
# self.log_base, img_path, ocr_res_1))
#
# if ocr_res_1.get('code') == 1:
# ocr_data = ocr_res_1.get('data', {})
# classify = ocr_data.get('classify')
# if classify is None:
# return
# elif classify in consts.OTHER_CLASSIFY_SET: # 其他类
# return
# elif classify in consts.LICENSE_CLASSIFY_SET_1: # 证件1
# self.license1_process(ocr_data, license_summary, classify)
# elif classify in consts.LICENSE_CLASSIFY_SET_2: # 证件2
# pid, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify)
# json_data_2 = {
# "pid": str(pid),
# "key": conf.OCR_KEY,
# "secret": conf.OCR_SECRET,
# "file": file_data
# }
# response_2 = requests.post(self.ocr_url_2, data=json_data_2)
# if response_2.status_code == 200:
# # 识别结果
# ocr_res_2 = response_2.json()
# self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format(
# self.log_base, img_path, ocr_res_2))
# self.license2_process(ocr_res_2, license_summary, pid, classify)
# else:
# raise Exception('ocr 2 error, img_path={0}'.format(img_path))
# else: # 流水处理
# self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify)
# else:
# pass
# else:
# raise Exception('ocr 1 error, img_path={0}'.format(img_path))
@staticmethod
def get_most(value_list):
......@@ -475,13 +515,14 @@ class Command(BaseCommand, LoggerMixin):
# wb = Workbook()
# 4.1 获取OCR结果
# loop = asyncio.get_event_loop()
# tasks = [self.img_2_ocr_2_wb(wb, img_path, summary) for img_path in pdf_handler.img_path_list]
# loop.run_until_complete(asyncio.wait(tasks))
loop = asyncio.get_event_loop()
tasks = [self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary)
for img_path in pdf_handler.img_path_list]
loop.run_until_complete(asyncio.wait(tasks))
# loop.close()
for img_path in pdf_handler.img_path_list:
self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary)
# for img_path in pdf_handler.img_path_list:
# self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary)
self.cronjob_log.info('{0} [bs_summary={1}] [unknown_summary={2}] [license_summary={3}]'.format(
self.log_base, bs_summary, unknown_summary, license_summary))
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!