6a5899fa by 周伟奇

add asyncio

1 parent f682cf20
...@@ -33,6 +33,4 @@ data/* ...@@ -33,6 +33,4 @@ data/*
33 # 脚本 33 # 脚本
34 src/*.sh 34 src/*.sh
35 35
36 test.py
37 ocr_test.py
38 ocr_test_2.py
...\ No newline at end of file ...\ No newline at end of file
36 test*
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -175,14 +175,12 @@ class Command(BaseCommand, LoggerMixin): ...@@ -175,14 +175,12 @@ class Command(BaseCommand, LoggerMixin):
175 (field_dict.get('chn_key', ''), field_dict.get('value', ''))) 175 (field_dict.get('chn_key', ''), field_dict.get('value', '')))
176 license_summary.setdefault(classify, []).append(res_list) 176 license_summary.setdefault(classify, []).append(res_list)
177 177
async def fetch_ocr_result(self, url, json_data):
    """POST ``json_data`` as a JSON body to ``url`` and return the decoded reply.

    Args:
        url: OCR endpoint to call.
        json_data: JSON-serialisable payload sent as the request body.

    Returns:
        The parsed JSON response when the server answers with HTTP 200,
        otherwise ``None``. Callers check for ``None`` and raise their own
        error that includes the image path.

    NOTE(review): the synchronous code this replaces posted to the second
    OCR endpoint with ``data=`` (form-encoded) instead of ``json=``; confirm
    that endpoint really accepts a JSON body.
    """
    # NOTE(review): ssl=False turns off TLS certificate verification for
    # every request made through this session -- confirm this is intended.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.post(url, json=json_data) as response:
            if response.status != 200:
                # Explicit "no result" marker; any non-200 status is a failure.
                return None
            # content_type=None disables aiohttp's Content-Type validation so
            # a JSON body served with a non-JSON mimetype is still parsed,
            # matching what requests' Response.json() did before the switch.
            return await response.json(content_type=None)
186 # async def img_2_ocr_2_wb(self, wb, img_path, summary): 184 # async def img_2_ocr_2_wb(self, wb, img_path, summary):
187 # res = await self.fetch_ocr_result(img_path) 185 # res = await self.fetch_ocr_result(img_path)
188 # self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) 186 # self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res))
...@@ -190,56 +188,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -190,56 +188,7 @@ class Command(BaseCommand, LoggerMixin):
190 # img_name = os.path.basename(img_path) 188 # img_name = os.path.basename(img_path)
191 # self.append_sheet(wb, sheets_list, img_name, summary) 189 # self.append_sheet(wb, sheets_list, img_name, summary)
192 190
193 def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary): 191 async def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary):
194 # # 流水
195 # res = {
196 # 'code': 1,
197 # 'msg': 'success',
198 # 'data': {
199 # 'classify': 0,
200 # 'confidence': 0.999,
201 # 'data': [
202 # {
203 # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'],
204 # 'cells': []
205 # },
206 # {
207 # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'],
208 # 'cells': []
209 # }
210 # ]
211 # }
212 # }
213 #
214 # # 证件-1
215 # res = {
216 # 'code': 1,
217 # 'msg': 'success',
218 # 'data': {
219 # 'classify': 0,
220 # 'confidence': 0.999,
221 # 'data': [
222 # {
223 # 'cn_key': 'value',
224 # 'cn_key': 'value',
225 # },
226 # {
227 # 'cn_key': 'value',
228 # 'cn_key': 'value',
229 # },
230 # ]
231 # }
232 # }
233 #
234 # # 证件-2 or 其他类
235 # res = {
236 # 'code': 1,
237 # 'msg': 'success',
238 # 'data': {
239 # 'classify': 0,
240 # 'confidence': 0.999,
241 # }
242 # }
243 with open(img_path, 'rb') as f: 192 with open(img_path, 'rb') as f:
244 base64_data = base64.b64encode(f.read()) 193 base64_data = base64.b64encode(f.read())
245 # 获取解码后的base64值 194 # 获取解码后的base64值
...@@ -247,9 +196,10 @@ class Command(BaseCommand, LoggerMixin): ...@@ -247,9 +196,10 @@ class Command(BaseCommand, LoggerMixin):
247 json_data_1 = { 196 json_data_1 = {
248 "file": file_data 197 "file": file_data
249 } 198 }
250 response_1 = requests.post(self.ocr_url_1, json=json_data_1) 199 ocr_res_1 = await self.fetch_ocr_result(self.ocr_url_1, json_data_1)
251 if response_1.status_code == 200: 200 if ocr_res_1 is None:
252 ocr_res_1 = response_1.json() 201 raise Exception('ocr 1 error, img_path={0}'.format(img_path))
202 else:
253 self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format( 203 self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format(
254 self.log_base, img_path, ocr_res_1)) 204 self.log_base, img_path, ocr_res_1))
255 205
...@@ -270,22 +220,112 @@ class Command(BaseCommand, LoggerMixin): ...@@ -270,22 +220,112 @@ class Command(BaseCommand, LoggerMixin):
270 "secret": conf.OCR_SECRET, 220 "secret": conf.OCR_SECRET,
271 "file": file_data 221 "file": file_data
272 } 222 }
273 response_2 = requests.post(self.ocr_url_2, data=json_data_2) 223 ocr_res_2 = await self.fetch_ocr_result(self.ocr_url_2, json_data_2)
274 if response_2.status_code == 200: 224 if ocr_res_2 is None:
225 raise Exception('ocr 2 error, img_path={0}'.format(img_path))
226 else:
275 # 识别结果 227 # 识别结果
276 ocr_res_2 = response_2.json()
277 self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format( 228 self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format(
278 self.log_base, img_path, ocr_res_2)) 229 self.log_base, img_path, ocr_res_2))
279 self.license2_process(ocr_res_2, license_summary, pid, classify) 230 self.license2_process(ocr_res_2, license_summary, pid, classify)
280 else:
281 raise Exception('ocr 2 error, img_path={0}'.format(img_path))
282 else: # 流水处理 231 else: # 流水处理
283 self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify) 232 self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify)
284 else:
285 pass
286 else:
287 raise Exception('ocr 1 error, img_path={0}'.format(img_path))
288 233
234 # def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary):
235 # # # 流水
236 # # res = {
237 # # 'code': 1,
238 # # 'msg': 'success',
239 # # 'data': {
240 # # 'classify': 0,
241 # # 'confidence': 0.999,
242 # # 'data': [
243 # # {
244 # # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'],
245 # # 'cells': []
246 # # },
247 # # {
248 # # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'],
249 # # 'cells': []
250 # # }
251 # # ]
252 # # }
253 # # }
254 # #
255 # # # 证件-1
256 # # res = {
257 # # 'code': 1,
258 # # 'msg': 'success',
259 # # 'data': {
260 # # 'classify': 0,
261 # # 'confidence': 0.999,
262 # # 'data': [
263 # # {
264 # # 'cn_key': 'value',
265 # # 'cn_key': 'value',
266 # # },
267 # # {
268 # # 'cn_key': 'value',
269 # # 'cn_key': 'value',
270 # # },
271 # # ]
272 # # }
273 # # }
274 # #
275 # # # 证件-2 or 其他类
276 # # res = {
277 # # 'code': 1,
278 # # 'msg': 'success',
279 # # 'data': {
280 # # 'classify': 0,
281 # # 'confidence': 0.999,
282 # # }
283 # # }
284 # with open(img_path, 'rb') as f:
285 # base64_data = base64.b64encode(f.read())
286 # # 获取解码后的base64值
287 # file_data = base64_data.decode()
288 # json_data_1 = {
289 # "file": file_data
290 # }
291 # response_1 = requests.post(self.ocr_url_1, json=json_data_1)
292 # if response_1.status_code == 200:
293 # ocr_res_1 = response_1.json()
294 # self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format(
295 # self.log_base, img_path, ocr_res_1))
296 #
297 # if ocr_res_1.get('code') == 1:
298 # ocr_data = ocr_res_1.get('data', {})
299 # classify = ocr_data.get('classify')
300 # if classify is None:
301 # return
302 # elif classify in consts.OTHER_CLASSIFY_SET: # 其他类
303 # return
304 # elif classify in consts.LICENSE_CLASSIFY_SET_1: # 证件1
305 # self.license1_process(ocr_data, license_summary, classify)
306 # elif classify in consts.LICENSE_CLASSIFY_SET_2: # 证件2
307 # pid, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify)
308 # json_data_2 = {
309 # "pid": str(pid),
310 # "key": conf.OCR_KEY,
311 # "secret": conf.OCR_SECRET,
312 # "file": file_data
313 # }
314 # response_2 = requests.post(self.ocr_url_2, data=json_data_2)
315 # if response_2.status_code == 200:
316 # # 识别结果
317 # ocr_res_2 = response_2.json()
318 # self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format(
319 # self.log_base, img_path, ocr_res_2))
320 # self.license2_process(ocr_res_2, license_summary, pid, classify)
321 # else:
322 # raise Exception('ocr 2 error, img_path={0}'.format(img_path))
323 # else: # 流水处理
324 # self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify)
325 # else:
326 # pass
327 # else:
328 # raise Exception('ocr 1 error, img_path={0}'.format(img_path))
289 329
290 @staticmethod 330 @staticmethod
291 def get_most(value_list): 331 def get_most(value_list):
...@@ -475,13 +515,14 @@ class Command(BaseCommand, LoggerMixin): ...@@ -475,13 +515,14 @@ class Command(BaseCommand, LoggerMixin):
475 # wb = Workbook() 515 # wb = Workbook()
476 516
477 # 4.1 获取OCR结果 517 # 4.1 获取OCR结果
478 # loop = asyncio.get_event_loop() 518 loop = asyncio.get_event_loop()
479 # tasks = [self.img_2_ocr_2_wb(wb, img_path, summary) for img_path in pdf_handler.img_path_list] 519 tasks = [self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary)
480 # loop.run_until_complete(asyncio.wait(tasks)) 520 for img_path in pdf_handler.img_path_list]
521 loop.run_until_complete(asyncio.wait(tasks))
481 # loop.close() 522 # loop.close()
482 523
483 for img_path in pdf_handler.img_path_list: 524 # for img_path in pdf_handler.img_path_list:
484 self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary) 525 # self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary)
485 526
486 self.cronjob_log.info('{0} [bs_summary={1}] [unknown_summary={2}] [license_summary={3}]'.format( 527 self.cronjob_log.info('{0} [bs_summary={1}] [unknown_summary={2}] [license_summary={3}]'.format(
487 self.log_base, bs_summary, unknown_summary, license_summary)) 528 self.log_base, bs_summary, unknown_summary, license_summary))
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!