4acc8f62 by 周伟奇

# from openpyxl import Workbook

1 parent d4e9acd6
......@@ -5,7 +5,7 @@ import signal
import base64
import asyncio
import aiohttp
# from openpyxl import Workbook
import requests
from apps.doc.ocr.wb import BSWorkbook, Workbook
from django.core.management import BaseCommand
......@@ -109,20 +109,33 @@ class Command(BaseCommand, LoggerMixin):
base64_data = base64.b64encode(f.read())
return {'imgBase64': base64_data.decode('utf-8')}
async def fetch_ocr_result(self, img_path):
    """Send one image to the OCR endpoint with aiohttp and return the JSON reply.

    Args:
        img_path: Path of the image; ``self.get_ocr_json`` turns it into the
            JSON payload of the request.

    Returns:
        The response body as decoded by ``response.json()``.
    """
    request_headers = self.ocr_header
    # ssl=False switches off TLS certificate verification for this connector.
    connector = aiohttp.TCPConnector(ssl=False)
    client = aiohttp.ClientSession(headers=request_headers, connector=connector)
    async with client:
        # The payload is built only once the session object exists.
        payload = self.get_ocr_json(img_path)
        async with client.post(self.ocr_url, json=payload) as reply:
            ocr_result = await reply.json()
    return ocr_result
# Coroutine form of img_ocr_excel: awaits self.fetch_ocr_result(img_path) and
# binds the reply to `res`; no further statement of this coroutine is visible here.
# NOTE(review): a plain `def img_ocr_excel` with the same parameters appears later
# in this file, and the remaining steps of this coroutine exist only as
# commented-out code below -- presumably this async form is a superseded
# leftover; confirm before relying on it.
async def img_ocr_excel(self, wb, img_path, role_summary):
res = await self.fetch_ocr_result(img_path)
# async def fetch_ocr_result(self, img_path):
# async with aiohttp.ClientSession(
# headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False)
# ) as session:
# json_data = self.get_ocr_json(img_path)
# async with session.post(self.ocr_url, json=json_data) as response:
# return await response.json()
#
# async def img_ocr_excel(self, wb, img_path, role_summary):
# res = await self.fetch_ocr_result(img_path)
# self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res))
# sheets_list = res.get('result').get('res')
# img_name = os.path.basename(img_path)
# self.append_sheet(wb, sheets_list, img_name, role_summary)
def fetch_ocr_result(self, img_path, timeout=300):
    """POST one image to the OCR service and return the decoded JSON reply.

    Args:
        img_path: Path of the image; ``self.get_ocr_json`` turns it into the
            JSON payload of the request.
        timeout: Seconds handed to ``requests.post`` (applies to connecting
            and to each wait for data from the server). requests waits
            indefinitely when no timeout is given, which would stall every
            remaining image of the job on a single hung call.
            NOTE(review): 300 was picked to resemble aiohttp's default
            5-minute total timeout used by the async variant; tune it to
            the real latency of the OCR service.

    Returns:
        The response body as decoded by ``response.json()``.

    Raises:
        requests.exceptions.Timeout: The service did not answer in time.
    """
    json_data = self.get_ocr_json(img_path)
    response = requests.post(
        self.ocr_url,
        json=json_data,
        headers=self.ocr_header,
        timeout=timeout,
    )
    return response.json()
def img_ocr_excel(self, wb, img_path, role_summary):
    """OCR one image and append its sheets to ``wb`` when the call succeeded.

    Args:
        wb: Workbook object forwarded to ``self.append_sheet``.
        img_path: Path of the image sent to the OCR service; its base name
            is forwarded to ``self.append_sheet`` as the image name.
        role_summary: Forwarded to ``self.append_sheet`` unchanged.

    Returns:
        None. Responses whose ``code`` is not 1 are logged and skipped.
    """
    res = self.fetch_ocr_result(img_path)
    self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res))
    # Sheets are extracted and appended exactly once, and only behind the
    # status check: an unguarded extraction ahead of it would append every
    # successful result twice and raise AttributeError on a reply that
    # carries no 'result'.
    if res.get('code') == 1:
        sheets_list = res.get('result').get('res')
        img_name = os.path.basename(img_path)
        self.append_sheet(wb, sheets_list, img_name, role_summary)
# TODO: refine the file statuses so that each failure state gets its own handling
# TODO: retry the OCR API call on failure
......@@ -164,11 +177,14 @@ class Command(BaseCommand, LoggerMixin):
# loan_keyword = Keywords.objects.filter(type=KeywordsType.LOAN.value).values_list('keyword', flat=True)
# wb = BSWorkbook(interest_keyword, salary_keyword, loan_keyword)
wb = Workbook()
loop = asyncio.get_event_loop()
tasks = [self.img_ocr_excel(wb, img_path, role_summary) for img_path in pdf_handler.img_path_list]
loop.run_until_complete(asyncio.wait(tasks))
# loop = asyncio.get_event_loop()
# tasks = [self.img_ocr_excel(wb, img_path, role_summary) for img_path in pdf_handler.img_path_list]
# loop.run_until_complete(asyncio.wait(tasks))
# loop.close()
for img_path in pdf_handler.img_path_list:
self.img_ocr_excel(wb, img_path, role_summary)
# Consolidate the Excel file
# wb.save(src_excel_path)
# wb.rebuild(role_summary)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!