4acc8f62 by 周伟奇

# from openpyxl import Workbook

1 parent d4e9acd6
...@@ -5,7 +5,7 @@ import signal ...@@ -5,7 +5,7 @@ import signal
5 import base64 5 import base64
6 import asyncio 6 import asyncio
7 import aiohttp 7 import aiohttp
8 # from openpyxl import Workbook 8 import requests
9 from apps.doc.ocr.wb import BSWorkbook, Workbook 9 from apps.doc.ocr.wb import BSWorkbook, Workbook
10 from django.core.management import BaseCommand 10 from django.core.management import BaseCommand
11 11
...@@ -109,20 +109,33 @@ class Command(BaseCommand, LoggerMixin): ...@@ -109,20 +109,33 @@ class Command(BaseCommand, LoggerMixin):
109 base64_data = base64.b64encode(f.read()) 109 base64_data = base64.b64encode(f.read())
110 return {'imgBase64': base64_data.decode('utf-8')} 110 return {'imgBase64': base64_data.decode('utf-8')}
111 111
112 async def fetch_ocr_result(self, img_path): 112 # async def fetch_ocr_result(self, img_path):
113 async with aiohttp.ClientSession( 113 # async with aiohttp.ClientSession(
114 headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False) 114 # headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False)
115 ) as session: 115 # ) as session:
116 json_data = self.get_ocr_json(img_path) 116 # json_data = self.get_ocr_json(img_path)
117 async with session.post(self.ocr_url, json=json_data) as response: 117 # async with session.post(self.ocr_url, json=json_data) as response:
118 return await response.json() 118 # return await response.json()
119 119 #
120 async def img_ocr_excel(self, wb, img_path, role_summary): 120 # async def img_ocr_excel(self, wb, img_path, role_summary):
121 res = await self.fetch_ocr_result(img_path) 121 # res = await self.fetch_ocr_result(img_path)
122 # self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res))
123 # sheets_list = res.get('result').get('res')
124 # img_name = os.path.basename(img_path)
125 # self.append_sheet(wb, sheets_list, img_name, role_summary)
126
127 def fetch_ocr_result(self, img_path):
128 json_data = self.get_ocr_json(img_path)
129 response = requests.post(self.ocr_url, json=json_data, headers=self.ocr_header)
130 return response.json()
131
132 def img_ocr_excel(self, wb, img_path, role_summary):
133 res = self.fetch_ocr_result(img_path)
122 self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) 134 self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res))
123 sheets_list = res.get('result').get('res') 135 if res.get('code') == 1:
124 img_name = os.path.basename(img_path) 136 sheets_list = res.get('result').get('res')
125 self.append_sheet(wb, sheets_list, img_name, role_summary) 137 img_name = os.path.basename(img_path)
138 self.append_sheet(wb, sheets_list, img_name, role_summary)
126 139
127 # TODO 细化文件状态,不同异常状态采取不同的处理 140 # TODO 细化文件状态,不同异常状态采取不同的处理
128 # TODO 调用接口重试 141 # TODO 调用接口重试
...@@ -164,11 +177,14 @@ class Command(BaseCommand, LoggerMixin): ...@@ -164,11 +177,14 @@ class Command(BaseCommand, LoggerMixin):
164 # loan_keyword = Keywords.objects.filter(type=KeywordsType.LOAN.value).values_list('keyword', flat=True) 177 # loan_keyword = Keywords.objects.filter(type=KeywordsType.LOAN.value).values_list('keyword', flat=True)
165 # wb = BSWorkbook(interest_keyword, salary_keyword, loan_keyword) 178 # wb = BSWorkbook(interest_keyword, salary_keyword, loan_keyword)
166 wb = Workbook() 179 wb = Workbook()
167 loop = asyncio.get_event_loop() 180 # loop = asyncio.get_event_loop()
168 tasks = [self.img_ocr_excel(wb, img_path, role_summary) for img_path in pdf_handler.img_path_list] 181 # tasks = [self.img_ocr_excel(wb, img_path, role_summary) for img_path in pdf_handler.img_path_list]
169 loop.run_until_complete(asyncio.wait(tasks)) 182 # loop.run_until_complete(asyncio.wait(tasks))
170 # loop.close() 183 # loop.close()
171 184
185 for img_path in pdf_handler.img_path_list:
186 self.img_ocr_excel(wb, img_path, role_summary)
187
172 # 整合excel文件 188 # 整合excel文件
173 # wb.save(src_excel_path) 189 # wb.save(src_excel_path)
174 # wb.rebuild(role_summary) 190 # wb.rebuild(role_summary)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!