# from openpyxl import Workbook
Showing 1 changed file with 33 additions and 17 deletions
... | @@ -5,7 +5,7 @@ import signal | ... | @@ -5,7 +5,7 @@ import signal |
5 | import base64 | 5 | import base64 |
6 | import asyncio | 6 | import asyncio |
7 | import aiohttp | 7 | import aiohttp |
8 | # from openpyxl import Workbook | 8 | import requests |
9 | from apps.doc.ocr.wb import BSWorkbook, Workbook | 9 | from apps.doc.ocr.wb import BSWorkbook, Workbook |
10 | from django.core.management import BaseCommand | 10 | from django.core.management import BaseCommand |
11 | 11 | ||
... | @@ -109,20 +109,33 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -109,20 +109,33 @@ class Command(BaseCommand, LoggerMixin): |
109 | base64_data = base64.b64encode(f.read()) | 109 | base64_data = base64.b64encode(f.read()) |
110 | return {'imgBase64': base64_data.decode('utf-8')} | 110 | return {'imgBase64': base64_data.decode('utf-8')} |
111 | 111 | ||
112 | async def fetch_ocr_result(self, img_path): | 112 | # async def fetch_ocr_result(self, img_path): |
113 | async with aiohttp.ClientSession( | 113 | # async with aiohttp.ClientSession( |
114 | headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False) | 114 | # headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False) |
115 | ) as session: | 115 | # ) as session: |
116 | json_data = self.get_ocr_json(img_path) | 116 | # json_data = self.get_ocr_json(img_path) |
117 | async with session.post(self.ocr_url, json=json_data) as response: | 117 | # async with session.post(self.ocr_url, json=json_data) as response: |
118 | return await response.json() | 118 | # return await response.json() |
119 | 119 | # | |
120 | async def img_ocr_excel(self, wb, img_path, role_summary): | 120 | # async def img_ocr_excel(self, wb, img_path, role_summary): |
121 | res = await self.fetch_ocr_result(img_path) | 121 | # res = await self.fetch_ocr_result(img_path) |
122 | # self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) | ||
123 | # sheets_list = res.get('result').get('res') | ||
124 | # img_name = os.path.basename(img_path) | ||
125 | # self.append_sheet(wb, sheets_list, img_name, role_summary) | ||
126 | |||
127 | def fetch_ocr_result(self, img_path): | ||
128 | json_data = self.get_ocr_json(img_path) | ||
129 | response = requests.post(self.ocr_url, json=json_data, headers=self.ocr_header) | ||
130 | return response.json() | ||
131 | |||
132 | def img_ocr_excel(self, wb, img_path, role_summary): | ||
133 | res = self.fetch_ocr_result(img_path) | ||
122 | self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) | 134 | self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) |
123 | sheets_list = res.get('result').get('res') | 135 | if res.get('code') == 1: |
124 | img_name = os.path.basename(img_path) | 136 | sheets_list = res.get('result').get('res') |
125 | self.append_sheet(wb, sheets_list, img_name, role_summary) | 137 | img_name = os.path.basename(img_path) |
138 | self.append_sheet(wb, sheets_list, img_name, role_summary) | ||
126 | 139 | ||
127 | # TODO 细化文件状态,不同异常状态采取不同的处理 | 140 | # TODO 细化文件状态,不同异常状态采取不同的处理 |
128 | # TODO 调用接口重试 | 141 | # TODO 调用接口重试 |
... | @@ -164,11 +177,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -164,11 +177,14 @@ class Command(BaseCommand, LoggerMixin): |
164 | # loan_keyword = Keywords.objects.filter(type=KeywordsType.LOAN.value).values_list('keyword', flat=True) | 177 | # loan_keyword = Keywords.objects.filter(type=KeywordsType.LOAN.value).values_list('keyword', flat=True) |
165 | # wb = BSWorkbook(interest_keyword, salary_keyword, loan_keyword) | 178 | # wb = BSWorkbook(interest_keyword, salary_keyword, loan_keyword) |
166 | wb = Workbook() | 179 | wb = Workbook() |
167 | loop = asyncio.get_event_loop() | 180 | # loop = asyncio.get_event_loop() |
168 | tasks = [self.img_ocr_excel(wb, img_path, role_summary) for img_path in pdf_handler.img_path_list] | 181 | # tasks = [self.img_ocr_excel(wb, img_path, role_summary) for img_path in pdf_handler.img_path_list] |
169 | loop.run_until_complete(asyncio.wait(tasks)) | 182 | # loop.run_until_complete(asyncio.wait(tasks)) |
170 | # loop.close() | 183 | # loop.close() |
171 | 184 | ||
185 | for img_path in pdf_handler.img_path_list: | ||
186 | self.img_ocr_excel(wb, img_path, role_summary) | ||
187 | |||
172 | # 整合excel文件 | 188 | # 整合excel文件 |
173 | # wb.save(src_excel_path) | 189 | # wb.save(src_excel_path) |
174 | # wb.rebuild(role_summary) | 190 | # wb.rebuild(role_summary) | ... | ... |
-
Please register or sign in to post a comment