# from openpyxl import Workbook
Showing 1 changed file with 28 additions and 12 deletions
... | @@ -5,7 +5,7 @@ import signal | ... | @@ -5,7 +5,7 @@ import signal |
5 | import base64 | 5 | import base64 |
6 | import asyncio | 6 | import asyncio |
7 | import aiohttp | 7 | import aiohttp |
8 | # from openpyxl import Workbook | 8 | import requests |
9 | from apps.doc.ocr.wb import BSWorkbook, Workbook | 9 | from apps.doc.ocr.wb import BSWorkbook, Workbook |
10 | from django.core.management import BaseCommand | 10 | from django.core.management import BaseCommand |
11 | 11 | ||
... | @@ -109,17 +109,30 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -109,17 +109,30 @@ class Command(BaseCommand, LoggerMixin): |
109 | base64_data = base64.b64encode(f.read()) | 109 | base64_data = base64.b64encode(f.read()) |
110 | return {'imgBase64': base64_data.decode('utf-8')} | 110 | return {'imgBase64': base64_data.decode('utf-8')} |
111 | 111 | ||
112 | async def fetch_ocr_result(self, img_path): | 112 | # async def fetch_ocr_result(self, img_path): |
113 | async with aiohttp.ClientSession( | 113 | # async with aiohttp.ClientSession( |
114 | headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False) | 114 | # headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False) |
115 | ) as session: | 115 | # ) as session: |
116 | # json_data = self.get_ocr_json(img_path) | ||
117 | # async with session.post(self.ocr_url, json=json_data) as response: | ||
118 | # return await response.json() | ||
119 | # | ||
120 | # async def img_ocr_excel(self, wb, img_path, role_summary): | ||
121 | # res = await self.fetch_ocr_result(img_path) | ||
122 | # self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) | ||
123 | # sheets_list = res.get('result').get('res') | ||
124 | # img_name = os.path.basename(img_path) | ||
125 | # self.append_sheet(wb, sheets_list, img_name, role_summary) | ||
126 | |||
127 | def fetch_ocr_result(self, img_path): | ||
116 | json_data = self.get_ocr_json(img_path) | 128 | json_data = self.get_ocr_json(img_path) |
117 | async with session.post(self.ocr_url, json=json_data) as response: | 129 | response = requests.post(self.ocr_url, json=json_data, headers=self.ocr_header) |
118 | return await response.json() | 130 | return response.json() |
119 | 131 | ||
120 | async def img_ocr_excel(self, wb, img_path, role_summary): | 132 | def img_ocr_excel(self, wb, img_path, role_summary): |
121 | res = await self.fetch_ocr_result(img_path) | 133 | res = self.fetch_ocr_result(img_path) |
122 | self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) | 134 | self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) |
135 | if res.get('code') == 1: | ||
123 | sheets_list = res.get('result').get('res') | 136 | sheets_list = res.get('result').get('res') |
124 | img_name = os.path.basename(img_path) | 137 | img_name = os.path.basename(img_path) |
125 | self.append_sheet(wb, sheets_list, img_name, role_summary) | 138 | self.append_sheet(wb, sheets_list, img_name, role_summary) |
... | @@ -164,11 +177,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -164,11 +177,14 @@ class Command(BaseCommand, LoggerMixin): |
164 | # loan_keyword = Keywords.objects.filter(type=KeywordsType.LOAN.value).values_list('keyword', flat=True) | 177 | # loan_keyword = Keywords.objects.filter(type=KeywordsType.LOAN.value).values_list('keyword', flat=True) |
165 | # wb = BSWorkbook(interest_keyword, salary_keyword, loan_keyword) | 178 | # wb = BSWorkbook(interest_keyword, salary_keyword, loan_keyword) |
166 | wb = Workbook() | 179 | wb = Workbook() |
167 | loop = asyncio.get_event_loop() | 180 | # loop = asyncio.get_event_loop() |
168 | tasks = [self.img_ocr_excel(wb, img_path, role_summary) for img_path in pdf_handler.img_path_list] | 181 | # tasks = [self.img_ocr_excel(wb, img_path, role_summary) for img_path in pdf_handler.img_path_list] |
169 | loop.run_until_complete(asyncio.wait(tasks)) | 182 | # loop.run_until_complete(asyncio.wait(tasks)) |
170 | # loop.close() | 183 | # loop.close() |
171 | 184 | ||
185 | for img_path in pdf_handler.img_path_list: | ||
186 | self.img_ocr_excel(wb, img_path, role_summary) | ||
187 | |||
172 | # 整合excel文件 | 188 | # 整合excel文件 |
173 | # wb.save(src_excel_path) | 189 | # wb.save(src_excel_path) |
174 | # wb.rebuild(role_summary) | 190 | # wb.rebuild(role_summary) | ... | ... |
-
Please register or sign in to post a comment