update ocr url
Showing 1 changed file with 9 additions and 18 deletions
... | @@ -28,16 +28,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -28,16 +28,8 @@ class Command(BaseCommand, LoggerMixin): |
28 | self.switch = True | 28 | self.switch = True |
29 | # 数据目录 | 29 | # 数据目录 |
30 | self.data_dir = conf.DATA_DIR | 30 | self.data_dir = conf.DATA_DIR |
31 | # pdf页面转图片 | ||
32 | self.zoom_x = 2.0 | ||
33 | self.zoom_y = 2.0 | ||
34 | self.trans = fitz.Matrix(self.zoom_x, self.zoom_y).preRotate(0) # zoom factor 2 in each dimension | ||
35 | # ocr相关 | 31 | # ocr相关 |
36 | self.ocr_url = conf.OCR_URL | 32 | self.ocr_url = conf.OCR_URL |
37 | self.ocr_header = { | ||
38 | 'X-Auth-Token': conf.OCR_TOKEN, | ||
39 | 'Content-Type': 'application/json' | ||
40 | } | ||
41 | # EDMS web_service_api | 33 | # EDMS web_service_api |
42 | self.edms = EDMS(conf.EDMS_USER, conf.EDMS_PWD) | 34 | self.edms = EDMS(conf.EDMS_USER, conf.EDMS_PWD) |
43 | # 优雅退出信号:15 | 35 | # 优雅退出信号:15 |
... | @@ -103,12 +95,6 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -103,12 +95,6 @@ class Command(BaseCommand, LoggerMixin): |
103 | words = cell.get('words') | 95 | words = cell.get('words') |
104 | ws.cell(row=r1+1, column=c1+1, value=words) | 96 | ws.cell(row=r1+1, column=c1+1, value=words) |
105 | 97 | ||
106 | @staticmethod | ||
107 | def get_ocr_json(img_path): | ||
108 | with open(img_path, "rb") as f: | ||
109 | base64_data = base64.b64encode(f.read()) | ||
110 | return {'imgBase64': base64_data.decode('utf-8')} | ||
111 | |||
112 | # async def fetch_ocr_result(self, img_path): | 98 | # async def fetch_ocr_result(self, img_path): |
113 | # async with aiohttp.ClientSession( | 99 | # async with aiohttp.ClientSession( |
114 | # headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False) | 100 | # headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False) |
... | @@ -125,15 +111,20 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -125,15 +111,20 @@ class Command(BaseCommand, LoggerMixin): |
125 | # self.append_sheet(wb, sheets_list, img_name, role_summary) | 111 | # self.append_sheet(wb, sheets_list, img_name, role_summary) |
126 | 112 | ||
127 | def fetch_ocr_result(self, img_path): | 113 | def fetch_ocr_result(self, img_path): |
128 | json_data = self.get_ocr_json(img_path) | 114 | # payload = {'name': 'page_0_img_0_0'} |
129 | response = requests.post(self.ocr_url, json=json_data, headers=self.ocr_header) | 115 | files = [ |
116 | ('img', open(img_path, 'rb')) | ||
117 | ] | ||
118 | response = requests.request("POST", self.ocr_url, files=files) | ||
130 | return response.json() | 119 | return response.json() |
131 | 120 | ||
132 | def img_ocr_excel(self, wb, img_path, role_summary): | 121 | def img_ocr_excel(self, wb, img_path, role_summary): |
133 | res = self.fetch_ocr_result(img_path) | 122 | res = self.fetch_ocr_result(img_path) |
134 | self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) | 123 | self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) |
135 | if res.get('code') == '1': | 124 | if res.get('code') == 1: |
136 | sheets_list = res.get('result').get('res') | 125 | sheets_list = res.get('data') |
126 | if not sheets_list: | ||
127 | return | ||
137 | img_name = os.path.basename(img_path) | 128 | img_name = os.path.basename(img_path) |
138 | self.append_sheet(wb, sheets_list, img_name, role_summary) | 129 | self.append_sheet(wb, sheets_list, img_name, role_summary) |
139 | 130 | ... | ... |
-
Please register or sign in to post a comment