Update OCR URL and request format

周伟奇
Showing 1 changed file with 9 additions and 18 deletions
src/apps/doc/management/commands/doc_ocr_process.py
--- a/src/apps/doc/management/commands/doc_ocr_process.py
View file @a220590
+++ b/src/apps/doc/management/commands/doc_ocr_process.py
View file @a220590
@@ -28,16 +28,8 @@ class Command(BaseCommand, LoggerMixin):
        self.switch = True
        # 数据目录
        self.data_dir = conf.DATA_DIR
-        # pdf页面转图片
-        self.zoom_x = 2.0
-        self.zoom_y = 2.0
-        self.trans = fitz.Matrix(self.zoom_x, self.zoom_y).preRotate(0)  # zoom factor 2 in each dimension
        # ocr相关
        self.ocr_url = conf.OCR_URL
-        self.ocr_header = {
-            'X-Auth-Token': conf.OCR_TOKEN,
-            'Content-Type': 'application/json'
-        }
        # EDMS web_service_api
        self.edms = EDMS(conf.EDMS_USER, conf.EDMS_PWD)
        # 优雅退出信号：15
@@ -103,12 +95,6 @@ class Command(BaseCommand, LoggerMixin):
                words = cell.get('words')
                ws.cell(row=r1+1, column=c1+1, value=words)

-    @staticmethod
-    def get_ocr_json(img_path):
-        with open(img_path, "rb") as f:
-            base64_data = base64.b64encode(f.read())
-        return {'imgBase64': base64_data.decode('utf-8')}
-
    # async def fetch_ocr_result(self, img_path):
    #     async with aiohttp.ClientSession(
    #             headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False)
@@ -125,15 +111,20 @@ class Command(BaseCommand, LoggerMixin):
    #     self.append_sheet(wb, sheets_list, img_name, role_summary)

    def fetch_ocr_result(self, img_path):
-        json_data = self.get_ocr_json(img_path)
-        response = requests.post(self.ocr_url, json=json_data, headers=self.ocr_header)
+        """Upload the image at ``img_path`` to ``self.ocr_url`` and return the decoded JSON body."""
+        # Sent as the multipart form field 'img'. The with-block closes the
+        # file handle once the request completes, so it is never leaked.
+        with open(img_path, 'rb') as img_file:
+            response = requests.post(self.ocr_url, files=[('img', img_file)])
        return response.json()

    def img_ocr_excel(self, wb, img_path, role_summary):
        res = self.fetch_ocr_result(img_path)
        self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res))
-        if res.get('code') == '1':
-            sheets_list = res.get('result').get('res')
+        if res.get('code') == 1:
+            sheets_list = res.get('data')
+            if not sheets_list:
+                return
            img_name = os.path.basename(img_path)
            self.append_sheet(wb, sheets_list, img_name, role_summary)