add ltgt wb daily: create folder_ltgt_process.py and remove the LTGT handling from folder_ocr_process.py

周伟奇
Showing 2 changed files with 7 additions and 78 deletions
src/apps/doc/management/commands/folder_ltgt_process.py
src/apps/doc/management/commands/folder_ocr_process.py
--- a/src/apps/doc/management/commands/folder_ltgt_process.py 0 → 100644
View file @c1ca6fa
+++ b/src/apps/doc/management/commands/folder_ltgt_process.py 0 → 100644
View file @c1ca6fa
--- a/src/apps/doc/management/commands/folder_ocr_process.py
View file @c1ca6fa
+++ b/src/apps/doc/management/commands/folder_ocr_process.py
View file @c1ca6fa
@@ -15,7 +15,7 @@ from settings import conf
 from common.mixins import LoggerMixin
 from common.tools.pdf_to_img import PDFHandler
 from apps.doc import consts
-from apps.doc.exceptions import OCR1Exception, OCR4Exception, LTGTException
+from apps.doc.exceptions import OCR1Exception, OCR4Exception
 from apps.doc.ocr.wb import BSWorkbook


@@ -48,11 +48,6 @@ class Command(BaseCommand, LoggerMixin):
        self.log_base = '[folder ocr process]'
        # 处理文件开关
        self.switch = True
-        self.ltgt_classify_mapping = {
-            128: '执行裁定书',
-            129: '民事判决书',
-            130: '民事调解书'
-        }
        # 睡眠时间
        self.sleep_time = float(conf.SLEEP_SECOND_FOLDER)
        # input folder
@@ -60,7 +55,6 @@ class Command(BaseCommand, LoggerMixin):
        # ocr相关
        self.ocr_url = conf.OCR_URL_FOLDER
        self.ocr_url_4 = conf.IC_URL
-        self.ltgt_ocr_url = conf.LTGT_URL
        # 优雅退出信号：15
        signal.signal(signal.SIGTERM, self.signal_handler)

@@ -213,59 +207,6 @@ class Command(BaseCommand, LoggerMixin):
            else:
                self.folder_log.warn('{0} [ocr failed] [img_path={1}]'.format(self.log_base, img_path))
                
-    def ltgt_ocr_process(self, img_path_list, label, path):
-        img_data_list = []
-
-        for img_path in img_path_list:
-            if os.path.exists(img_path):
-                with open(img_path, 'rb') as f:
-                    base64_data = base64.b64encode(f.read())
-                    # 获取解码后的base64值
-                    file_data = base64_data.decode()
-                img_data_list.append(file_data)
-
-        json_data = {
-            "label": label,
-            "img_data_list": img_data_list
-        }
-
-        for times in range(consts.RETRY_TIMES):
-            try:
-                start_time = time.time()
-                ocr_response = requests.post(self.ltgt_ocr_url, json=json_data)
-                if ocr_response.status_code != 200:
-                    raise LTGTException('{0} ltgt ocr status code: {1}'.format(self.log_base, ocr_response.status_code))
-            except Exception as e:
-                self.folder_log.warn('{0} [ltgt ocr failed] [times={1}] [path={2}] [error={3}]'.format(
-                    self.log_base, times, path, traceback.format_exc()))
-            else:
-                ocr_res = ocr_response.json()
-                end_time = time.time()
-                speed_time = int(end_time - start_time)
-                self.folder_log.info('{0} [ltgt ocr success] [path={1}] [res={2}] [speed_time={3}]'.format(
-                    self.log_base, path, ocr_res, speed_time))
-                return ocr_res
-        else:
-            self.folder_log.warn('{0} [ltgt ocr failed] [path={1}]'.format(self.log_base, path))
-    
-    def ltgt_res_process(self, ocr_res, label, excel_path):
-        try:
-            if isinstance(ocr_res, dict):
-                if ocr_res.get('code') == 1:
-                    result_dict = ocr_res.get('data', {})
-
-                    wb = BSWorkbook(set(), set(), set(), set(), set())
-                    rebuild_res = wb.ltgt_build(label, result_dict)
-                    wb.remove_base_sheet()
-                    wb.save(excel_path)
-        except Exception as e:
-            self.folder_log.error('{0} [wb build error] [path={1}] [error={2}]'.format(
-                self.log_base, excel_path, traceback.format_exc()))
-
-    def ltgt_process(self, img_path_list, label, excel_path, path):
-        ocr_res = self.ltgt_ocr_process(img_path_list, label, path)
-        self.ltgt_res_process(ocr_res, label, excel_path)
-        
    def images_process(self, img_path_list, classify, excel_path):
        all_res = {}
        for img_path in img_path_list:
@@ -279,20 +220,14 @@ class Command(BaseCommand, LoggerMixin):
                img_save_path, excel_path, pdf_save_path = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir)
                self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path))
                pdf_handler = PDFHandler(path, img_save_path)
-                if classify in self.ltgt_classify_mapping:
-                    pdf_handler.extract_page_image()
-                else:
-                    pdf_handler.extract_image()
+                pdf_handler.extract_image()
                self.folder_log.info('{0} [pdf to img end] [path={1}]'.format(self.log_base, path))
            except Exception as e:
                self.folder_log.error('{0} [pdf to img error] [path={1}] [error={2}]'.format(
                    self.log_base, path, traceback.format_exc()))
                raise e
            else:
-                if classify in self.ltgt_classify_mapping:
-                    self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify], excel_path, path)
-                else:
-                    self.images_process(pdf_handler.img_path_list, classify, excel_path)
+                self.images_process(pdf_handler.img_path_list, classify, excel_path)
                shutil.move(path, pdf_save_path)

    def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir):
@@ -308,10 +243,7 @@ class Command(BaseCommand, LoggerMixin):
                    self.log_base, path, traceback.format_exc()))
                raise e
            else:
-                if classify in self.ltgt_classify_mapping:
-                    self.ltgt_process(tiff_handler.img_path_list, self.ltgt_classify_mapping[classify], excel_path, path)
-                else:
-                    self.images_process(tiff_handler.img_path_list, classify, excel_path)
+                self.images_process(tiff_handler.img_path_list, classify, excel_path)
                shutil.move(path, tiff_save_path)

    def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir):
@@ -321,12 +253,9 @@ class Command(BaseCommand, LoggerMixin):
            self.folder_log.error('{0} [get path error] [path={1}] [error={2}]'.format(
                self.log_base, path, traceback.format_exc()))
        else:
-            if classify in self.ltgt_classify_mapping:
-                self.ltgt_process([path], self.ltgt_classify_mapping[classify], excel_path, path)
-            else:
-                ocr_res = self.ocr_process(path, classify)
-                all_res = {path: ocr_res}
-                self.res_process(all_res, classify, excel_path)
+            ocr_res = self.ocr_process(path, classify)
+            all_res = {path: ocr_res}
+            self.res_process(all_res, classify, excel_path)
            shutil.move(path, img_save_path)

    def folder_process(self, input_dir, classify):