4fad0d1f by 周伟奇

add litigation

1 parent ce86bdd5
...@@ -13,6 +13,9 @@ class OCR2Exception(Exception): ...@@ -13,6 +13,9 @@ class OCR2Exception(Exception):
13 class OCR4Exception(Exception): 13 class OCR4Exception(Exception):
14 pass 14 pass
15 15
class LTGTException(Exception):
    """Error type for LTGT processing.

    NOTE(review): LTGT presumably abbreviates "litigation" (per the commit
    title); where this is raised is not visible here -- confirm with callers.
    """
18
16 19
17 class GCAPException(Exception): 20 class GCAPException(Exception):
18 pass 21 pass
......
...@@ -702,6 +702,23 @@ class BSWorkbook(Workbook): ...@@ -702,6 +702,23 @@ class BSWorkbook(Workbook):
702 if field_str is not None: 702 if field_str is not None:
703 count_list.append((field_str, count)) 703 count_list.append((field_str, count))
704 704
def ltgt_build(self, label, result_dict):
    """Write ``result_dict`` into a new worksheet titled ``label``.

    One row is appended per entry, with the key (or ``"key: sub_key"`` for
    nested dicts) in the first column:

    * list/tuple value      -> the items are spread across the following cells
    * dict with ``'words'`` -> a single cell holding ``value['words']``
    * dict without it       -> one row per sub-entry, labelled "key: sub_key"
    * anything else         -> a single cell holding the value

    Dict items found where a cell is expected are reduced to their
    ``'words'`` entry (``''`` when absent).

    Args:
        label: Title passed to ``self.create_sheet``.
        result_dict: Mapping of field name to the value shapes listed above.
    """

    def to_cells(value):
        # A worksheet cell must hold a scalar (assuming an openpyxl sheet,
        # which rejects list/dict cell values), so sequences are spread over
        # several cells and dict items collapse to their recognised text.
        items = value if isinstance(value, (list, tuple)) else (value,)
        return [
            item.get('words', '') if isinstance(item, dict) else item
            for item in items
        ]

    ws = self.create_sheet(label)
    for key, value in result_dict.items():
        if isinstance(value, dict) and 'words' not in value:
            # Nested group: expand into one labelled row per sub-field.
            for sub_key, sub_value in value.items():
                row_label = '{0}: {1}'.format(key, sub_key)
                ws.append((row_label, *to_cells(sub_value)))
        else:
            ws.append((key, *to_cells(value)))
721
705 def simple_license_rebuild(self, license_summary, document_scheme): 722 def simple_license_rebuild(self, license_summary, document_scheme):
706 # for ic_license_dict in license_summary.get(consts.IC_CLASSIFY, []): 723 # for ic_license_dict in license_summary.get(consts.IC_CLASSIFY, []):
707 # if ic_license_dict.get('类别') == '1': 724 # if ic_license_dict.get('类别') == '1':
......
...@@ -225,3 +225,13 @@ class PDFHandler: ...@@ -225,3 +225,13 @@ class PDFHandler:
225 else: 225 else:
226 self.merge_il(pdf, pno, il) 226 self.merge_il(pdf, pno, il)
227 self.img_count = len(self.img_path_list) 227 self.img_count = len(self.img_path_list)
228
def extract_page_image(self):
    """Render every page of the PDF at ``self.path`` through ``page_to_png``.

    Resets ``self.img_path_list`` and ``self.xref_set``, makes sure
    ``self.img_dir_path`` exists, hands each page to ``self.page_to_png``,
    and finally stores ``len(self.img_path_list)`` in ``self.img_count``.
    """
    # Start from a clean slate so repeated calls do not accumulate state.
    self.img_path_list = []
    self.xref_set = set()
    os.makedirs(self.img_dir_path, exist_ok=True)

    # NOTE(review): pageCount / loadPage are the legacy camelCase PyMuPDF
    # names (newer releases use page_count / load_page) -- confirm they
    # match the pinned PyMuPDF version.
    with fitz.Document(self.path) as pdf:
        for page_no in range(pdf.pageCount):
            # presumably page_to_png records its output in img_path_list
            self.page_to_png(pdf.loadPage(page_no))

    self.img_count = len(self.img_path_list)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!