906f258d by 周伟奇

Merge branch 'feature/img' into feature/0611

2 parents 423427c0 fda1ebd3
...@@ -585,7 +585,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -585,7 +585,7 @@ class Command(BaseCommand, LoggerMixin):
585 img_save_path = os.path.join(doc_data_path, 'img') 585 img_save_path = os.path.join(doc_data_path, 'img')
586 pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id)) 586 pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id))
587 587
588 pdf_handler = PDFHandler(pdf_path, img_save_path) 588 pdf_handler = PDFHandler(pdf_path, img_save_path, doc.document_name)
589 max_count_obj = Configs.objects.filter(id=2).first() 589 max_count_obj = Configs.objects.filter(id=2).first()
590 try: 590 try:
591 max_img_count = int(max_count_obj.value) 591 max_img_count = int(max_count_obj.value)
......
...@@ -27,7 +27,7 @@ from apps.doc.named_enum import RequestTeam, RequestTrigger, ProcessName ...@@ -27,7 +27,7 @@ from apps.doc.named_enum import RequestTeam, RequestTrigger, ProcessName
27 from common.tools.comparison import cp 27 from common.tools.comparison import cp
28 28
29 compare_log = logging.getLogger('compare') 29 compare_log = logging.getLogger('compare')
30 log_base = '[CA Compare]' 30 log_base = '[Compare]'
31 31
32 32
33 def name_check(ocr_res_dict, second_ocr_field, second_compare_list, second_id_num, name): 33 def name_check(ocr_res_dict, second_ocr_field, second_compare_list, second_id_num, name):
......
1 import os 1 import os
2 import shutil
2 import fitz 3 import fitz
3 from PIL import Image 4 from PIL import Image
4 from io import BytesIO 5 from io import BytesIO
...@@ -22,12 +23,25 @@ WH_COUPLE_5 = (100, 200) ...@@ -22,12 +23,25 @@ WH_COUPLE_5 = (100, 200)
22 23
23 class PDFHandler: 24 class PDFHandler:
24 25
25 def __init__(self, path, img_dir_path): 26 def __init__(self, path, img_dir_path, document_name=None):
26 self.path = path 27 self.path = path
27 self.img_dir_path = img_dir_path 28 self.img_dir_path = img_dir_path
28 self.img_path_list = [] 29 self.img_path_list = []
29 self.img_count = 0 30 self.img_count = 0
30 self.xref_set = set() 31 self.xref_set = set()
32 self.img_suffixs = {'.jpeg', '.jpg', '.png', '.webp', '.bmp'}
33 self.suffix = self.get_suffix(document_name)
34
def get_suffix(self, file_name):
    """Return the lower-cased image extension of ``file_name``, or ``None``.

    The extension (including the leading dot) is returned only when it is
    one of ``self.img_suffixs``; every other input yields ``None``.

    Args:
        file_name: Name of the uploaded document, or ``None`` when the
            name is unknown.

    Returns:
        The lower-cased extension such as ``'.jpg'`` when it is a member
        of ``self.img_suffixs``, otherwise ``None``.
    """
    if file_name is None:
        return None
    try:
        _, src_suffix = os.path.splitext(file_name)
    except TypeError:
        # splitext rejects anything that is not str/bytes/os.PathLike.
        # Stay best-effort: such a name is simply "not an image".
        return None
    lower_suffix = src_suffix.lower()
    return lower_suffix if lower_suffix in self.img_suffixs else None
31 45
32 def get_img_save_path(self, pno, img_index=0, ext='png'): 46 def get_img_save_path(self, pno, img_index=0, ext='png'):
33 return os.path.join(self.img_dir_path, 'page_{0}_img_{1}.{2}'.format(pno, img_index, ext)) 47 return os.path.join(self.img_dir_path, 'page_{0}_img_{1}.{2}'.format(pno, img_index, ext))
...@@ -197,6 +211,12 @@ class PDFHandler: ...@@ -197,6 +211,12 @@ class PDFHandler:
197 self.img_path_list = [] 211 self.img_path_list = []
198 self.xref_set = set() 212 self.xref_set = set()
199 os.makedirs(self.img_dir_path, exist_ok=True) 213 os.makedirs(self.img_dir_path, exist_ok=True)
214
215 if self.suffix in self.img_suffixs:
216 img_save_path = self.get_img_save_path(0, ext=self.suffix[1:])
217 shutil.copy(self.path, img_save_path)
218 self.img_path_list.append(img_save_path)
219 else:
200 with fitz.Document(self.path) as pdf: 220 with fitz.Document(self.path) as pdf:
201 if isinstance(max_img_count, int) and pdf.pageCount >= max_img_count: 221 if isinstance(max_img_count, int) and pdf.pageCount >= max_img_count:
202 self.img_count = pdf.pageCount 222 self.img_count = pdf.pageCount
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!