Merge branch 'feature/img' into feature/0611
Showing 3 changed files with 23 additions and 3 deletions
... | @@ -585,7 +585,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -585,7 +585,7 @@ class Command(BaseCommand, LoggerMixin): |
585 | img_save_path = os.path.join(doc_data_path, 'img') | 585 | img_save_path = os.path.join(doc_data_path, 'img') |
586 | pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id)) | 586 | pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id)) |
587 | 587 | ||
588 | pdf_handler = PDFHandler(pdf_path, img_save_path) | 588 | pdf_handler = PDFHandler(pdf_path, img_save_path, doc.document_name) |
589 | max_count_obj = Configs.objects.filter(id=2).first() | 589 | max_count_obj = Configs.objects.filter(id=2).first() |
590 | try: | 590 | try: |
591 | max_img_count = int(max_count_obj.value) | 591 | max_img_count = int(max_count_obj.value) | ... | ... |
... | @@ -27,7 +27,7 @@ from apps.doc.named_enum import RequestTeam, RequestTrigger, ProcessName | ... | @@ -27,7 +27,7 @@ from apps.doc.named_enum import RequestTeam, RequestTrigger, ProcessName |
27 | from common.tools.comparison import cp | 27 | from common.tools.comparison import cp |
28 | 28 | ||
29 | compare_log = logging.getLogger('compare') | 29 | compare_log = logging.getLogger('compare') |
30 | log_base = '[CA Compare]' | 30 | log_base = '[Compare]' |
31 | 31 | ||
32 | 32 | ||
33 | def name_check(ocr_res_dict, second_ocr_field, second_compare_list, second_id_num, name): | 33 | def name_check(ocr_res_dict, second_ocr_field, second_compare_list, second_id_num, name): | ... | ... |
1 | import os | 1 | import os |
2 | import shutil | ||
2 | import fitz | 3 | import fitz |
3 | from PIL import Image | 4 | from PIL import Image |
4 | from io import BytesIO | 5 | from io import BytesIO |
... | @@ -22,12 +23,25 @@ WH_COUPLE_5 = (100, 200) | ... | @@ -22,12 +23,25 @@ WH_COUPLE_5 = (100, 200) |
22 | 23 | ||
23 | class PDFHandler: | 24 | class PDFHandler: |
24 | 25 | ||
25 | def __init__(self, path, img_dir_path): | 26 | def __init__(self, path, img_dir_path, document_name=None): |
26 | self.path = path | 27 | self.path = path |
27 | self.img_dir_path = img_dir_path | 28 | self.img_dir_path = img_dir_path |
28 | self.img_path_list = [] | 29 | self.img_path_list = [] |
29 | self.img_count = 0 | 30 | self.img_count = 0 |
30 | self.xref_set = set() | 31 | self.xref_set = set() |
32 | self.img_suffixs = {'.jpeg', '.jpg', '.png', '.webp', '.bmp'} | ||
33 | self.suffix = self.get_suffix(document_name) | ||
34 | |||
35 | def get_suffix(self, file_name): | ||
36 | if file_name is None: | ||
37 | return None | ||
38 | try: | ||
39 | _, src_suffix = os.path.splitext(file_name) | ||
40 | lower_suffix = src_suffix.lower() | ||
41 | if lower_suffix in self.img_suffixs: | ||
42 | return lower_suffix | ||
43 | except Exception as e: | ||
44 | return | ||
31 | 45 | ||
32 | def get_img_save_path(self, pno, img_index=0, ext='png'): | 46 | def get_img_save_path(self, pno, img_index=0, ext='png'): |
33 | return os.path.join(self.img_dir_path, 'page_{0}_img_{1}.{2}'.format(pno, img_index, ext)) | 47 | return os.path.join(self.img_dir_path, 'page_{0}_img_{1}.{2}'.format(pno, img_index, ext)) |
... | @@ -197,6 +211,12 @@ class PDFHandler: | ... | @@ -197,6 +211,12 @@ class PDFHandler: |
197 | self.img_path_list = [] | 211 | self.img_path_list = [] |
198 | self.xref_set = set() | 212 | self.xref_set = set() |
199 | os.makedirs(self.img_dir_path, exist_ok=True) | 213 | os.makedirs(self.img_dir_path, exist_ok=True) |
214 | |||
215 | if self.suffix in self.img_suffixs: | ||
216 | img_save_path = self.get_img_save_path(0, ext=self.suffix[1:]) | ||
217 | shutil.copy(self.path, img_save_path) | ||
218 | self.img_path_list.append(img_save_path) | ||
219 | else: | ||
200 | with fitz.Document(self.path) as pdf: | 220 | with fitz.Document(self.path) as pdf: |
201 | if isinstance(max_img_count, int) and pdf.pageCount >= max_img_count: | 221 | if isinstance(max_img_count, int) and pdf.pageCount >= max_img_count: |
202 | self.img_count = pdf.pageCount | 222 | self.img_count = pdf.pageCount | ... | ... |
-
Please register or sign in to post a comment