Merge branch 'feature/weixin-bs-2'
Showing
4 changed files
with
266 additions
and
1 deletions
... | @@ -1287,7 +1287,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1287,7 +1287,10 @@ class Command(BaseCommand, LoggerMixin): |
1287 | target_pdf_path = os.path.join(pdf_doc_data_path, '{0}.pdf'.format(pdf_doc.id)) | 1287 | target_pdf_path = os.path.join(pdf_doc_data_path, '{0}.pdf'.format(pdf_doc.id)) |
1288 | shutil.move(pdf_path, target_pdf_path) | 1288 | shutil.move(pdf_path, target_pdf_path) |
1289 | 1289 | ||
1290 | pdf_task_str = consts.SPLIT_STR.join([business_type, str(pdf_doc.id), '0']) | 1290 | if '微信支付交易明细证明' in os.path.basename(pdf_path) or '微信流水' in os.path.basename(pdf_path): |
1291 | pdf_task_str = consts.SPLIT_STR.join([business_type, str(pdf_doc.id), '12']) | ||
1292 | else: | ||
1293 | pdf_task_str = consts.SPLIT_STR.join([business_type, str(pdf_doc.id), '0']) | ||
1291 | pdf_task_str_list.append(pdf_task_str) | 1294 | pdf_task_str_list.append(pdf_task_str) |
1292 | except Exception as e: | 1295 | except Exception as e: |
1293 | self.online_log.warn('{0} [zip_2_pdfs] [recreate pdf task failed] [task={1}] [pdf_path={2}]' | 1296 | self.online_log.warn('{0} [zip_2_pdfs] [recreate pdf task failed] [task={1}] [pdf_path={2}]' |
... | @@ -1504,6 +1507,138 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1504,6 +1507,138 @@ class Command(BaseCommand, LoggerMixin): |
1504 | self.log_base, traceback.format_exc())) | 1507 | self.log_base, traceback.format_exc())) |
1505 | # error_list.append(1) | 1508 | # error_list.append(1) |
1506 | # return | 1509 | # return |
1510 | elif classify_1_str == '12' or classify_1_str == '29': # weixin e-bs 或e-invoice 都走微信电子流水逻辑 | ||
1511 | try: | ||
1512 | max_img_count = 500 | ||
1513 | for times in range(consts.RETRY_TIMES): | ||
1514 | try: | ||
1515 | if doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX): | ||
1516 | self.online_log.info('{0} [mo ni xia dan] [task={1}] [times={2}] ' | ||
1517 | '[pdf_path={3}]'.format(self.log_base, task_str, | ||
1518 | times, pdf_path)) | ||
1519 | elif os.path.exists(pdf_path): | ||
1520 | self.online_log.info('{0} [pdf from zip file] [task={1}] [times={2}] ' | ||
1521 | '[pdf_path={3}]'.format(self.log_base, task_str, | ||
1522 | times, pdf_path)) | ||
1523 | else: | ||
1524 | # self.edms.download(pdf_path, doc.metadata_version_id) | ||
1525 | self.edms.download(pdf_path, doc.metadata_version_id, doc.document_scheme, | ||
1526 | business_type) | ||
1527 | self.online_log.info('{0} [ecm download success] [task={1}] [times={2}] ' | ||
1528 | '[pdf_path={3}]'.format(self.log_base, task_str, | ||
1529 | times, pdf_path)) | ||
1530 | |||
1531 | # 3.PDF文件提取图片 | ||
1532 | self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format( | ||
1533 | self.log_base, task_str, times)) | ||
1534 | start_time = time.time() | ||
1535 | pdf_handler.extract_image_for_weixin(max_img_count) | ||
1536 | end_time = time.time() | ||
1537 | speed_time = int(end_time - start_time) | ||
1538 | self.online_log.info('{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'.format( | ||
1539 | self.log_base, task_str, times, speed_time, pdf_handler.is_new_modify)) | ||
1540 | except Exception as e: | ||
1541 | self.online_log.warn('{0} [download or pdf to img failed] [task={1}] [times={2}] ' | ||
1542 | '[error={3}]'.format(self.log_base, task_str, times, | ||
1543 | traceback.format_exc())) | ||
1544 | else: | ||
1545 | break | ||
1546 | else: | ||
1547 | raise Exception('download or pdf to img failed') | ||
1548 | |||
1549 | if pdf_handler.img_count == 0: | ||
1550 | self.online_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format( | ||
1551 | self.log_base, task_str)) | ||
1552 | raise Exception('pdf img empty') | ||
1553 | elif pdf_handler.img_count >= max_img_count: | ||
1554 | self.online_log.info('{0} [too many pdf image] [task={1}] [img_count={2}]'.format( | ||
1555 | self.log_base, task_str, pdf_handler.img_count)) | ||
1556 | |||
1557 | try: | ||
1558 | report_table = HILOCRReport if business_type == consts.HIL_PREFIX else AFCOCRReport | ||
1559 | report_table.objects.create( | ||
1560 | case_number=doc.application_id, | ||
1561 | request_team=RequestTeam.get_value(doc.document_scheme, 0), | ||
1562 | request_trigger=RequestTrigger.get_value(doc.data_source, 0), | ||
1563 | input_file=doc.document_name, | ||
1564 | transaction_start=doc.start_time, | ||
1565 | transaction_end=doc.start_time, | ||
1566 | successful_at_this_level=False, | ||
1567 | failure_reason=FailureReason.IMG_LIMIT.value, | ||
1568 | process_name=ProcessName.ALL.value, | ||
1569 | notes='pdf page count: {0}'.format(str(pdf_handler.img_count)) | ||
1570 | ) | ||
1571 | except Exception as e: | ||
1572 | self.online_log.error('{0} [process error (report db save)] [error={1}]'.format( | ||
1573 | self.log_base, traceback.format_exc())) | ||
1574 | |||
1575 | try: | ||
1576 | doc.status = DocStatus.PROCESS_FAILED.value | ||
1577 | doc.page_count = pdf_handler.page_count | ||
1578 | doc.save() | ||
1579 | except Exception as e: | ||
1580 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
1581 | self.log_base, traceback.format_exc())) | ||
1582 | else: | ||
1583 | try: | ||
1584 | if pdf_handler.is_e_pdf: | ||
1585 | doc.metadata = pdf_handler.metadata if pdf_handler.metadata is None else \ | ||
1586 | json.dumps(pdf_handler.metadata) | ||
1587 | doc.page_count = pdf_handler.page_count | ||
1588 | doc.save() | ||
1589 | except Exception as e: | ||
1590 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
1591 | self.log_base, traceback.format_exc())) | ||
1592 | |||
1593 | with lock: | ||
1594 | todo_count_dict[task_str] = pdf_handler.img_count | ||
1595 | |||
1596 | self.online_log.info('{0} [pdf_2_img_2_queue] [{1}] [is_ebank={2}]'.format( | ||
1597 | self.log_base, task_str, pdf_handler.is_ebank | ||
1598 | )) | ||
1599 | for img_idx, img_path in enumerate(pdf_handler.img_path_list): | ||
1600 | while img_queue.full(): | ||
1601 | self.online_log.info('{0} [pdf_2_img_2_queue] [img queue full]'.format(self.log_base)) | ||
1602 | time.sleep(self.sleep_time_img_put) | ||
1603 | if pdf_handler.is_e_weixin_bs: | ||
1604 | try: | ||
1605 | #self.online_log.info('{0} [pdf_2_img_2_queue] [img_idx={1}] [page_text_list={2}]'.format(self.log_base, img_idx, pdf_handler.page_text_list)) | ||
1606 | text_list = pdf_handler.page_text_list[img_idx].pop('rebuild_text') | ||
1607 | except Exception as e: | ||
1608 | text_list = [] | ||
1609 | else: | ||
1610 | text_list = [] | ||
1611 | img_queue.put((business_type, img_path, text_list)) | ||
1612 | except Exception as e: | ||
1613 | try: | ||
1614 | end_time = timezone.now() | ||
1615 | report_table = HILOCRReport if business_type == consts.HIL_PREFIX else AFCOCRReport | ||
1616 | report_table.objects.create( | ||
1617 | case_number=doc.application_id, | ||
1618 | request_team=RequestTeam.get_value(doc.document_scheme, 0), | ||
1619 | request_trigger=RequestTrigger.get_value(doc.data_source, 0), | ||
1620 | input_file=doc.document_name, | ||
1621 | transaction_start=doc.start_time, | ||
1622 | transaction_end=end_time, | ||
1623 | successful_at_this_level=False, | ||
1624 | failure_reason=FailureReason.PDF.value, | ||
1625 | process_name=ProcessName.ALL.value, | ||
1626 | ) | ||
1627 | except Exception as e: | ||
1628 | self.online_log.error('{0} [process error (report db save)] [error={1}]'.format( | ||
1629 | self.log_base, traceback.format_exc())) | ||
1630 | |||
1631 | try: | ||
1632 | doc.status = DocStatus.PROCESS_FAILED.value | ||
1633 | doc.page_count = pdf_handler.page_count | ||
1634 | doc.save() | ||
1635 | self.online_log.warn('{0} [process failed (pdf_2_img_2_queue)] [task={1}] ' | ||
1636 | '[error={2}]'.format(self.log_base, task_str, traceback.format_exc())) | ||
1637 | except Exception as e: | ||
1638 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | ||
1639 | self.log_base, traceback.format_exc())) | ||
1640 | # error_list.append(1) | ||
1641 | # return | ||
1507 | else: # e-contract or or e-fsm-contract or e-hmh | 1642 | else: # e-contract or or e-fsm-contract or e-hmh |
1508 | try: | 1643 | try: |
1509 | # pdf下载 处理 图片存储 识别 | 1644 | # pdf下载 处理 图片存储 识别 |
... | @@ -1674,6 +1809,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1674,6 +1809,7 @@ class Command(BaseCommand, LoggerMixin): |
1674 | json_data_1['text_list'] = text_list | 1809 | json_data_1['text_list'] = text_list |
1675 | 1810 | ||
1676 | start_time = time.time() | 1811 | start_time = time.time() |
1812 | self.online_log.info('{0} [ocr_1 api] [img={1}] [json_data_1={2}]'.format(self.log_base, img_path, json_data_1)) | ||
1677 | ocr_1_response = requests.post(url, json=json_data_1) | 1813 | ocr_1_response = requests.post(url, json=json_data_1) |
1678 | if ocr_1_response.status_code != 200: | 1814 | if ocr_1_response.status_code != 200: |
1679 | raise OCR1Exception('ocr_1 status code: {0}'.format(ocr_1_response.status_code)) | 1815 | raise OCR1Exception('ocr_1 status code: {0}'.format(ocr_1_response.status_code)) |
... | @@ -1684,6 +1820,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1684,6 +1820,7 @@ class Command(BaseCommand, LoggerMixin): |
1684 | else: | 1820 | else: |
1685 | self.online_log.info('{0} [ocr_1 start] [img={1}] [url={2}]'.format(self.log_base, img_path, url)) | 1821 | self.online_log.info('{0} [ocr_1 start] [img={1}] [url={2}]'.format(self.log_base, img_path, url)) |
1686 | ocr_1_res = ocr_1_response.json() | 1822 | ocr_1_res = ocr_1_response.json() |
1823 | self.online_log.info('{0} [ocr_1 api res] [img={1}] [ocr_1_res={2}]'.format(self.log_base, img_path, ocr_1_res)) | ||
1687 | end_time = time.time() | 1824 | end_time = time.time() |
1688 | speed_time = int(end_time - start_time) | 1825 | speed_time = int(end_time - start_time) |
1689 | self.online_log.info('{0} [ocr_1 success] [img={1}] [url={2}] [speed_time={3}]'.format( | 1826 | self.online_log.info('{0} [ocr_1 success] [img={1}] [url={2}] [speed_time={3}]'.format( | ... | ... |
... | @@ -11,6 +11,8 @@ from openpyxl import Workbook | ... | @@ -11,6 +11,8 @@ from openpyxl import Workbook |
11 | from openpyxl.styles import PatternFill, numbers | 11 | from openpyxl.styles import PatternFill, numbers |
12 | from openpyxl.utils import get_column_letter | 12 | from openpyxl.utils import get_column_letter |
13 | from apps.doc import consts | 13 | from apps.doc import consts |
14 | import logging | ||
15 | online_log = logging.getLogger('online') | ||
14 | 16 | ||
15 | 17 | ||
16 | class BSWorkbook(Workbook): | 18 | class BSWorkbook(Workbook): |
... | @@ -562,6 +564,8 @@ class BSWorkbook(Workbook): | ... | @@ -562,6 +564,8 @@ class BSWorkbook(Workbook): |
562 | borrow_cell = None if borrow_cell_idx is None or borrow_cell_idx >= length else rows[borrow_cell_idx] | 564 | borrow_cell = None if borrow_cell_idx is None or borrow_cell_idx >= length else rows[borrow_cell_idx] |
563 | 565 | ||
564 | summary_cell_value = None if summary_cell is None else summary_cell.value | 566 | summary_cell_value = None if summary_cell is None else summary_cell.value |
567 | if summary_cell.value is not None: | ||
568 | summary_cell_value = summary_cell_value.strip() | ||
565 | date_cell_value = None if date_cell is None else date_cell.value | 569 | date_cell_value = None if date_cell is None else date_cell.value |
566 | amount_cell_value = None if amount_cell is None else amount_cell.value | 570 | amount_cell_value = None if amount_cell is None else amount_cell.value |
567 | over_cell_value = None if over_cell is None else over_cell.value | 571 | over_cell_value = None if over_cell is None else over_cell.value |
... | @@ -638,6 +642,7 @@ class BSWorkbook(Workbook): | ... | @@ -638,6 +642,7 @@ class BSWorkbook(Workbook): |
638 | 642 | ||
639 | # 3.2.提取信息、高亮 | 643 | # 3.2.提取信息、高亮 |
640 | # row = summary_cell.row | 644 | # row = summary_cell.row |
645 | # online_log.info('[ti qu xin xi gao liang =========== >] [summary_cell_value={0}]'.format(summary_cell_value)) | ||
641 | if summary_cell is not None: | 646 | if summary_cell is not None: |
642 | # 关键词1提取 | 647 | # 关键词1提取 |
643 | if summary_cell_value in self.interest_keyword: | 648 | if summary_cell_value in self.interest_keyword: | ... | ... |
... | @@ -693,6 +693,14 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -693,6 +693,14 @@ class UploadDocView(GenericView, DocHandler): |
693 | classify_1 = classify_1_tmp | 693 | classify_1 = classify_1_tmp |
694 | break | 694 | break |
695 | 695 | ||
696 | if classify_1 == 0 and ('微信支付交易明细证明' in document_name or '微信流水' in document_name): | ||
697 | classify_1 = 12 | ||
698 | self.running_log.info('[weixin bs process] [doc_id={0}]'.format(doc.id)) | ||
699 | |||
700 | if classify_1 == 0 and (document_name.startswith("dzfp_")): | ||
701 | classify_1 = 0 | ||
702 | self.running_log.info('[dzfp process] [doc_id={0}]'.format(doc.id)) | ||
703 | |||
696 | 704 | ||
697 | if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ | 705 | if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ |
698 | or document_name.endswith('.RAR'): | 706 | or document_name.endswith('.RAR'): |
... | @@ -1247,6 +1255,14 @@ class DocView(DocGenericView, DocHandler): | ... | @@ -1247,6 +1255,14 @@ class DocView(DocGenericView, DocHandler): |
1247 | if keyword in document_name: | 1255 | if keyword in document_name: |
1248 | classify_1 = classify_1_tmp | 1256 | classify_1 = classify_1_tmp |
1249 | break | 1257 | break |
1258 | |||
1259 | if classify_1 == 0 and ('微信支付交易明细证明' in document_name or '微信流水' in document_name): | ||
1260 | classify_1 = 12 | ||
1261 | self.running_log.info('[weixin bs process] [doc_id={0}]'.format(doc.id)) | ||
1262 | |||
1263 | if classify_1 == 0 and (document_name.startswith("dzfp_")): | ||
1264 | classify_1 = 0 | ||
1265 | self.running_log.info('[dzfp process] [doc_id={0}]'.format(doc.id)) | ||
1250 | 1266 | ||
1251 | # tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)] | 1267 | # tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)] |
1252 | task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) | 1268 | task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)]) | ... | ... |
... | @@ -69,6 +69,7 @@ class PDFHandler: | ... | @@ -69,6 +69,7 @@ class PDFHandler: |
69 | self.suffix = self.get_suffix(document_name) | 69 | self.suffix = self.get_suffix(document_name) |
70 | self.is_ebank = False | 70 | self.is_ebank = False |
71 | self.is_e_pdf = False | 71 | self.is_e_pdf = False |
72 | self.is_e_weixin_bs = False | ||
72 | self.page_text_list = [] | 73 | self.page_text_list = [] |
73 | self.pdf_info = {} | 74 | self.pdf_info = {} |
74 | self.img_path_pno_list = [] | 75 | self.img_path_pno_list = [] |
... | @@ -186,6 +187,8 @@ class PDFHandler: | ... | @@ -186,6 +187,8 @@ class PDFHandler: |
186 | self.img_path_list.append(img_save_path) | 187 | self.img_path_list.append(img_save_path) |
187 | if self.is_ebank: | 188 | if self.is_ebank: |
188 | self.rebuild_bbox(pm.width, pm.height, page.number) | 189 | self.rebuild_bbox(pm.width, pm.height, page.number) |
190 | if self.is_e_weixin_bs: | ||
191 | self.rebuild_bbox(pm.width, pm.height, page.number) | ||
189 | 192 | ||
190 | @staticmethod | 193 | @staticmethod |
191 | def getimage(pix): | 194 | def getimage(pix): |
... | @@ -407,6 +410,57 @@ class PDFHandler: | ... | @@ -407,6 +410,57 @@ class PDFHandler: |
407 | self.is_e_pdf = True | 410 | self.is_e_pdf = True |
408 | self.page_text_list = page_text_list | 411 | self.page_text_list = page_text_list |
409 | 412 | ||
def put_text(self, pdf):
    """Collect the text spans of every page of an electronic WeChat statement.

    Walks all pages of ``pdf`` and gathers ``(bbox, text)`` tuples for each
    non-blank span. On success the handler is flagged as an electronic PDF
    (``is_e_pdf``) and as an electronic WeChat statement (``is_e_weixin_bs``),
    and ``page_text_list`` receives one dict per page with the keys
    ``width``, ``height``, ``rotation`` and ``text``.

    The method returns early, leaving the handler state untouched, when:

    * a page rotation is not an integer multiple of 90 degrees, or
    * the running total of collected spans falls below five per page
      (the document is then treated as not text-based).

    :param pdf: an opened document exposing ``pageCount`` and ``loadPage``
        (the legacy camelCase PyMuPDF API used by the rest of this class).
    :return: None
    """
    import sys

    # The original probed "can this text be written out" by print()-ing
    # every span, which flooded stdout and dropped text whenever stdout
    # itself was broken. Encoding with the console codec keeps the same
    # filter (e.g. unsupported emoji are skipped) without any output.
    console_encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'

    page_text_list = []
    text_item_sum = 0
    for pno in range(pdf.pageCount):
        page = pdf.loadPage(pno)
        if page.rotation is None:
            rotation = 0
        elif isinstance(page.rotation, int):
            divisor, remainder = divmod(page.rotation, 90)
            if remainder != 0:
                return
            # Normalise to the number of quarter turns (0..3).
            rotation = divisor % 4
        else:
            return

        text = page.getTextPage().extractDICT()
        text_list = []
        # `or ()` guards: a block without 'lines' (e.g. an image block) or
        # a span without 'text' must not abort the whole extraction.
        for block in text.get('blocks') or ():
            for line in block.get('lines') or ():
                for span in line.get('spans') or ():
                    char = span.get('text') or ''
                    if char.strip() == '':
                        continue

                    # Skip text the console codec cannot represent.
                    try:
                        char.encode(console_encoding)
                    except UnicodeError:
                        continue

                    text_list.append((span.get('bbox'), char))

        # The threshold is cumulative: on average at least five spans per
        # page must have been seen so far.
        text_item_sum += len(text_list)
        if text_item_sum < (pno + 1) * 5:
            return
        page_text_list.append(
            {
                'width': text.get('width'),
                'height': text.get('height'),
                'rotation': rotation,
                'text': text_list,
            }
        )

    self.is_e_pdf = True
    self.is_e_weixin_bs = True
    self.page_text_list = page_text_list
463 | |||
410 | def e_contract_process(self): | 464 | def e_contract_process(self): |
411 | os.makedirs(self.img_dir_path, exist_ok=True) | 465 | os.makedirs(self.img_dir_path, exist_ok=True) |
412 | with fitz.Document(self.path) as pdf: | 466 | with fitz.Document(self.path) as pdf: |
... | @@ -473,6 +527,59 @@ class PDFHandler: | ... | @@ -473,6 +527,59 @@ class PDFHandler: |
473 | self.merge_il(pdf, pno, il) | 527 | self.merge_il(pdf, pno, il) |
474 | self.img_count = len(self.img_path_list) | 528 | self.img_count = len(self.img_path_list) |
475 | 529 | ||
def extract_image_for_weixin(self, max_img_count=None):
    """Turn the source document into page images for the WeChat statement flow.

    Resets ``img_path_list`` and ``xref_set``, makes sure the image output
    directory exists and then:

    * for an image file (suffix listed in ``img_suffixs``): copies the file
      as-is into the image directory;
    * otherwise: opens the file with PyMuPDF, tries the known passwords,
      records ``metadata`` and ``page_count``, runs ``put_text`` to detect a
      text-based (electronic) statement and renders or extracts one image
      per page.

    ``img_count`` ends up as the number of produced images. The exception is
    a document whose page count reaches ``max_img_count``: nothing is
    rendered, and ``img_count`` is set to the page count so the caller can
    report the overflow.

    :param max_img_count: optional page limit; ignored unless it is an int.
    :return: None
    """
    self.img_path_list = []
    self.xref_set = set()
    os.makedirs(self.img_dir_path, exist_ok=True)

    if self.suffix in self.img_suffixs:
        # Already an image: keep the original extension, no conversion.
        img_save_path = self.get_img_save_path(0, ext=self.suffix[1:])
        shutil.copy(self.path, img_save_path)
        self.img_path_list.append(img_save_path)
    else:
        with fitz.Document(self.path) as pdf:
            # Decrypt: try each known password until the document opens.
            for pwd in self.pwd_list:
                if not pdf.isEncrypted:
                    break
                pdf.authenticate(pwd)

            self.metadata = pdf.metadata
            self.page_count = pdf.pageCount
            if isinstance(max_img_count, int) and pdf.pageCount >= max_img_count:
                # Too many pages: report the page count and skip rendering.
                self.img_count = pdf.pageCount
                return

            self.put_text(pdf)
            for pno in range(pdf.pageCount):
                # Image objects referenced by the page. Each entry starts with
                # (xref, smask, width, height, bpc, colorspace, ...).
                il = pdf.getPageImageList(pno)

                # 1. Text-based document or page without image objects:
                #    render the whole page as PNG.
                if self.is_e_pdf or self.is_ebank or len(il) == 0:
                    page = pdf.loadPage(pno)
                    self.page_to_png(page)
                # 2. Exactly one image object:
                #    small image (e.g. a stamp on an e-statement) -> render the page;
                #    large image -> render the page, otherwise extract the object.
                elif len(il) == 1:
                    # Only the leading fields are needed. Slicing instead of
                    # unpacking a fixed 9-tuple keeps this working when the
                    # entry carries a trailing 10th field.
                    xref, smask, width, height, _, colorspace = il[0][:6]
                    # Small image.
                    if width < WH_COUPLE_1[0] and height < WH_COUPLE_1[1]:
                        page = pdf.loadPage(pno)
                        self.page_to_png(page)
                    # Large image.
                    elif width >= WH_COUPLE_6[0] or height >= WH_COUPLE_6[1]:
                        self.is_new_modify = 1
                        # Guards against oversized output images.
                        is_big_img = (width < WH_COUPLE_7[0] and height < WH_COUPLE_7[1])
                        page = pdf.loadPage(pno)
                        self.page_to_png(page, is_big_img=is_big_img)
                    elif xref not in self.xref_set:
                        self.extract_single_image(pdf, xref, smask, colorspace, pno)
                # 3. More than one image object: special merge handling.
                else:
                    self.merge_il(pdf, pno, il)
    self.img_count = len(self.img_path_list)
582 | |||
476 | def extract_page_image(self): | 583 | def extract_page_image(self): |
477 | self.img_path_list = [] | 584 | self.img_path_list = [] |
478 | self.xref_set = set() | 585 | self.xref_set = set() | ... | ... |
-
Please register or sign in to post a comment