diff --git a/src/common/electronic_afc_contract/get_char.py b/src/common/electronic_afc_contract/get_char.py index 1c2af9b..9a7ce07 100644 --- a/src/common/electronic_afc_contract/get_char.py +++ b/src/common/electronic_afc_contract/get_char.py @@ -295,9 +295,8 @@ class Finder: bbox_ytzje = bbox if text == '贷款本金3': bbox_dkbj = bbox - if text == '附加产品融资贷款本': + if text in ['附加产品融资贷款本', '附加产品融资贷款本金', '附加产品融资贷']: bbox_total = bbox - # print(bbox_xm, bbox_ytzje, bbox_dkbj, bbox_total) if bbox_xm: for i in range(10): rh = abs(bbox_xm[1] - bbox_xm[-1]) @@ -337,7 +336,7 @@ class Finder: _iou, _key = self.get_top_iou(poly=anchor, ocr_result=self.ocr_results[page_num]) bbox, total_text = self.ocr_results[page_num][_key] asp_details_table.append(['附加产品融资贷款本金总金额:', '', total_text]) - asp_details_table_term['words'] = asp_details_table + asp_details_table_term['words'] = asp_details_table return asp_details_table_term def get_signature(self): @@ -904,4 +903,4 @@ class Finder: new_results = {"is_asp": self.is_asp, "page_info": self.init_result } - return new_results \ No newline at end of file + return new_results diff --git a/src/common/electronic_hil_contract/get_char.py b/src/common/electronic_hil_contract/get_char.py index 08f7419..90db429 100644 --- a/src/common/electronic_hil_contract/get_char.py +++ b/src/common/electronic_hil_contract/get_char.py @@ -618,7 +618,7 @@ class Finder: page_num = i texts.append(text) boxes.append(bbox) - print(texts) + # print(texts) if len(texts) > 4: words = '有' else: @@ -785,41 +785,60 @@ class Finder: def get_table_add_product(self): table_add_product = self.item.copy() - items = [] - start = False - page = None - greater_equal_v35 = False + add_product_page_num = None for pno in self.pdf_info: - condition = False for block in self.pdf_info[f'{pno}']['blocks']: if block['type'] != 0: continue for line in block['lines']: for span in line['spans']: bbox, text = span['bbox'], span['text'] - if text == '租赁利率': - greater_equal_v35 = True - if '总计' in text: - start = True - if '注:出租人向承租人购买租赁车辆的对价' in text: - page = pno - start = False - if start == True: - items.append(text) + if '车辆附加产品(明细见下表)' in text: + add_product_page_num = pno + ocr_results = [] + for block in self.pdf_info[f'{add_product_page_num}']['blocks']: + if block['type'] != 0: + continue + for line in block['lines']: + for span in line['spans']: + bbox, text = span['bbox'], span['text'] + xmin, ymin, xmax, ymax = bbox + bbox = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax] + ocr_results.append([bbox, text]) lines = [['项目', '购买价格', '实际融资金额']] - if greater_equal_v35: - for i in range(len(items) // 4): - line = [items[2 + i * 4 + 0], items[2 + i * 4 + 1], items[2 + i * 4 + 2]] - lines.append(line) - else: - for i in range(len(items) // 3): - line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]] - lines.append(line) - if len(items) > 0: - lines.append([items[0], '', items[1]]) + key_xm = None + key_gmjg = None + key_sjrzje = None + key_total = None + for index, span in enumerate(ocr_results): + if span[1] == '项目': + key_xm = index + if span[1] == '购买价格': + key_gmjg = index + if span[1] == '实际融资金额': + key_sjrzje = index + if span[1] == '总计': + key_total = index + bbox, text = ocr_results[key_xm] + rh = abs(bbox[1] - bbox[-1]) + anchor = np.array(bbox).reshape((-1, 2)) + anchor[:, 0] += 2 * rh + anchor[:, 1] += rh + for i in range(5): + for span in ocr_results: + iou = caculate_iou(anchor, span[0]) + if iou > 0 and span[1].strip() != '所购': + x = get_table_info(span[0], ocr_results[key_gmjg][0], ocr_results) + y = get_table_info(span[0], ocr_results[key_sjrzje][0], ocr_results) + line = [span[1].replace('\u3000', ' '), x, y] + lines.append(line) + anchor = np.array(span[0]).reshape((-1, 2)) + anchor[:, 1] += rh + total = get_table_info(ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results) + lines.append(['总计', '', total]) table_add_product['words'] = lines - table_add_product['page'] = page + table_add_product['page'] = add_product_page_num table_add_product['position'] = None return table_add_product