08b0fd9e by 周伟奇

e-contract fix

1 parent c878c21a
......@@ -295,9 +295,8 @@ class Finder:
bbox_ytzje = bbox
if text == '贷款本金3':
bbox_dkbj = bbox
if text == '附加产品融资贷款本':
if text in ['附加产品融资贷款本', '附加产品融资贷款本金', '附加产品融资贷']:
bbox_total = bbox
# print(bbox_xm, bbox_ytzje, bbox_dkbj, bbox_total)
if bbox_xm:
for i in range(10):
rh = abs(bbox_xm[1] - bbox_xm[-1])
......
......@@ -618,7 +618,7 @@ class Finder:
page_num = i
texts.append(text)
boxes.append(bbox)
print(texts)
# print(texts)
if len(texts) > 4:
words = '有'
else:
......@@ -785,41 +785,60 @@ class Finder:
def get_table_add_product(self):
# Build the add-on product table result: start from a copy of self.item,
# fill 'words' with table rows, 'page' with a page key and 'position'
# with None, then return it.
#
# NOTE(review): this hunk appears to be rendered without +/- markers and
# without indentation, so lines removed by the commit and lines added by it
# seem to be interleaved (e.g. the two consecutive assignments to
# table_add_product['page'] near the end). Read it as a diff, not as
# runnable code; the nesting of the statements cannot be seen from here.
table_add_product = self.item.copy()
# State for the token-stream approach: `items` collects span texts while
# `start` is set; `page` and `greater_equal_v35` are set from marker texts.
items = []
start = False
page = None
greater_equal_v35 = False
# Key of the page whose text contains the add-on product table marker.
add_product_page_num = None
for pno in self.pdf_info:
condition = False
for block in self.pdf_info[f'{pno}']['blocks']:
# Only blocks with type 0 are scanned (presumably text blocks -- TODO confirm
# against whatever produced self.pdf_info).
if block['type'] != 0:
continue
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
# Marker texts drive the token-stream approach: '租赁利率' sets the
# greater_equal_v35 flag, '总计' starts collecting, and the '注:...' footnote
# records the page and stops collecting.
if text == '租赁利率':
greater_equal_v35 = True
if '总计' in text:
start = True
if '注:出租人向承租人购买租赁车辆的对价' in text:
page = pno
start = False
if start == True:
items.append(text)
# Layout-based approach: remember the page that carries the table marker.
if '车辆附加产品(明细见下表)' in text:
add_product_page_num = pno
# Collect every span of the marked page as [bbox, text].
# NOTE(review): if the marker was never found, add_product_page_num is still
# None here and the lookup below uses the key 'None' -- confirm callers
# guarantee the marker exists.
ocr_results = []
for block in self.pdf_info[f'{add_product_page_num}']['blocks']:
if block['type'] != 0:
continue
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
xmin, ymin, xmax, ymax = bbox
# Expand the 4-value box into 8 values: the four corners in the order
# (xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax).
bbox = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
ocr_results.append([bbox, text])
# Header row of the output table.
lines = [['项目', '购买价格', '实际融资金额']]
# Token-stream approach: slice `items` into groups of 4 (flag set) or 3
# (flag unset), taking three consecutive values starting at offset 2.
# NOTE(review): in this view no lines.append follows the else-branch
# assignment.
if greater_equal_v35:
for i in range(len(items) // 4):
line = [items[2 + i * 4 + 0], items[2 + i * 4 + 1], items[2 + i * 4 + 2]]
lines.append(line)
else:
for i in range(len(items) // 3):
line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]]
# Layout-based approach: locate the header cells and the total cell by exact
# text match; each key_* holds an index into ocr_results, or None when the
# text is absent.
key_xm = None
key_gmjg = None
key_sjrzje = None
key_total = None
for index, span in enumerate(ocr_results):
if span[1] == '项目':
key_xm = index
if span[1] == '购买价格':
key_gmjg = index
if span[1] == '实际融资金额':
key_sjrzje = index
if span[1] == '总计':
key_total = index
# NOTE(review): the key_* indices are used without a None check; indexing a
# list with None raises TypeError when a header text is missing.
bbox, text = ocr_results[key_xm]
# rh is the vertical extent of the '项目' cell (|ymin - ymax| of the 8-value box).
rh = abs(bbox[1] - bbox[-1])
# Anchor = the four corners of the '项目' cell, shifted right by 2 * rh and
# down by rh.
anchor = np.array(bbox).reshape((-1, 2))
anchor[:, 0] += 2 * rh
anchor[:, 1] += rh
# Up to 5 passes over the spans. A span that overlaps the anchor
# (caculate_iou > 0) and whose stripped text is not '所购' becomes a row; its
# two value columns come from get_table_info, called with the span box, a
# header box and all spans (helper semantics not visible here).
for i in range(5):
for span in ocr_results:
iou = caculate_iou(anchor, span[0])
if iou > 0 and span[1].strip() != '所购':
x = get_table_info(span[0], ocr_results[key_gmjg][0], ocr_results)
y = get_table_info(span[0], ocr_results[key_sjrzje][0], ocr_results)
# Ideographic spaces (U+3000) in the item name are replaced by ASCII spaces.
line = [span[1].replace('\u3000', ' '), x, y]
lines.append(line)
if len(items) > 0:
lines.append([items[0], '', items[1]])
# Re-anchor on the matched span's box, shifted down by rh.
anchor = np.array(span[0]).reshape((-1, 2))
anchor[:, 1] += rh
# Total row: get_table_info is called with the '总计' box and the
# '实际融资金额' header box.
total = get_table_info(ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results)
lines.append(['总计', '', total])
table_add_product['words'] = lines
# Two assignments to 'page' are visible; the later one (add_product_page_num)
# wins if both execute.
table_add_product['page'] = page
table_add_product['page'] = add_product_page_num
table_add_product['position'] = None
return table_add_product
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!