d3de42e6 by 周伟奇

fix HIL contract

1 parent a7933381
......@@ -6,14 +6,11 @@
# @Description :
import re
import cv2
import base64
import numpy as np
from fuzzywuzzy import fuzz
class Finder:
def __init__(self, pdf_info):
self.pdf_info = pdf_info
self.item = {"words": None,
......@@ -25,6 +22,9 @@ class Finder:
"承租人-姓名": self.item,
"承租人-证件号码": self.item,
"承租人-法定代表人或授权代表": self.item,
"共同承租人-姓名": self.item,
"共同承租人-证件号码": self.item,
"共同承租人-法定代表人或授权代表": self.item,
"保证人1-姓名": self.item,
"保证人1-证件号码": self.item,
"保证人1-法定代表人或授权代表": self.item,
......@@ -47,6 +47,8 @@ class Finder:
"银行账户-开户行": self.item,
"签字页-承租人姓名": self.item,
"签字页-承租人签章": self.item,
"签字页-共同承租人姓名": self.item,
"签字页-共同承租人签章": self.item,
"签字页-保证人1姓名": self.item,
"签字页-保证人1签章": self.item,
"签字页-保证人2姓名": self.item,
......@@ -54,7 +56,6 @@ class Finder:
"签字页-保证人3姓名": self.item,
"签字页-保证人3签章": self.item,
}
# 格式化输出 车辆处置协议 要是别的字段
self.init_result_1 = {"合同编号": self.item,
"承租人-姓名": self.item,
......@@ -66,9 +67,7 @@ class Finder:
"签字页-承租人签章": self.item,
"签字页-销售经销商": self.item,
"签字页-销售经销商签章": self.item,
}
# 格式化输出 车辆租赁抵押合同
self.init_result_2 = {"合同编号": self.item,
"合同编号(正文)": self.item,
......@@ -150,23 +149,24 @@ class Finder:
words = matchObj.group(1)
contract_no['position'] = None
contract_no['page'] = pno
contract_no['words'] = words
# contract_no['words'] = words
contract_no['words'] = re.sub("\s", "", words).replace(")", "")
return contract_no
matchObj = re.search(r'编号为(.*?)的', all_text)
if matchObj:
words = matchObj.group(1).strip()
contract_no['position'] = None
contract_no['page'] = pno
contract_no['words'] = words
# contract_no['words'] = words
contract_no['words'] = re.sub("\s", "", words).replace(")", "")
return contract_no
matchObj = re.search(r'编号为(.*?))的', all_text)
if matchObj:
words = matchObj.group(1).strip()
contract_no['position'] = None
contract_no['page'] = pno
contract_no['words'] = words
# contract_no['words'] = words
contract_no['words'] = re.sub("\s", "", words)
return contract_no
def get_key_value(self, key, page_num=None):
......@@ -180,10 +180,11 @@ class Finder:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
if key in text:
words = text.split(':')[-1]
words = text.split(':')[-1].replace("。", "")
value['position'] = bbox
value['page'] = pno
value['words'] = words
# value['words'] = words
value['words'] = re.sub("\s", "", words)
else:
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
......@@ -194,10 +195,11 @@ class Finder:
bbox, text = span['bbox'], span['text']
if key in text:
# print(self.pdf_info[pno])
words = text.split(':')[-1]
words = text.split(':')[-1].replace("。", "")
value['position'] = bbox
value['page'] = pno
value['words'] = words
# value['words'] = words
value['words'] = re.sub("\s", "", words)
return value
def get_loan_principal(self, page_num='0'):
......@@ -267,7 +269,6 @@ class Finder:
def get_asp_details(self, page_num):
asp_details_table_term = self.item.copy()
asp_details_table = []
asp_details_text_list = []
table = False
......@@ -283,25 +284,20 @@ class Finder:
table = False
if table == True:
asp_details_text_list.append(text)
for i in range((len(asp_details_text_list)+2)//3):
for i in range((len(asp_details_text_list) + 2) // 3):
line = []
if i == 0:
line = [asp_details_text_list[0]]
else:
for j in range(3):
line.append(asp_details_text_list[i*3-2+j])
line.append(asp_details_text_list[i * 3 - 2 + j])
asp_details_table.append(line)
if len(asp_details_table) > 0:
asp_details_table_term['words'] = asp_details_table
return asp_details_table_term
def get_signature(self):
signature = self.item.copy()
for block in self.pdf_info['0']['blocks']:
if block['type'] != 0:
continue
......@@ -369,8 +365,8 @@ class Finder:
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
if anchor_bbox[2]<np.mean(bbox[::2])<half_width and \
anchor_bbox[1]<np.mean(bbox[1::2])<anchor_bbox[3]:
if anchor_bbox[2] < np.mean(bbox[::2]) < half_width and \
anchor_bbox[1] < np.mean(bbox[1::2]) < anchor_bbox[3]:
seller['position'] = bbox
seller['words'] = text
return seller
......@@ -430,7 +426,6 @@ class Finder:
def get_repayment_schedule(self):
repayment_schedule = self.item.copy()
repayment_schedule_text_list = []
table = False
page = None
......@@ -444,20 +439,25 @@ class Finder:
if '以上表格中所列序号' in text:
table = False
if table == True:
# 过滤汉字
if re.compile(r'[\u4e00-\u9fff]').search(text):
continue
# 过滤 1. - 61. 这些标题
if re.findall("\d+", text):
if len(re.findall("\d+", text)) == 1:
continue
repayment_schedule_text_list.append(text)
if '61.' in text:
page = pno
table = True
# print("repayment_schedule_text_list = ", repayment_schedule_text_list)
repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']]
for i in range(len(repayment_schedule_text_list)//4):
line = [f'{i+1}.']
for i in range(len(repayment_schedule_text_list) // 4):
line = [f'{i + 1}.']
# 4表示4列的意思
for j in range(4):
line.append(repayment_schedule_text_list[i*4+j])
line.append(repayment_schedule_text_list[i * 4 + j])
repayment_schedule_table.append(line)
repayment_schedule['words'] = repayment_schedule_table
repayment_schedule['page'] = page
return repayment_schedule
......@@ -506,7 +506,7 @@ class Finder:
else:
words = '无'
boxes = np.array(boxes).reshape((-1, 2))
position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
signature_role_2['page_num'] = page_num
signature_role_2['position'] = position
signature_role_2['words'] = words
......@@ -541,7 +541,7 @@ class Finder:
else:
words = '无'
boxes = np.array(boxes).reshape((-1, 2))
position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
signature_role_3['page_num'] = page_num
signature_role_3['position'] = position
signature_role_3['words'] = words
......@@ -576,7 +576,7 @@ class Finder:
else:
words = '无'
boxes = np.array(boxes).reshape((-1, 2))
position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
signature_role_4['page_num'] = page_num
signature_role_4['position'] = position
signature_role_4['words'] = words
......@@ -612,7 +612,7 @@ class Finder:
else:
words = '无'
boxes = np.array(boxes).reshape((-1, 2))
position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
signature_role_5['page_num'] = page_num
signature_role_5['position'] = position
signature_role_5['words'] = words
......@@ -640,7 +640,7 @@ class Finder:
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom):
if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
name = text.split(' ')[0]
date = text.split(':')[-1]
signature_name['words'] = name
......@@ -663,7 +663,7 @@ class Finder:
if top in text:
anchor_top = bbox[1]
if bottom in text:
anchor_bottom = bbox[1]
anchor_bottom = bbox[3]
if anchor_top is not None and anchor_bottom is not None:
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
......@@ -672,7 +672,9 @@ class Finder:
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom):
# ------------ #
# print("--text = ", text)
if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
words = text
signature['words'] = words
signature['page'] = pno
......@@ -683,7 +685,6 @@ class Finder:
name = self.item.copy()
id_num = self.item.copy()
representative = self.item.copy()
# 以保证人3 的左上角为定位点
anchor = None
for block in self.pdf_info[page_num]['blocks']:
......@@ -695,7 +696,6 @@ class Finder:
# 找到角色姓名
if re.match('保证人3', text) is not None:
anchor = [bbox[0], bbox[1]]
if anchor is not None:
for block in self.pdf_info[page_num]['blocks']:
if block['type'] != 0:
......@@ -711,52 +711,60 @@ class Finder:
name['position'] = bbox
if role_key == '承租人:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
representative['words'] = words
representative['page'] = page_num
representative['position'] = bbox
if role_key == '保证人1:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
representative['words'] = words
representative['page'] = page_num
representative['position'] = bbox
if role_key == '保证人2:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
representative['words'] = words
representative['page'] = page_num
representative['position'] = bbox
if role_key == '保证人3:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
representative['words'] = words
representative['page'] = page_num
......@@ -783,12 +791,10 @@ class Finder:
start = False
if start == True:
items.append(text)
lines = [['项目', '购买价格', '实际融资金额']]
for i in range(len(items)//3):
line = [items[2+i*3+0], items[2+i*3+1], items[2+i*3+2]]
for i in range(len(items) // 3):
line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]]
lines.append(line)
if len(items) > 0:
lines.append([items[0], '', items[1]])
......@@ -800,7 +806,6 @@ class Finder:
def get_contract_no_dy(self):
# 查找抵押合同编号
contract_no = self.item.copy()
key_box = None
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
......@@ -811,7 +816,6 @@ class Finder:
bbox, text = span['bbox'], span['text']
if '抵押合同编号' in text:
key_box = bbox
if key_box is not None:
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
......@@ -829,7 +833,6 @@ class Finder:
def get_dyr_name_id(self):
name = self.item.copy()
_id = self.item.copy()
key_box = None
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
......@@ -842,7 +845,7 @@ class Finder:
key_box = bbox
if key_box is not None:
rh = abs(key_box[1]-key_box[3])
rh = abs(key_box[1] - key_box[3])
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
if block['type'] != 0:
......@@ -850,12 +853,12 @@ class Finder:
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '姓名' in text:
if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '姓名' in text:
words = text.split(':')[-1]
name['position'] = bbox
name['page'] = pno
name['words'] = words
if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '证件号码' in text:
if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '证件号码' in text:
words = text.split(':')[-1]
_id['position'] = bbox
_id['page'] = pno
......@@ -864,7 +867,6 @@ class Finder:
def get_key_value_position(self, key):
value = self.item.copy()
key_box = None
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
......@@ -875,9 +877,8 @@ class Finder:
bbox, text = span['bbox'], span['text']
if text == key:
key_box = bbox
if key_box is not None:
rh = abs(key_box[1]-key_box[3])
rh = abs(key_box[1] - key_box[3])
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
if block['type'] != 0:
......@@ -885,13 +886,104 @@ class Finder:
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs(key_box[2]-bbox[0]) < rh*10:
if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs(
key_box[2] - bbox[0]) < rh * 10:
words = text
value['position'] = bbox
value['page'] = pno
value['words'] = words
return value
def get_role_info_3_3(self, role_key, page_num='0'):
    """Locate one role's name, ID number and representative on a 3_3 page.

    The page is scanned once to find an anchor: the first two bbox entries
    of the last span whose text starts with '保证人2' (the original author
    describes this as that span's top-left corner). Field spans are then
    assigned to a role by the quadrant, relative to the anchor, in which
    their bbox centre falls.

    Args:
        role_key: Label prefix of the role, also used as the ``re.match``
            pattern for the name span. The quadrant rules only apply to
            '承租人一:', '共同承租人:', '保证人1:' and '保证人2:'.
        page_num: Key of the page inside ``self.pdf_info``.

    Returns:
        A ``(name, id_num, representative)`` tuple. Each element is a copy
        of ``self.item`` whose 'words', 'page' and 'position' entries are
        filled in when a matching span was found; nothing is filled in
        when no anchor exists on the page.
    """
    # role label -> (field centre is left of anchor, field centre is above anchor)
    quadrant_by_role = {
        '承租人一:': (True, True),
        '共同承租人:': (True, False),
        '保证人1:': (False, True),
        '保证人2:': (False, False),
    }

    name = self.item.copy()
    id_num = self.item.copy()
    representative = self.item.copy()

    def text_spans():
        # Yield (bbox, text) for every span; blocks whose type is not 0 are skipped.
        for blk in self.pdf_info[page_num]['blocks']:
            if blk['type'] == 0:
                for row in blk['lines']:
                    for piece in row['spans']:
                        yield piece['bbox'], piece['text']

    def record(target, box, raw):
        # The value is whatever follows the last ':' in the span text.
        target['words'] = raw.split(':')[-1]
        target['page'] = page_num
        target['position'] = box

    # First pass: the last '保证人2' span on the page wins as the anchor.
    anchor = None
    for box, raw in text_spans():
        if re.match('保证人2', raw) is not None:
            anchor = [box[0], box[1]]

    if anchor is None:
        return name, id_num, representative

    anchor_x, anchor_y = anchor
    quadrant = quadrant_by_role.get(role_key)

    def in_quadrant(box):
        # Strict comparisons: a centre lying exactly on the anchor never matches.
        want_left, want_above = quadrant
        centre_x = np.mean(box[::2])
        if not (centre_x < anchor_x if want_left else centre_x > anchor_x):
            return False
        centre_y = np.mean(box[1::2])
        return bool(centre_y < anchor_y if want_above else centre_y > anchor_y)

    # Second pass: collect the role's name, then its quadrant-bound fields.
    for box, raw in text_spans():
        if re.match(role_key, raw) is not None:
            record(name, box, raw)
        if quadrant is None:
            # Unknown role label: only the name lookup applies.
            continue
        if re.match('证件号码:', raw) is not None and in_quadrant(box):
            record(id_num, box, raw)
        if re.match('法定代表人或授权代表:', raw) is not None and in_quadrant(box):
            record(representative, box, raw)

    return name, id_num, representative
def get_info(self):
"""
block['type'] == 0 : 表示该元素为图片
......@@ -905,6 +997,8 @@ class Finder:
self.init_result['合同编号'] = contract_no
# 从第一页上取四个角色的姓名和证件号码
name, id_num, representative = self.get_role_info(role_key='承租人:', page_num='0')
if name["words"] == None:
name, id_num, representative = self.get_role_info_3_3(role_key='承租人一:', page_num='0')
self.init_result['承租人-姓名'] = name
self.init_result['承租人-证件号码'] = id_num
self.init_result['承租人-法定代表人或授权代表'] = representative
......@@ -912,14 +1006,31 @@ class Finder:
self.init_result['保证人1-姓名'] = name
self.init_result['保证人1-证件号码'] = id_num
self.init_result['保证人1-法定代表人或授权代表'] = representative
# if条件判别 对应3_3版本
if name["words"] == None:
name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人:', page_num='0')
self.init_result['共同承租人-姓名'] = name
self.init_result['共同承租人-证件号码'] = id_num
self.init_result['共同承租人-法定代表人或授权代表'] = representative
name, id_num, representative = self.get_role_info(role_key='保证人2:', page_num='0')
self.init_result['保证人2-姓名'] = name
self.init_result['保证人2-证件号码'] = id_num
self.init_result['保证人2-法定代表人或授权代表'] = representative
# if条件判别 对应3_3版本
if name["words"] == None:
name, id_num, representative = self.get_role_info_3_3(role_key='保证人1:', page_num='0')
self.init_result['保证人2-姓名'] = name
self.init_result['保证人2-证件号码'] = id_num
self.init_result['保证人2-法定代表人或授权代表'] = representative
name, id_num, representative = self.get_role_info(role_key='保证人3:', page_num='0')
self.init_result['保证人3-姓名'] = name
self.init_result['保证人3-证件号码'] = id_num
self.init_result['保证人3-法定代表人或授权代表'] = representative
if name["words"] == None:
name, id_num, representative = self.get_role_info_3_3(role_key='保证人2:', page_num='0')
self.init_result['保证人3-姓名'] = name
self.init_result['保证人3-证件号码'] = id_num
self.init_result['保证人3-法定代表人或授权代表'] = representative
# 在所有页面中找正文中(第二部分 融资租赁主要条款及付款计划)的那个编号,因为存在换行的情况所以暂时不带位置输出
contract_no = self.get_contract_no_one()
self.init_result['合同编号(正文)'] = contract_no
......@@ -955,6 +1066,9 @@ class Finder:
# 承租人姓名、签章
name = self.get_key_value(key='承租人姓名:')
electronic_signature = self.get_electronic_signature(top='承租人姓名:', bottom='保证人1姓名:')
if name["words"] == None:
name = self.get_key_value(key='承租人一姓名:')
electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:')
self.init_result['签字页-承租人姓名'] = name
self.init_result['签字页-承租人签章'] = electronic_signature
# 保证人1姓名、签章
......@@ -962,19 +1076,35 @@ class Finder:
electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:')
self.init_result['签字页-保证人1姓名'] = name
self.init_result['签字页-保证人1签章'] = electronic_signature
# 这里用的是 name["words"] == ""
if name["words"] == "":
name = self.get_key_value(key='共同承租人名称:')
electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:')
self.init_result['签字页-共同承租人姓名'] = name
self.init_result['签字页-共同承租人签章'] = electronic_signature
# 保证人2姓名、签章
name = self.get_key_value(key='保证人2姓名:')
electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:')
self.init_result['签字页-保证人2姓名'] = name
self.init_result['签字页-保证人2签章'] = electronic_signature
# 保证人2姓名、签章
# if判断条件对应3_3版本
if name["words"] == "":
name = self.get_key_value(key='保证人1姓名:')
electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:')
self.init_result['签字页-保证人1姓名'] = name
self.init_result['签字页-保证人1签章'] = electronic_signature
# 保证人3姓名、签章
name = self.get_key_value(key='保证人3姓名:')
electronic_signature = self.get_electronic_signature(top='保证人3姓名:', bottom='日期:')
self.init_result['签字页-保证人3姓名'] = name
self.init_result['签字页-保证人3签章'] = electronic_signature
# if判断条件对应3_3版本
if name["words"] == None:
name = self.get_key_value(key='保证人2姓名:')
electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='日期:')
self.init_result['签字页-保证人2姓名'] = name
self.init_result['签字页-保证人2签章'] = electronic_signature
return self.init_result
# results['is_shhz_contract'] = True
# results['pdf_info'] = self.init_result
......
......@@ -18,7 +18,6 @@ def predict(pdf_info, file_cls):
Returns:
TYPE: Description
"""
# 0: 售后回租合同
pdf_info_0 = []
for pno in pdf_info:
......@@ -30,7 +29,6 @@ def predict(pdf_info, file_cls):
bbox, text = span['bbox'], span['text']
if '售后回租合同_' in text:
pdf_info_0.append(pdf_info[pno])
# 1: 车辆处置协议
pdf_info_1 = []
for pno in pdf_info:
......@@ -42,7 +40,6 @@ def predict(pdf_info, file_cls):
bbox, text = span['bbox'], span['text']
if '售后回租合同附件一' in text:
pdf_info_1.append(pdf_info[pno])
# 2: 车辆租赁抵押合同
pdf_info_2 = []
for pno in pdf_info:
......@@ -54,7 +51,6 @@ def predict(pdf_info, file_cls):
bbox, text = span['bbox'], span['text']
if '车辆租赁抵押合同_' in text:
pdf_info_2.append(pdf_info[pno])
is_clczxy = False
# 如果 pdf_info_1 == 4 页,则说明此时输入包含了车辆处置协议
if len(pdf_info_1) == 4 and file_cls == 1 and len(pdf_info_0) != 0:
......@@ -62,7 +58,6 @@ def predict(pdf_info, file_cls):
pdf_info = dict()
for pno, page_info in enumerate(pdf_info_1):
pdf_info[str(pno)] = page_info
f = Finder(pdf_info)
if file_cls == 0:
results = f.get_info()
......@@ -72,13 +67,11 @@ def predict(pdf_info, file_cls):
if file_cls == 2:
# 提取信息 ———— 车辆租赁抵押合同
results = f.get_info_2()
if is_clczxy == True:
if is_clczxy is True:
for key in results:
if results[key]['page'] is not None:
results[key]['page'] = str(int(results[key]['page'])+6)
results[key]['page'] = str(int(results[key]['page']) + 6)
for key in results:
if results[key]['page'] is not None:
results[key]['page'] = 'page_' + str(int(results[key]['page'])+1)
results[key]['page'] = 'page_' + str(int(results[key]['page']) + 1)
return results
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!