b9e0884e by 冯轩

Merge branch 'feature/fsm-contract' into feature/uat-tmp

2 parents 3474c20c 4c67581e
......@@ -62,6 +62,7 @@ class Finder:
},
"page_3": {"合同编号": self.item,
"还款计划表": self.item,
"车辆代理商": self.item,
},
"page_4": {"合同编号": self.item,
"附加产品融资贷款本金总金额明细": self.item,
......@@ -71,6 +72,7 @@ class Finder:
"page_6": {"合同编号": self.item,
},
}
if self.is_asp:
self.init_result["page_7"] = {"合同编号": self.item,
}
self.init_result["page_8"] = {"合同编号": self.item,
......@@ -90,6 +92,25 @@ class Finder:
"日期": self.item,
},
}
else:
self.init_result["page_7"] = {"合同编号": self.item,
"主借人签字": {"签字": self.item,
"日期": self.item,
},
"共借人签字": {"签字": self.item,
"日期": self.item,
},
"保证人1签字": {"签字": self.item,
"日期": self.item,
},
"保证人2签字": {"签字": self.item,
"日期": self.item,
},
"见证人签字": {"签字": self.item,
"日期": self.item,
},
}
def get_top_iou(self, poly, ocr_result):
"""传入一个多边形, 找到与之最匹配的多边形
......@@ -398,6 +419,39 @@ class Finder:
seller['words'] = text
return seller
def get_cldls(self):
    """Locate the value printed next to the '车辆代理商' label on page key '2'.

    The method scans ``self.pdf_info['2']['blocks']`` twice: first to find
    the span whose stripped text equals the label, then to find the first
    span whose centre lies to the right of that label, left of the page's
    horizontal midpoint, and vertically inside the label's box.

    Returns:
        A copy of ``self.item``. When a value span is found, its
        ``'position'`` is set to the span's bbox and ``'words'`` to the
        span's text; otherwise the copy is returned unchanged.

    NOTE(review): bboxes are presumably (x0, y0, x1, y1) and block type 0
    presumably marks text blocks -- confirm against the PDF parser output.
    """
    result = self.item.copy()

    def iter_text_lines():
        # Lazily walk the lines of every block whose type is 0.
        for blk in self.pdf_info['2']['blocks']:
            if blk['type'] == 0:
                yield from blk['lines']

    # Step 1: find the label. Only the first line that contains the label
    # is considered; if several spans in that line match, the last one wins.
    label_box = None
    for text_line in iter_text_lines():
        matches = [sp['bbox'] for sp in text_line['spans']
                   if sp['text'].strip() == '车辆代理商']
        if matches:
            label_box = matches[-1]
            break

    if not label_box:
        return result

    # Step 2: find the value associated with the label.
    label_top, label_right, label_bottom = label_box[1], label_box[2], label_box[3]
    page_mid_x = self.pdf_info['2']['width'] * 0.5
    for text_line in iter_text_lines():
        for sp in text_line['spans']:
            box, words = sp['bbox'], sp['text']
            # Horizontal centre must sit between the label's right edge
            # and the middle of the page.
            if not (label_right < np.mean(box[::2]) < page_mid_x):
                continue
            # Vertical centre must fall within the label's vertical extent.
            if label_top < np.mean(box[1::2]) < label_bottom:
                result['position'] = box
                result['words'] = words
                return result
    return result
def get_borrower_collection_account(self):
account = self.item.copy()
account_name = self.item.copy()
......@@ -885,6 +939,9 @@ class Finder:
repayment_schedule_table = self.get_repayment_schedule()
# print(repayment_schedule_table)
self.init_result['page_3']['还款计划表'] = repayment_schedule_table
# 车辆代理商
cldls = self.get_cldls()
self.init_result['page_3']['车辆代理商'] = cldls
#######################################
# Page 4
# 找合同编号
......@@ -907,6 +964,8 @@ class Finder:
contract_no = self.get_contract_no(page_num='5')
# print(contract_no)
self.init_result['page_6']['合同编号'] = contract_no
if self.is_asp:
# Page 7
# 找合同编号
contract_no = self.get_contract_no(page_num='6')
......@@ -944,6 +1003,41 @@ class Finder:
# print(signature_name, signature_date)
self.init_result['page_8']['见证人签字']['签字'] = signature_name
self.init_result['page_8']['见证人签字']['日期'] = signature_date
else:
# Page 7
# 找合同编号
contract_no = self.get_contract_no(page_num='6')
self.init_result['page_7']['合同编号'] = contract_no
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='合同编号', bottom='共同借款人')
if signature_name['words'] == None:
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='合同编号', bottom='共同借款人(抵押人)')
# print(signature_name, signature_date)
self.init_result['page_7']['主借人签字']['签字'] = signature_name
self.init_result['page_7']['主借人签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='共同借款人', bottom='保证人1')
if signature_name['words'] == None:
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='共同借款人(抵押人)', bottom='保证人1')
# print(signature_name, signature_date)
self.init_result['page_7']['共借人签字']['签字'] = signature_name
self.init_result['page_7']['共借人签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='保证人1', bottom='保证人2')
self.init_result['page_7']['保证人1签字']['签字'] = signature_name
self.init_result['page_7']['保证人1签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='保证人2', bottom='在本人面前亲笔签署本合同')
self.init_result['page_7']['保证人2签字']['签字'] = signature_name
self.init_result['page_7']['保证人2签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='在本人面前亲笔签署本合同', bottom='以下无正文')
# print(signature_name, signature_date)
self.init_result['page_7']['见证人签字']['签字'] = signature_name
self.init_result['page_7']['见证人签字']['日期'] = signature_date
# 重新定制输出
new_results = {"is_asp": self.is_asp,
......
......@@ -51,6 +51,7 @@ class Finder:
"合同编号(正文)": self.item,
"车辆识别代码": self.item,
"车辆卖方(经销商)": self.item,
"车辆代理商": self.item,
"车辆原始销售价格(《机动车销售统一发票》所列金额)": self.item,
"车辆附加产品明细表": self.item,
"融资成本总额": self.item,
......@@ -696,11 +697,13 @@ class Finder:
signature_name['position'] = bbox
return signature_name, signature_date
def get_electronic_signature(self, top, bottom):
def get_electronic_signature(self, top, bottom, t_pno=None):
signature = self.item.copy()
anchor_top = None
anchor_bottom = None
for pno in self.pdf_info:
if t_pno is not None and pno != t_pno:
continue
for block in self.pdf_info[pno]['blocks']:
if block['type'] != 0:
continue
......@@ -709,10 +712,15 @@ class Finder:
bbox, text = span['bbox'], span['text']
if top in text:
anchor_top = bbox[1]
if bottom in text:
elif bottom in text and anchor_top is not None and bbox[3] > anchor_top:
anchor_bottom = bbox[3]
if anchor_top is not None and anchor_bottom is not None:
# print('in')
# print(anchor_top)
# print(anchor_bottom)
for pno in self.pdf_info:
if t_pno is not None and pno != t_pno:
continue
for block in self.pdf_info[pno]['blocks']:
if block['type'] != 0:
continue
......@@ -1005,7 +1013,7 @@ class Finder:
words = text.split(':')[-1]
_id['position'] = bbox
_id['page'] = pno
_id['words'] = words
_id['words'] = words.strip()
return name, _id
def get_key_value_position(self, key):
......@@ -1243,6 +1251,9 @@ class Finder:
if seller['words'] == None:
seller = self.get_key_value(key='车辆卖方:')
self.init_result['车辆卖方(经销商)'] = seller
# 找到车辆代理商
cldls = self.get_key_value(key='车辆代理商', page_num='4')
self.init_result['车辆代理商'] = cldls
# 找到 —— 车辆原始销售价格
vehicle_price = self.get_key_value(key='车辆原始销售价格(《机动车销售统一发票》所列金额):')
self.init_result['车辆原始销售价格(《机动车销售统一发票》所列金额)'] = vehicle_price
......@@ -1264,7 +1275,7 @@ class Finder:
account = self.get_key_value(key='银行账号:', page_num='4')
self.init_result['收款银行账户-银行账号'] = account
bank = self.get_key_value(key='开户银行:', page_num='4')
self.init_result['承租人收款账户-开户行'] = bank
self.init_result['收款银行账户-开户行'] = bank
# 找承租人扣款账户户名、银行账号、银行
name = self.get_key_value(key='户名:', page_num='5')
self.init_result['银行账户-户名'] = name
......@@ -1277,65 +1288,65 @@ class Finder:
# 承租人姓名、签章
if is_cdfl == False:
name = self.get_key_value(key='承租人姓名:')
electronic_signature = self.get_electronic_signature(top='承租人姓名:', bottom='保证人1姓名:')
electronic_signature = self.get_electronic_signature(top='承租人姓名:', bottom='保证人1姓名:', t_pno='5')
if name["words"] == None:
name = self.get_key_value(key='承租人一姓名:')
electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:')
electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:', t_pno='5')
self.init_result['签字页-承租人姓名'] = name
self.init_result['签字页-承租人签章'] = electronic_signature
# 保证人1姓名、签章
name = self.get_key_value(key='保证人1姓名:')
electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:')
electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:', t_pno='5')
self.init_result['签字页-保证人1姓名'] = name
self.init_result['签字页-保证人1签章'] = electronic_signature
# 这里用的是 name["words"] == ""
if name["words"] == "":
name = self.get_key_value(key='共同承租人名称:')
electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:')
electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:', t_pno='5')
self.init_result['签字页-共同承租人姓名'] = name
self.init_result['签字页-共同承租人签章'] = electronic_signature
# 保证人2姓名、签章
name = self.get_key_value(key='保证人2姓名:')
electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:')
electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:', t_pno='5')
self.init_result['签字页-保证人2姓名'] = name
self.init_result['签字页-保证人2签章'] = electronic_signature
# if判断条件对应3_3版本
if name["words"] == "":
name = self.get_key_value(key='保证人1姓名:')
electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:')
electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:', t_pno='5')
self.init_result['签字页-保证人1姓名'] = name
self.init_result['签字页-保证人1签章'] = electronic_signature
# 保证人3姓名、签章
name = self.get_key_value(key='保证人3姓名:')
electronic_signature = self.get_electronic_signature(top='保证人3姓名:', bottom='日期:')
electronic_signature = self.get_electronic_signature(top='保证人3姓名:', bottom='日期:', t_pno='5')
self.init_result['签字页-保证人3姓名'] = name
self.init_result['签字页-保证人3签章'] = electronic_signature
# if判断条件对应3_3版本
if name["words"] == None:
name = self.get_key_value(key='保证人2姓名:')
electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='日期:')
electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='日期:', t_pno='5')
self.init_result['签字页-保证人2姓名'] = name
self.init_result['签字页-保证人2签章'] = electronic_signature
else:
name = self.get_key_value(key='承租人一姓名:')
electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:')
electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:', t_pno='5')
self.init_result['签字页-承租人姓名'] = name
self.init_result['签字页-承租人签章'] = electronic_signature
name = self.get_key_value(key='共同承租人名称:')
electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:')
electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:', t_pno='5')
self.init_result['签字页-共同承租人姓名'] = name
self.init_result['签字页-共同承租人签章'] = electronic_signature
name = self.get_key_value(key='保证人1姓名:')
electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:')
electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:', t_pno='5')
self.init_result['签字页-保证人1姓名'] = name
self.init_result['签字页-保证人1签章'] = electronic_signature
name = self.get_key_value(key='保证人2姓名:')
electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:')
electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:', t_pno='5')
self.init_result['签字页-保证人2姓名'] = name
self.init_result['签字页-保证人2签章'] = electronic_signature
......@@ -1404,12 +1415,12 @@ class Finder:
self.init_result_2['融资租赁期限'] = lease_term
# 签字页抵押人姓名和签章
name = self.get_key_value(key='抵押人姓名:')
electronic_signature = self.get_electronic_signature(top='抵押权人盖章', bottom='抵押人配偶姓名:')
electronic_signature = self.get_electronic_signature(top='抵押权人盖章', bottom='抵押人配偶姓名:', t_pno='1')
self.init_result_2['签字页-抵押人姓名'] = name
self.init_result_2['签字页-抵押人签章'] = electronic_signature
# 签字页抵押人配偶姓名和签章
name = self.get_key_value(key='抵押人配偶姓名:')
electronic_signature = self.get_electronic_signature(top='抵押人配偶姓名:', bottom='日期')
electronic_signature = self.get_electronic_signature(top='抵押人配偶姓名:', bottom='日期', t_pno='1')
self.init_result_2['签字页-抵押人配偶姓名'] = name
self.init_result_2['签字页-抵押人配偶签章'] = electronic_signature
return self.init_result_2
\ No newline at end of file
......
......@@ -6,6 +6,7 @@ class HMHRetriever:
def __init__(self):
self.words_str = 'words'
self.position_str = 'location'
self.fix_hava_str = '有'
self.default_position = [0, 0, 0, 0]
self.search_fields_list = [
('借款/承租人姓名', ''),
......@@ -51,7 +52,8 @@ class HMHRetriever:
for name_date_tuple in name_date_list:
if len(name_date_tuple) == 2:
result[self.search_fields_list[4][0]] = {
self.words_str: '{0} {1}'.format(name_date_tuple[0].replace('\u3000', '').strip(), name_date_tuple[1]),
# self.words_str: '{0} {1}'.format(name_date_tuple[0].replace('\u3000', '').strip(), name_date_tuple[1]),
self.words_str: self.fix_hava_str,
self.position_str: bbox
}
is_find_name_date = True
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!