fdb7ca98 by 周伟奇

fix format

1 parent 87525e99
......@@ -3,7 +3,7 @@
# @Email : 9428.al@gmail.com
# @Create Date : 2021-07-20 16:42:41
# @Last Modified : 2021-09-07 19:52:39
# @Description :
# @Description :
import re
import numpy as np
......@@ -17,107 +17,110 @@ class Finder:
self.is_asp = False
self.item = {"words": None,
"position": None,
}
}
def gen_init_result(self, is_asp):
    """Build the empty, page-keyed result template and store it in ``self.init_result``.

    Every leaf of the template is an independent shallow copy of
    ``self.item``. Handing out the very same dict object for every field
    would let a single in-place write to one field show up in all of them.

    The signature layout (contract number plus five signer roles) lands on
    ``page_7`` when ``self.is_asp`` is falsy; otherwise ``page_7`` only
    carries the contract number and the signature layout moves to ``page_8``.

    Args:
        is_asp: Accepted for interface compatibility. The layout is chosen
            from ``self.is_asp``; this argument is not consulted.
    """
    def blank():
        # A fresh placeholder per leaf so no two fields alias each other.
        return self.item.copy()

    def group(*keys):
        # Nested section whose entries are all blank placeholders.
        return {key: blank() for key in keys}

    def loan_principal():
        return group("大写", "小写", "车辆贷款本金金额", "附加产品融资贷款本金总金额")

    def person():
        return group("name", "id")

    def signature_page():
        # Shared by page_7 (non-ASP) and page_8 (ASP).
        page = {"合同编号": blank()}
        for role in ("主借人签字", "共借人签字", "保证人1签字",
                     "保证人2签字", "见证人签字"):
            page[role] = group("签字", "日期")
        return page

    # Template for the formatted algorithm output; key order is preserved
    # because it determines the order of the emitted result.
    self.init_result = {
        "page_1": {
            "合同编号": blank(),
            "所购车辆价格": blank(),
            "车架号": blank(),
            "贷款本金金额": loan_principal(),
            "贷款期限": blank(),
            "附加产品融资贷款本金总金额明细": blank(),
            "借款人签字及时间": blank(),
        },
        "page_2": {
            "合同编号": blank(),
            "借款人及抵押人": person(),
            "共同借款人及共同抵押人": person(),
            "保证人1": person(),
            "保证人2": person(),
            "所购车辆价格": blank(),
            "车架号": blank(),
            "经销商": blank(),
            "贷款本金金额": loan_principal(),
            "贷款期限": blank(),
            "还款账户": group("账号", "户名", "开户行"),
        },
        "page_3": {
            "合同编号": blank(),
            "还款计划表": blank(),
        },
        "page_4": {
            "合同编号": blank(),
            "附加产品融资贷款本金总金额明细": blank(),
        },
        "page_5": {
            "合同编号": blank(),
        },
        "page_6": {
            "合同编号": blank(),
        },
    }

    if not self.is_asp:
        self.init_result["page_7"] = signature_page()
    else:
        self.init_result["page_7"] = {"合同编号": blank()}
        self.init_result["page_8"] = signature_page()
def poly_to_rectangle(self, poly):
    """Convert a four-point polygon into its axis-aligned bounding box.

    Args:
        poly: Flat sequence of exactly eight coordinates,
            ``x0, y0, x1, y1, x2, y2, x3, y3``.

    Returns:
        list: ``[xmin, ymin, xmax, ymax]`` enclosing all four points.

    Raises:
        ValueError: If ``poly`` does not unpack into exactly eight values.
    """
    # Unpack into distinct names so a wrong-length polygon still fails loudly.
    x0, y0, x1, y1, x2, y2, x3, y3 = poly
    xs = (x0, x1, x2, x3)
    ys = (y0, y1, y2, y3)
    # Take the real extremes rather than trusting the corner order: unpacking
    # into repeated names keeps only the last value bound to each name, which
    # is a bounding box only for an axis-aligned quad in one specific order.
    return [min(xs), min(ys), max(xs), max(ys)]
def get_contract_no(self, page_num):
"""传入页码,查看该页码右上角的编号
Args:
page_num (string):
Returns:
string:
"""
......@@ -133,6 +136,7 @@ class Finder:
contract_no['words'] = words
contract_no['position'] = location
return contract_no
def get_vehicle_price(self, page_num='0'):
vehicle_price = self.item.copy()
# vehicle_price['words'] = ''
......@@ -145,6 +149,7 @@ class Finder:
vehicle_price['words'] = words
vehicle_price['position'] = location
return vehicle_price
def get_vin(self, page_num='0'):
vin = self.item.copy()
# vin['words'] = ''
......@@ -157,6 +162,7 @@ class Finder:
vin['words'] = words
vin['position'] = location
return vin
def get_loan_principal(self, page_num='0'):
chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾',
'佰', '仟', '万', '亿', '元', '角', '分', '零', '整']
......@@ -197,6 +203,7 @@ class Finder:
asp_2['position'] = bbox
asp_2['words'] = words
return upper, lower, asp_1, asp_2
def get_loan_term(self, page_num='0'):
loan_term = self.item.copy()
all_text = ''
......@@ -220,18 +227,21 @@ class Finder:
loan_term['position'] = bbox
loan_term['words'] = words
return loan_term
def mergelist(self, text_list):
    """Glue continuation fragments onto entries that contain '所购'.

    An entry containing '所购' absorbs each directly following entry that
    still has at least one Chinese character, and keeps absorbing until an
    entry without Chinese characters (or the end of the list) is reached.

    The list is processed in a single pass. An entry containing '所購'-style
    marker text at the very end of the list, or a merge that reaches the end
    of the list, is handled without indexing past the last element.

    Args:
        text_list: List of text fragments (strings) in reading order.

    Returns:
        list: ``text_list`` itself when nothing had to be merged, otherwise
        a new list with the merged entries.
    """
    non_chinese = re.compile("[^\u4e00-\u9fa5]")  # matches any non-Chinese character
    merged = []
    for text in text_list:
        # Stripping every non-Chinese character leaves a non-empty string
        # only when the fragment contains Chinese text.
        if merged and '所购' in merged[-1] and non_chinese.sub('', text):
            merged[-1] += text
        else:
            merged.append(text)
    # Hand back the caller's list untouched when no merge happened.
    if len(merged) == len(text_list):
        return text_list
    return merged
def get_asp_details(self, page_num):
asp_details_table_term = self.item.copy()
asp_details_table = []
......@@ -262,6 +272,7 @@ class Finder:
if len(asp_details_table) > 0:
asp_details_table_term['words'] = asp_details_table
return asp_details_table_term
def get_signature(self):
signature = self.item.copy()
for block in self.pdf_info['0']['blocks']:
......@@ -275,6 +286,7 @@ class Finder:
signature['words'] = words
signature['position'] = bbox
return signature
def get_somebody(self, top, bottom):
# 指定上下边界后,返回上下边界内的客户信息
_name = self.item.copy()
......@@ -309,6 +321,7 @@ class Finder:
_id['position'] = bbox
_id['words'] = words
return _name, _id
def get_seller(self):
seller = self.item.copy()
# 先找到 key
......@@ -330,11 +343,12 @@ class Finder:
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
if anchor_bbox[2]<np.mean(bbox[::2])<half_width and \
anchor_bbox[1]<np.mean(bbox[1::2])<anchor_bbox[3]:
if anchor_bbox[2] < np.mean(bbox[::2]) < half_width and \
anchor_bbox[1] < np.mean(bbox[1::2]) < anchor_bbox[3]:
seller['position'] = bbox
seller['words'] = text
return seller
def get_payback_account(self):
account = self.item.copy()
account_name = self.item.copy()
......@@ -387,6 +401,7 @@ class Finder:
account_bank['position'] = bbox
account_bank['words'] = words
return account, account_name, account_bank
def get_repayment_schedule(self):
repayment_schedule = self.item.copy()
# 只看第二页
......@@ -416,6 +431,7 @@ class Finder:
if len(repayment_schedule_table) > 0:
repayment_schedule['words'] = repayment_schedule_table
return repayment_schedule
def get_signature_role_1(self):
signature_role_1 = self.init_item.copy()
# 先定位签字区域
......@@ -445,11 +461,13 @@ class Finder:
else:
words = '无'
boxes = np.array(boxes).reshape((-1, 2))
position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
position = [min(boxes[:, 0]), min(boxes[:, 1]),
max(boxes[:, 0]), max(boxes[:, 1])]
signature_role_1['page_num'] = page_num
signature_role_1['position'] = position
signature_role_1['words'] = words
return signature_role_1
def get_signature_role_2(self):
signature_role_2 = self.init_item.copy()
# 先定位签字区域
......@@ -479,11 +497,13 @@ class Finder:
else:
words = '无'
boxes = np.array(boxes).reshape((-1, 2))
position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
position = [min(boxes[:, 0]), min(boxes[:, 1]),
max(boxes[:, 0]), max(boxes[:, 1])]
signature_role_2['page_num'] = page_num
signature_role_2['position'] = position
signature_role_2['words'] = words
return signature_role_2
def get_signature_role_3(self):
signature_role_3 = self.init_item.copy()
# 先定位签字区域
......@@ -513,11 +533,13 @@ class Finder:
else:
words = '无'
boxes = np.array(boxes).reshape((-1, 2))
position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
position = [min(boxes[:, 0]), min(boxes[:, 1]),
max(boxes[:, 0]), max(boxes[:, 1])]
signature_role_3['page_num'] = page_num
signature_role_3['position'] = position
signature_role_3['words'] = words
return signature_role_3
def get_signature_role_4(self):
signature_role_4 = self.init_item.copy()
# 先定位签字区域
......@@ -547,11 +569,13 @@ class Finder:
else:
words = '无'
boxes = np.array(boxes).reshape((-1, 2))
position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
position = [min(boxes[:, 0]), min(boxes[:, 1]),
max(boxes[:, 0]), max(boxes[:, 1])]
signature_role_4['page_num'] = page_num
signature_role_4['position'] = position
signature_role_4['words'] = words
return signature_role_4
def get_signature_role_5(self):
signature_role_5 = self.init_item.copy()
# 先定位签字区域
......@@ -582,11 +606,13 @@ class Finder:
else:
words = '无'
boxes = np.array(boxes).reshape((-1, 2))
position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])]
position = [min(boxes[:, 0]), min(boxes[:, 1]),
max(boxes[:, 0]), max(boxes[:, 1])]
signature_role_5['page_num'] = page_num
signature_role_5['position'] = position
signature_role_5['words'] = words
return signature_role_5
def get_last_page_signature(self, page_num, top, bottom):
signature_name = self.item.copy()
signature_date = self.item.copy()
......@@ -610,7 +636,7 @@ class Finder:
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom):
if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
name = text.split(' ')[0]
date = text.split(':')[-1]
signature_name['words'] = name
......@@ -618,10 +644,11 @@ class Finder:
signature_date['words'] = date
signature_date['position'] = bbox
return signature_name, signature_date
def get_info(self):
"""
block['type'] == 0 : 表示该元素为图片
Returns:
dict: Description
"""
......@@ -672,22 +699,27 @@ class Finder:
contract_no = self.get_contract_no(page_num='0')
self.init_result['page_2']['合同编号'] = contract_no
# 找借款人及抵押人(地址字段原本有空格)
borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人:', bottom='共同借款人:')
borrower_name, borrower_id = self.get_somebody(
top='借款人及抵押人:', bottom='共同借款人:')
# 这是为了同时兼容 8.1 版本
if borrower_name['words'] == None:
borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人:', bottom='共同借款人及共同抵押人:')
borrower_name, borrower_id = self.get_somebody(
top='借款人及抵押人:', bottom='共同借款人及共同抵押人:')
self.init_result['page_2']['借款人及抵押人']['name'] = borrower_name
self.init_result['page_2']['借款人及抵押人']['id'] = borrower_id
# 找共同借款人及共同抵押人
co_borrower_name, co_borrower_id = self.get_somebody(top='共同借款人:', bottom='保证人1:')
co_borrower_name, co_borrower_id = self.get_somebody(
top='共同借款人:', bottom='保证人1:')
self.init_result['page_2']['共同借款人及共同抵押人']['name'] = co_borrower_name
self.init_result['page_2']['共同借款人及共同抵押人']['id'] = co_borrower_id
# 保证人1
first_guarantor_name, first_guarantor_id = self.get_somebody(top='保证人1:', bottom='保证人2:')
first_guarantor_name, first_guarantor_id = self.get_somebody(
top='保证人1:', bottom='保证人2:')
self.init_result['page_2']['保证人1']['name'] = first_guarantor_name
self.init_result['page_2']['保证人1']['id'] = first_guarantor_id
# 保证人2
second_guarantor_name, second_guarantor_id = self.get_somebody(top='保证人2:', bottom='第一章')
second_guarantor_name, second_guarantor_id = self.get_somebody(
top='保证人2:', bottom='第一章')
self.init_result['page_2']['保证人2']['name'] = second_guarantor_name
self.init_result['page_2']['保证人2']['id'] = second_guarantor_id
# 所购车辆价格
......@@ -745,23 +777,23 @@ class Finder:
contract_no = self.get_contract_no(page_num='6')
self.init_result['page_7']['合同编号'] = contract_no
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='合同编号', bottom='共同借款人')
top='合同编号', bottom='共同借款人')
self.init_result['page_7']['主借人签字']['签字'] = signature_name
self.init_result['page_7']['主借人签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='共同借款人', bottom='保证人1')
top='共同借款人', bottom='保证人1')
self.init_result['page_7']['共借人签字']['签字'] = signature_name
self.init_result['page_7']['共借人签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='保证人1', bottom='保证人2')
top='保证人1', bottom='保证人2')
self.init_result['page_7']['保证人1签字']['签字'] = signature_name
self.init_result['page_7']['保证人1签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='保证人2', bottom='在本人面前亲笔签署本合同')
top='保证人2', bottom='在本人面前亲笔签署本合同')
self.init_result['page_7']['保证人2签字']['签字'] = signature_name
self.init_result['page_7']['保证人2签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='6',
top='在本人面前亲笔签署本合同', bottom='以下无正文')
top='在本人面前亲笔签署本合同', bottom='以下无正文')
self.init_result['page_7']['见证人签字']['签字'] = signature_name
self.init_result['page_7']['见证人签字']['日期'] = signature_date
else:
......@@ -774,27 +806,27 @@ class Finder:
contract_no = self.get_contract_no(page_num='7')
self.init_result['page_8']['合同编号'] = contract_no
signature_name, signature_date = self.get_last_page_signature(page_num='7',
top='合同编号', bottom='共同借款人')
top='合同编号', bottom='共同借款人')
self.init_result['page_8']['主借人签字']['签字'] = signature_name
self.init_result['page_8']['主借人签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='7',
top='共同借款人', bottom='保证人1')
top='共同借款人', bottom='保证人1')
self.init_result['page_8']['共借人签字']['签字'] = signature_name
self.init_result['page_8']['共借人签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='7',
top='保证人1', bottom='保证人2')
top='保证人1', bottom='保证人2')
self.init_result['page_8']['保证人1签字']['签字'] = signature_name
self.init_result['page_8']['保证人1签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='7',
top='保证人2', bottom='在本人面前亲笔签署本合同')
top='保证人2', bottom='在本人面前亲笔签署本合同')
self.init_result['page_8']['保证人2签字']['签字'] = signature_name
self.init_result['page_8']['保证人2签字']['日期'] = signature_date
signature_name, signature_date = self.get_last_page_signature(page_num='7',
top='在本人面前亲笔签署本合同', bottom='以下无正文')
top='在本人面前亲笔签署本合同', bottom='以下无正文')
self.init_result['page_8']['见证人签字']['签字'] = signature_name
self.init_result['page_8']['见证人签字']['日期'] = signature_date
# 重新定制输出
new_results = {"is_asp": self.is_asp,
"page_info": self.init_result
}
}
return new_results
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!