93c7cc0a by 周伟奇

KWOM_July

1 parent b10ff66a
......@@ -25,6 +25,15 @@ def extract_info(ocr_results):
def predict(pdf_info, is_qrs=False, is_fsm=False):
pop_seceond_page_info = {}
if not is_fsm and not is_qrs and len(pdf_info) == 9:
pop_seceond_page_info = pdf_info.pop('1', {})
for pno in range(8):
if pno == 0:
pdf_info[str(pno)]['blocks'].extend(pop_seceond_page_info.get('blocks', []))
else:
pdf_info[str(pno)] = pdf_info.pop(str(pno+1))
ocr_results = {}
for pno in pdf_info:
ocr_results[pno] = {}
......
......@@ -13,6 +13,7 @@ class Finder:
self.item = {"words": None,
"position": None,
}
self.cn_re = re.compile(u'[\u4e00-\u9fa5]')
def gen_init_result(self, is_asp):
# 格式化算法输出
......@@ -187,6 +188,11 @@ class Finder:
vin['position'] = location
return vin
def cn_char_filter(self, src_str):
    """Return only the Chinese characters of ``src_str``, in original order.

    Every character matched by the compiled pattern ``self.cn_re`` is kept
    and everything else (digits, Latin letters, punctuation, whitespace) is
    dropped.

    Args:
        src_str: Text to filter. An empty string or ``None`` yields ``''``.

    Returns:
        The matched characters concatenated into one string; ``''`` when
        nothing matches.
    """
    # Guard against missing text so callers always get a str back
    # instead of a TypeError from the regex engine.
    if not src_str:
        return ''
    # self.cn_re is already compiled, so call its own findall rather than
    # routing it back through the module-level re.findall.
    return ''.join(self.cn_re.findall(src_str))
def get_loan_principal(self, page_num='0'):
chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾',
'佰', '仟', '万', '亿', '元', '角', '分', '零', '整']
......@@ -201,7 +207,7 @@ class Finder:
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
if fuzz.ratio(''.join(chinese_keywords), text) > 15:
if fuzz.ratio(''.join(chinese_keywords), self.cn_char_filter(text)) >= 10:
text = text.split(':')[-1].strip()
upper['position'] = bbox
upper['words'] = text
......
......@@ -31,6 +31,7 @@ def get_table_info(bbox_1, bbox_2, ocr_result):
class Finder:
def __init__(self, pdf_info):
self.pdf_info = pdf_info
self.item = {"words": None,
......@@ -42,12 +43,15 @@ class Finder:
"承租人-姓名": self.item,
"承租人-证件号码": self.item,
"承租人-法定代表人或授权代表": self.item,
"共同承租人-姓名": self.item,
"共同承租人-证件号码": self.item,
"共同承租人-法定代表人或授权代表": self.item,
"保证人1-姓名": self.item,
"保证人1-证件号码": self.item,
"保证人1-法定代表人或授权代表": self.item,
"保证人2-姓名": self.item,
"保证人2-证件号码": self.item,
"保证人2-法定代表人或授权代表": self.item,
......@@ -67,15 +71,19 @@ class Finder:
"银行账户-开户行": self.item,
"签字页-承租人姓名": self.item,
"签字页-承租人签章": self.item,
"签字页-共同承租人姓名": self.item,
"签字页-共同承租人签章": self.item,
"签字页-保证人1姓名": self.item,
"签字页-保证人1签章": self.item,
"签字页-保证人2姓名": self.item,
"签字页-保证人2签章": self.item,
"签字页-保证人3姓名": self.item,
"签字页-保证人3签章": self.item,
}
# 格式化输出 车辆处置协议 要是别的字段
self.init_result_1 = {"合同编号": self.item,
"承租人-姓名": self.item,
......@@ -88,6 +96,7 @@ class Finder:
"签字页-销售经销商": self.item,
"签字页-销售经销商签章": self.item,
}
# 格式化输出 车辆租赁抵押合同
self.init_result_2 = {"合同编号": self.item,
"合同编号(正文)": self.item,
......@@ -174,6 +183,7 @@ class Finder:
# contract_no['words'] = words
contract_no['words'] = re.sub("\s", "", words).replace(")", "")
return contract_no
matchObj = re.search(r'编号为(.*?)的', all_text)
if matchObj:
words = matchObj.group(1).strip()
......@@ -182,6 +192,7 @@ class Finder:
# contract_no['words'] = words
contract_no['words'] = re.sub("\s", "", words).replace(")", "")
return contract_no
matchObj = re.search(r'编号为(.*?))的', all_text)
if matchObj:
words = matchObj.group(1).strip()
......@@ -291,6 +302,7 @@ class Finder:
def get_asp_details(self, page_num):
asp_details_table_term = self.item.copy()
asp_details_table = []
asp_details_text_list = []
table = False
......@@ -306,20 +318,25 @@ class Finder:
table = False
if table == True:
asp_details_text_list.append(text)
for i in range((len(asp_details_text_list) + 2) // 3):
for i in range((len(asp_details_text_list)+2)//3):
line = []
if i == 0:
line = [asp_details_text_list[0]]
else:
for j in range(3):
line.append(asp_details_text_list[i * 3 - 2 + j])
line.append(asp_details_text_list[i*3-2+j])
asp_details_table.append(line)
if len(asp_details_table) > 0:
asp_details_table_term['words'] = asp_details_table
return asp_details_table_term
def get_signature(self):
signature = self.item.copy()
for block in self.pdf_info['0']['blocks']:
if block['type'] != 0:
continue
......@@ -448,6 +465,7 @@ class Finder:
def get_repayment_schedule(self):
repayment_schedule = self.item.copy()
repayment_schedule_text_list = []
table = False
page = None
......@@ -475,6 +493,7 @@ class Finder:
if not left < bbox[0] < right:
continue
repayment_schedule_text_list.append(text)
if text.strip() == "61.":
page = pno
table = True
......@@ -482,14 +501,17 @@ class Finder:
# print("repayment_schedule_text_list = ", repayment_schedule_text_list)
# repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']]
repayment_schedule_table = [['序号', '租金']]
for i in range(len(repayment_schedule_text_list) // 4):
line = [f'{i + 1}.']
for i in range(len(repayment_schedule_text_list)//4):
line = [f'{i+1}.']
# 4表示4列的意思
for j in range(4):
line.append(repayment_schedule_text_list[i * 4 + j])
line.append(repayment_schedule_text_list[i*4+j])
# 只保留序号和租金列
line = [line[0].replace('.', ''), line[3]]
repayment_schedule_table.append(line)
repayment_schedule['words'] = repayment_schedule_table
repayment_schedule['page'] = page
return repayment_schedule
......@@ -538,7 +560,8 @@ class Finder:
else:
words = '无'
boxes = np.array(boxes).reshape((-1, 2))
position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
position = [min(boxes[:, 0]), min(boxes[:, 1]),
max(boxes[:, 0]), max(boxes[:, 1])]
signature_role_2['page_num'] = page_num
signature_role_2['position'] = position
signature_role_2['words'] = words
......@@ -573,7 +596,8 @@ class Finder:
else:
words = '无'
boxes = np.array(boxes).reshape((-1, 2))
position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
position = [min(boxes[:, 0]), min(boxes[:, 1]),
max(boxes[:, 0]), max(boxes[:, 1])]
signature_role_3['page_num'] = page_num
signature_role_3['position'] = position
signature_role_3['words'] = words
......@@ -608,7 +632,8 @@ class Finder:
else:
words = '无'
boxes = np.array(boxes).reshape((-1, 2))
position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
position = [min(boxes[:, 0]), min(boxes[:, 1]),
max(boxes[:, 0]), max(boxes[:, 1])]
signature_role_4['page_num'] = page_num
signature_role_4['position'] = position
signature_role_4['words'] = words
......@@ -644,7 +669,8 @@ class Finder:
else:
words = '无'
boxes = np.array(boxes).reshape((-1, 2))
position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])]
position = [min(boxes[:, 0]), min(boxes[:, 1]),
max(boxes[:, 0]), max(boxes[:, 1])]
signature_role_5['page_num'] = page_num
signature_role_5['position'] = position
signature_role_5['words'] = words
......@@ -717,6 +743,7 @@ class Finder:
name = self.item.copy()
id_num = self.item.copy()
representative = self.item.copy()
# 以保证人3 的左上角为定位点
anchor = None
for block in self.pdf_info[page_num]['blocks']:
......@@ -728,6 +755,7 @@ class Finder:
# 找到角色姓名
if re.match('保证人3', text) is not None:
anchor = [bbox[0], bbox[1]]
if anchor is not None:
for block in self.pdf_info[page_num]['blocks']:
if block['type'] != 0:
......@@ -743,60 +771,52 @@ class Finder:
name['position'] = bbox
if role_key == '承租人:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
bbox[1::2]) < anchor[1]:
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
bbox[1::2]) < anchor[1]:
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
representative['words'] = words
representative['page'] = page_num
representative['position'] = bbox
if role_key == '保证人1:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
bbox[1::2]) > anchor[1]:
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
bbox[1::2]) > anchor[1]:
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
representative['words'] = words
representative['page'] = page_num
representative['position'] = bbox
if role_key == '保证人2:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
bbox[1::2]) < anchor[1]:
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
bbox[1::2]) < anchor[1]:
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
representative['words'] = words
representative['page'] = page_num
representative['position'] = bbox
if role_key == '保证人3:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
bbox[1::2]) > anchor[1]:
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
bbox[1::2]) > anchor[1]:
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
representative['words'] = words
representative['page'] = page_num
......@@ -805,6 +825,7 @@ class Finder:
def get_table_add_product(self):
table_add_product = self.item.copy()
add_product_page_num = None
for pno in self.pdf_info:
for block in self.pdf_info[f'{pno}']['blocks']:
......@@ -825,11 +846,14 @@ class Finder:
xmin, ymin, xmax, ymax = bbox
bbox = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
ocr_results.append([bbox, text])
lines = [['项目', '购买价格', '实际融资金额']]
key_xm = None
key_gmjg = None
key_sjrzje = None
key_total = None
for index, span in enumerate(ocr_results):
if span[1] == '项目':
key_xm = index
......@@ -839,22 +863,29 @@ class Finder:
key_sjrzje = index
if span[1] == '总计':
key_total = index
bbox, text = ocr_results[key_xm]
rh = abs(bbox[1] - bbox[-1])
rh = abs(bbox[1]-bbox[-1])
anchor = np.array(bbox).reshape((-1, 2))
anchor[:, 0] += 2 * rh
anchor[:, 0] += 2*rh
anchor[:, 1] += rh
for i in range(5):
for span in ocr_results:
iou = caculate_iou(anchor, span[0])
if iou > 0.01 and span[1].strip() != '所购':
x = get_table_info(span[0], ocr_results[key_gmjg][0], ocr_results)
y = get_table_info(span[0], ocr_results[key_sjrzje][0], ocr_results)
x = get_table_info(
span[0], ocr_results[key_gmjg][0], ocr_results)
y = get_table_info(
span[0], ocr_results[key_sjrzje][0], ocr_results)
line = [span[1].replace('\u3000', ' '), x, y]
# print(line)
lines.append(line)
anchor = np.array(span[0]).reshape((-1, 2))
anchor[:, 1] += rh
total = get_table_info(ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results)
total = get_table_info(
ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results)
lines.append(['总计', '', total])
# 所购 BMW悦然焕
......@@ -880,7 +911,6 @@ class Finder:
if '事故维修补' in line[0]:
line[0] = '所购 事故维修补偿方案'
filtered_lines.append(line)
table_add_product['words'] = filtered_lines
table_add_product['page'] = add_product_page_num
table_add_product['position'] = None
......@@ -889,6 +919,7 @@ class Finder:
def get_contract_no_dy(self):
# 查找抵押合同编号
contract_no = self.item.copy()
key_box = None
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
......@@ -899,6 +930,7 @@ class Finder:
bbox, text = span['bbox'], span['text']
if '抵押合同编号' in text:
key_box = bbox
if key_box is not None:
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
......@@ -916,6 +948,7 @@ class Finder:
def get_dyr_name_id(self):
name = self.item.copy()
_id = self.item.copy()
key_box = None
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
......@@ -928,7 +961,7 @@ class Finder:
key_box = bbox
if key_box is not None:
rh = abs(key_box[1] - key_box[3])
rh = abs(key_box[1]-key_box[3])
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
if block['type'] != 0:
......@@ -936,12 +969,12 @@ class Finder:
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '姓名' in text:
if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '姓名' in text:
words = text.split(':')[-1]
name['position'] = bbox
name['page'] = pno
name['words'] = words
if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '证件号码' in text:
if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '证件号码' in text:
words = text.split(':')[-1]
_id['position'] = bbox
_id['page'] = pno
......@@ -951,6 +984,7 @@ class Finder:
def get_dyrpo_name_id(self):
name = self.item.copy()
_id = self.item.copy()
key_box = None
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
......@@ -961,8 +995,9 @@ class Finder:
bbox, text = span['bbox'], span['text']
if text == '抵押人配偶(如适':
key_box = bbox
if key_box is not None:
rh = abs(key_box[1] - key_box[3])
rh = abs(key_box[1]-key_box[3])
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
if block['type'] != 0:
......@@ -970,13 +1005,13 @@ class Finder:
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '姓名' in text:
if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '姓名' in text:
words = text.split(':')[-1]
name['position'] = bbox
name['page'] = pno
name['words'] = words
if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '证件号码' in text:
words = text.split(':')[-1]
if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '证件号码' in text:
words = text.split(':')[-1].strip()
_id['position'] = bbox
_id['page'] = pno
_id['words'] = words
......@@ -984,6 +1019,7 @@ class Finder:
def get_key_value_position(self, key):
value = self.item.copy()
key_box = None
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
......@@ -994,8 +1030,9 @@ class Finder:
bbox, text = span['bbox'], span['text']
if text == key:
key_box = bbox
if key_box is not None:
rh = abs(key_box[1] - key_box[3])
rh = abs(key_box[1]-key_box[3])
for pno in self.pdf_info:
for block in self.pdf_info[pno]['blocks']:
if block['type'] != 0:
......@@ -1003,9 +1040,8 @@ class Finder:
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs(
key_box[2] - bbox[0]) < rh * 10:
words = text
if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs(key_box[2]-bbox[0]) < rh*10:
words = text.strip()
value['position'] = bbox
value['page'] = pno
value['words'] = words
......@@ -1015,6 +1051,7 @@ class Finder:
name = self.item.copy()
id_num = self.item.copy()
representative = self.item.copy()
# 以保证人2 的左上角为定位点
anchor = None
for block in self.pdf_info[page_num]['blocks']:
......@@ -1026,6 +1063,7 @@ class Finder:
# 找到角色姓名
if re.match('保证人2', text) is not None:
anchor = [bbox[0], bbox[1]]
if anchor is not None:
for block in self.pdf_info[page_num]['blocks']:
if block['type'] != 0:
......@@ -1041,60 +1079,52 @@ class Finder:
name['position'] = bbox
if role_key == '承租人一:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
bbox[1::2]) < anchor[1]:
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
bbox[1::2]) < anchor[1]:
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
representative['words'] = words
representative['page'] = page_num
representative['position'] = bbox
if role_key == '共同承租人:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
bbox[1::2]) > anchor[1]:
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(
bbox[1::2]) > anchor[1]:
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
representative['words'] = words
representative['page'] = page_num
representative['position'] = bbox
if role_key == '保证人1:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
bbox[1::2]) < anchor[1]:
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
bbox[1::2]) < anchor[1]:
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
representative['words'] = words
representative['page'] = page_num
representative['position'] = bbox
if role_key == '保证人2:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
bbox[1::2]) > anchor[1]:
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(
bbox[1::2]) > anchor[1]:
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
representative['words'] = words
representative['page'] = page_num
......@@ -1137,6 +1167,7 @@ class Finder:
# 取 Page 1 上的合同编号
contract_no = self.get_contract_no(page_num='0')
self.init_result['合同编号'] = contract_no
# 粗略判断是否是 ‘车贷分离版本’ 的合同
is_cdfl = False
for block in self.pdf_info['0']['blocks']:
......@@ -1147,60 +1178,81 @@ class Finder:
bbox, text = span['bbox'], span['text']
if '共同承租人:' in text:
is_cdfl = True
if is_cdfl == False:
# 从第一页上取四个角色的姓名和证件号码
name, id_num, representative = self.get_role_info(role_key='承租人:', page_num='0')
name, id_num, representative = self.get_role_info(
role_key='承租人:', page_num='0')
if name["words"] == None:
name, id_num, representative = self.get_role_info_3_3(role_key='承租人一:', page_num='0')
name, id_num, representative = self.get_role_info_3_3(
role_key='承租人一:', page_num='0')
self.init_result['承租人-姓名'] = name
self.init_result['承租人-证件号码'] = id_num
self.init_result['承租人-法定代表人或授权代表'] = representative
name, id_num, representative = self.get_role_info(role_key='保证人1:', page_num='0')
name, id_num, representative = self.get_role_info(
role_key='保证人1:', page_num='0')
self.init_result['保证人1-姓名'] = name
self.init_result['保证人1-证件号码'] = id_num
self.init_result['保证人1-法定代表人或授权代表'] = representative
# if条件判别 对应3_3版本
if name["words"] == None:
name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人:', page_num='0')
name, id_num, representative = self.get_role_info_3_3(
role_key='共同承租人:', page_num='0')
self.init_result['共同承租人-姓名'] = name
self.init_result['共同承租人-证件号码'] = id_num
self.init_result['共同承租人-法定代表人或授权代表'] = representative
name, id_num, representative = self.get_role_info(role_key='保证人2:', page_num='0')
name, id_num, representative = self.get_role_info(
role_key='保证人2:', page_num='0')
self.init_result['保证人2-姓名'] = name
self.init_result['保证人2-证件号码'] = id_num
self.init_result['保证人2-法定代表人或授权代表'] = representative
# if条件判别 对应3_3版本
if name["words"] == None:
name, id_num, representative = self.get_role_info_3_3(role_key='保证人1:', page_num='0')
name, id_num, representative = self.get_role_info_3_3(
role_key='保证人1:', page_num='0')
self.init_result['保证人2-姓名'] = name
self.init_result['保证人2-证件号码'] = id_num
self.init_result['保证人2-法定代表人或授权代表'] = representative
name, id_num, representative = self.get_role_info(role_key='保证人3:', page_num='0')
name, id_num, representative = self.get_role_info(
role_key='保证人3:', page_num='0')
self.init_result['保证人3-姓名'] = name
self.init_result['保证人3-证件号码'] = id_num
self.init_result['保证人3-法定代表人或授权代表'] = representative
if name["words"] == None:
name, id_num, representative = self.get_role_info_3_3(role_key='保证人2:', page_num='0')
name, id_num, representative = self.get_role_info_3_3(
role_key='保证人2:', page_num='0')
self.init_result['保证人3-姓名'] = name
self.init_result['保证人3-证件号码'] = id_num
self.init_result['保证人3-法定代表人或授权代表'] = representative
else:
name, id_num, representative = self.get_role_info_3_3(role_key='承租人一:', page_num='0')
name, id_num, representative = self.get_role_info_3_3(
role_key='承租人一:', page_num='0')
self.init_result['承租人-姓名'] = name
self.init_result['承租人-证件号码'] = id_num
self.init_result['承租人-法定代表人或授权代表'] = representative
name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人:', page_num='0')
name, id_num, representative = self.get_role_info_3_3(
role_key='共同承租人:', page_num='0')
self.init_result['共同承租人-姓名'] = name
self.init_result['共同承租人-证件号码'] = id_num
self.init_result['共同承租人-法定代表人或授权代表'] = representative
name, id_num, representative = self.get_role_info_3_3(role_key='保证人1:', page_num='0')
name, id_num, representative = self.get_role_info_3_3(
role_key='保证人1:', page_num='0')
self.init_result['保证人1-姓名'] = name
self.init_result['保证人1-证件号码'] = id_num
self.init_result['保证人1-法定代表人或授权代表'] = representative
name, id_num, representative = self.get_role_info_3_3(role_key='保证人2:', page_num='0')
name, id_num, representative = self.get_role_info_3_3(
role_key='保证人2:', page_num='0')
self.init_result['保证人2-姓名'] = name
self.init_result['保证人2-证件号码'] = id_num
self.init_result['保证人2-法定代表人或授权代表'] = representative
# 在所有页面中找正文中(第二部分 融资租赁主要条款及付款计划)的那个编号,因为存在换行的情况所以暂时不带位置输出
contract_no = self.get_contract_no_one()
self.init_result['合同编号(正文)'] = contract_no
......@@ -1211,7 +1263,8 @@ class Finder:
seller = self.get_key_value(key='车辆卖方(经销商):')
self.init_result['车辆卖方(经销商)'] = seller
# 找到 —— 车辆原始销售价格
vehicle_price = self.get_key_value(key='车辆原始销售价格(《机动车销售统一发票》所列金额):')
vehicle_price = self.get_key_value(
key='车辆原始销售价格(《机动车销售统一发票》所列金额):')
self.init_result['车辆原始销售价格(《机动车销售统一发票》所列金额)'] = vehicle_price
# 找车辆附加产品明细(表)
table_add_product = self.get_table_add_product()
......@@ -1232,66 +1285,85 @@ class Finder:
self.init_result['银行账户-银行账号'] = account
bank = self.get_key_value(key='开户银行:')
self.init_result['银行账户-开户行'] = bank
# 找签字页上的系列信息
# 承租人姓名、签章
if is_cdfl == False:
name = self.get_key_value(key='承租人姓名:')
electronic_signature = self.get_electronic_signature(top='承租人姓名:', bottom='保证人1姓名:')
electronic_signature = self.get_electronic_signature(
top='承租人姓名:', bottom='保证人1姓名:')
if name["words"] == None:
name = self.get_key_value(key='承租人一姓名:')
electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:')
electronic_signature = self.get_electronic_signature(
top='承租人一姓名:', bottom='共同承租人名称:')
self.init_result['签字页-承租人姓名'] = name
self.init_result['签字页-承租人签章'] = electronic_signature
# 保证人1姓名、签章
name = self.get_key_value(key='保证人1姓名:')
electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:')
electronic_signature = self.get_electronic_signature(
top='保证人1姓名:', bottom='保证人2姓名:')
self.init_result['签字页-保证人1姓名'] = name
self.init_result['签字页-保证人1签章'] = electronic_signature
# 这里用的是 name["words"] == ""
if name["words"] == "":
name = self.get_key_value(key='共同承租人名称:')
electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:')
electronic_signature = self.get_electronic_signature(
top='共同承租人名称:', bottom='保证人1姓名:')
self.init_result['签字页-共同承租人姓名'] = name
self.init_result['签字页-共同承租人签章'] = electronic_signature
# 保证人2姓名、签章
name = self.get_key_value(key='保证人2姓名:')
electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:')
electronic_signature = self.get_electronic_signature(
top='保证人2姓名:', bottom='保证人3姓名:')
self.init_result['签字页-保证人2姓名'] = name
self.init_result['签字页-保证人2签章'] = electronic_signature
# if判断条件对应3_3版本
if name["words"] == "":
name = self.get_key_value(key='保证人1姓名:')
electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:')
electronic_signature = self.get_electronic_signature(
top='保证人1姓名:', bottom='保证人2姓名:')
self.init_result['签字页-保证人1姓名'] = name
self.init_result['签字页-保证人1签章'] = electronic_signature
# 保证人3姓名、签章
name = self.get_key_value(key='保证人3姓名:')
electronic_signature = self.get_electronic_signature(top='保证人3姓名:', bottom='日期:')
electronic_signature = self.get_electronic_signature(
top='保证人3姓名:', bottom='日期:')
self.init_result['签字页-保证人3姓名'] = name
self.init_result['签字页-保证人3签章'] = electronic_signature
# if判断条件对应3_3版本
if name["words"] == None:
name = self.get_key_value(key='保证人2姓名:')
electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='日期:')
electronic_signature = self.get_electronic_signature(
top='保证人2姓名:', bottom='日期:')
self.init_result['签字页-保证人2姓名'] = name
self.init_result['签字页-保证人2签章'] = electronic_signature
else:
name = self.get_key_value(key='承租人一姓名:')
electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:')
electronic_signature = self.get_electronic_signature(
top='承租人一姓名:', bottom='共同承租人名称:')
self.init_result['签字页-承租人姓名'] = name
self.init_result['签字页-承租人签章'] = electronic_signature
name = self.get_key_value(key='共同承租人名称:')
electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:')
electronic_signature = self.get_electronic_signature(
top='共同承租人名称:', bottom='保证人1姓名:')
self.init_result['签字页-共同承租人姓名'] = name
self.init_result['签字页-共同承租人签章'] = electronic_signature
name = self.get_key_value(key='保证人1姓名:')
electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:')
electronic_signature = self.get_electronic_signature(
top='保证人1姓名:', bottom='保证人2姓名:')
self.init_result['签字页-保证人1姓名'] = name
self.init_result['签字页-保证人1签章'] = electronic_signature
name = self.get_key_value(key='保证人2姓名:')
electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:')
electronic_signature = self.get_electronic_signature(
top='保证人2姓名:', bottom='保证人3姓名:')
self.init_result['签字页-保证人2姓名'] = name
self.init_result['签字页-保证人2签章'] = electronic_signature
return self.init_result
def get_info_1(self):
......@@ -1307,7 +1379,8 @@ class Finder:
# 销售经销商
seller = self.get_key_value(key='销售经销商:', page_num='0')
if seller['words'] == "":
seller = self.get_value_by_findall('销售经销商:', '地址:', page_num='0')
seller = self.get_value_by_findall(
'销售经销商:', '地址:', page_num='0')
self.init_result_1['销售经销商'] = seller
# 合同编号(正文)
contract_no = self.get_contract_no_one()
......@@ -1325,7 +1398,8 @@ class Finder:
seller = self.get_key_value(key='销售经销商:')
if seller['words'] == "":
# 销售经销商:深圳市宝创汽车贸易有限公司南山分公司(请授权代表签字并请盖章)
seller = self.get_value_by_findall('销售经销商:', '(请授权代表签字并请盖章)', page_num='3')
seller = self.get_value_by_findall(
'销售经销商:', '(请授权代表签字并请盖章)', page_num='3')
self.init_result_1['签字页-销售经销商'] = seller
# 经销商签章
pass
......@@ -1357,12 +1431,14 @@ class Finder:
self.init_result_2['融资租赁期限'] = lease_term
# 签字页抵押人姓名和签章
name = self.get_key_value(key='抵押人姓名:')
electronic_signature = self.get_electronic_signature(top='抵押权人盖章', bottom='抵押人配偶姓名:')
electronic_signature = self.get_electronic_signature(
top='抵押权人盖章', bottom='抵押人配偶姓名:')
self.init_result_2['签字页-抵押人姓名'] = name
self.init_result_2['签字页-抵押人签章'] = electronic_signature
# 签字页抵押人配偶姓名和签章
name = self.get_key_value(key='抵押人配偶姓名:')
electronic_signature = self.get_electronic_signature(top='抵押人配偶姓名:', bottom='日期')
electronic_signature = self.get_electronic_signature(
top='抵押人配偶姓名:', bottom='日期')
self.init_result_2['签字页-抵押人配偶姓名'] = name
self.init_result_2['签字页-抵押人配偶签章'] = electronic_signature
return self.init_result_2
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!