import re

import numpy as np
from fuzzywuzzy import fuzz
from shapely.geometry import Polygon


class Finder:
    """Extract key fields from a multi-page loan contract.

    Two inputs are combined:

    * ``pdf_info`` - mapping of page key (``'0'``, ``'1'``, ...) to a page
      dict with ``'blocks'`` (each holding ``'type'`` and ``'lines'`` ->
      ``'spans'`` -> ``'bbox'`` / ``'text'``) and ``'width'``.
      NOTE(review): this looks like PyMuPDF ``page.get_text("dict")``
      output, where span bboxes are ``(x0, y0, x1, y1)`` - confirm.
    * ``ocr_results`` - mapping of page key to a mapping of arbitrary keys to
      ``(bbox, text)`` pairs, where ``bbox`` is a flat 8-number quadrilateral
      ``[x0, y0, x1, y1, x2, y2, x3, y3]``.

    Every extracted field is an "item" dict ``{"words": ..., "position": ...}``
    whose values stay ``None`` when the field could not be located.
    """

    def __init__(self, pdf_info, ocr_results):
        self.pdf_info = pdf_info
        self.ocr_results = ocr_results
        self.is_asp = False
        # Template for a single extracted field; always copied before use.
        self.item = {"words": None, "position": None, }
        # Matches one CJK unified ideograph.
        self.cn_re = re.compile(u'[\u4e00-\u9fa5]')

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _new_item(self):
        """Return a fresh, unshared copy of the blank item template."""
        return self.item.copy()

    def _iter_text_lines(self, page_num):
        """Yield the span list of every line in the text blocks of a page.

        Blocks whose ``type`` is not 0 are skipped. A page key missing from
        ``pdf_info`` yields nothing instead of raising ``KeyError``.
        """
        page = self.pdf_info.get(page_num)
        if not page:
            return
        for block in page['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                yield line['spans']

    def _iter_spans(self, page_num):
        """Yield ``(bbox, text)`` for every span in the text blocks of a page."""
        for spans in self._iter_text_lines(page_num):
            for span in spans:
                yield span['bbox'], span['text']

    def _page_text(self, page_num):
        """Return all span texts of a page concatenated in reading order."""
        return ''.join(text for _bbox, text in self._iter_spans(page_num))

    def _find_ocr_field(self, page_num, marker, separator):
        """Return the item for the last OCR box on the page containing ``marker``.

        ``words`` is whatever follows the last ``separator`` in the box text;
        ``position`` is the box converted to ``[xmin, ymin, xmax, ymax]``.
        """
        field = self._new_item()
        for bbox, text in self.ocr_results.get(page_num, {}).values():
            if marker in text:
                field['words'] = text.split(separator)[-1]
                field['position'] = self.poly_to_rectangle(bbox)
        return field

    def _fill_from_span(self, item, page_num, words, matches):
        """Store ``words`` and the bbox of the last span accepted by ``matches``.

        ``item`` is left untouched when no span on the page is accepted.
        """
        for bbox, text in self._iter_spans(page_num):
            if matches(text):
                item['position'] = bbox
                item['words'] = words

    def _spans_right_of(self, page_num, anchor_bbox):
        """Yield spans on the anchor's row, between it and the page centre.

        A span qualifies when its horizontal centre lies between the anchor's
        right edge and half the page width, and its vertical centre lies
        within the anchor's vertical extent.
        """
        half_width = self.pdf_info[page_num]['width'] * 0.5
        for bbox, text in self._iter_spans(page_num):
            if anchor_bbox[2] < np.mean(bbox[::2]) < half_width and \
                    anchor_bbox[1] < np.mean(bbox[1::2]) < anchor_bbox[3]:
                yield bbox, text

    def _signature_region(self, start_keyword, stop_keyword='日期',
                          skip_first_page=False):
        """Collect the spans between a start keyword and a stop keyword.

        All pages are scanned in ``pdf_info`` key order. Collection starts at
        a span containing ``start_keyword`` (inclusive) and stops at a span
        containing ``stop_keyword`` (exclusive). The collecting flag is not
        reset between pages.

        Returns:
            dict: item with ``words`` set to ``'有'`` when more than four
            spans were collected and ``'无'`` otherwise, ``position`` set to
            the bounding box of the collected spans (``None`` when nothing
            was collected), and ``page_num`` set to the key of the last page
            that contributed a span.
        """
        result = self._new_item()
        texts = []
        boxes = []
        page_num = None
        in_region = False
        for page_key in list(self.pdf_info.keys()):
            for bbox, text in self._iter_spans(page_key):
                if start_keyword in text and \
                        not (skip_first_page and int(page_key) == 0):
                    in_region = True
                if stop_keyword in text:
                    in_region = False
                if in_region:
                    page_num = page_key
                    texts.append(text)
                    boxes.append(bbox)
        position = None
        if boxes:
            # Each bbox contributes its corner points; take the overall extent.
            corners = np.array(boxes).reshape((-1, 2))
            position = [min(corners[:, 0]), min(corners[:, 1]),
                        max(corners[:, 0]), max(corners[:, 1])]
        result['page_num'] = page_num
        result['position'] = position
        result['words'] = '有' if len(texts) > 4 else '无'
        return result

    def _fill_last_page_signatures(self, result_key, page_num):
        """Fill the five signature/date pairs of the signature page.

        Each role lists one or more ``(top, bottom)`` keyword pairs; the next
        pair is tried only when the previous one found no signature text.
        """
        roles = (
            ('主借人签字', (('合同编号', '共同借款人'),
                       ('合同编号', '共同借款人(抵押人)'))),
            ('共借人签字', (('共同借款人', '保证人1'),
                       ('共同借款人(抵押人)', '保证人1'))),
            ('保证人1签字', (('保证人1', '保证人2'),)),
            ('保证人2签字', (('保证人2', '在本人面前亲笔签署本合同'),)),
            ('见证人签字', (('在本人面前亲笔签署本合同', '以下无正文'),)),
        )
        page_result = self.init_result[result_key]
        for field, bounds in roles:
            for top, bottom in bounds:
                signature_name, signature_date = self.get_last_page_signature(
                    page_num=page_num, top=top, bottom=bottom)
                if signature_name['words'] is not None:
                    break
            page_result[field]['签字'] = signature_name
            page_result[field]['日期'] = signature_date

    # ------------------------------------------------------------------
    # Result template
    # ------------------------------------------------------------------
    def gen_init_result(self, is_asp):
        """Build ``self.init_result``, the blank per-page output template.

        Args:
            is_asp (bool): when true the contract has eight pages and the
                signature block lives on ``page_8``; otherwise it lives on
                ``page_7``.

        Every leaf is an independent blank item, so filling one field can
        never leak into another through a shared dict.
        """
        blank = self._new_item

        def principal():
            return {"大写": blank(), "小写": blank(),
                    "车辆贷款本金金额": blank(),
                    "附加产品融资贷款本金总金额": blank(), }

        def party():
            return {"name": blank(), "id": blank(), }

        def bank_account():
            return {"账号": blank(), "户名": blank(), "开户行": blank(), }

        def signed():
            return {"签字": blank(), "日期": blank(), }

        def signature_page():
            return {"合同编号": blank(),
                    "主借人签字": signed(),
                    "共借人签字": signed(),
                    "保证人1签字": signed(),
                    "保证人2签字": signed(),
                    "见证人签字": signed(), }

        self.init_result = {
            "page_1": {"合同编号": blank(),
                       "所购车辆价格": blank(),
                       "车架号": blank(),
                       "贷款本金金额": principal(),
                       "贷款期限": blank(),
                       "附加产品融资贷款本金总金额明细": blank(),
                       "借款人签字及时间": blank(), },
            "page_2": {"合同编号": blank(),
                       "借款人及抵押人": party(),
                       "共同借款人及共同抵押人": party(),
                       "保证人1": party(),
                       "保证人2": party(),
                       "所购车辆价格": blank(),
                       "车架号": blank(),
                       "经销商": blank(),
                       "贷款本金金额": principal(),
                       "贷款期限": blank(),
                       "标准利率": blank(),
                       "借款人收款账户": bank_account(),
                       "还款账户": bank_account(), },
            "page_3": {"合同编号": blank(),
                       "还款计划表": blank(),
                       "车辆代理商": blank(), },
            "page_4": {"合同编号": blank(),
                       "附加产品融资贷款本金总金额明细": blank(), },
            "page_5": {"合同编号": blank(), },
            "page_6": {"合同编号": blank(), },
        }
        if is_asp:
            self.init_result["page_7"] = {"合同编号": blank(), }
            self.init_result["page_8"] = signature_page()
        else:
            self.init_result["page_7"] = signature_page()

    # ------------------------------------------------------------------
    # Geometry
    # ------------------------------------------------------------------
    def get_top_iou(self, poly, ocr_result):
        """Find the OCR box that best overlaps a polygon.

        Args:
            poly: polygon as a flat coordinate sequence or an ``(n, 2)`` array.
            ocr_result: mapping of key -> ``(bbox, text)`` for one page.

        Returns:
            ``[iou, key]`` of the box with the highest intersection-over-union
            (the last such box on ties), or ``(-1, -1)`` when there is no
            usable candidate.
        """
        if not ocr_result:
            return -1, -1
        target = Polygon(np.array(poly).reshape((-1, 2)))
        if not target.is_valid:
            return -1, -1
        best = None
        for key, (bbox, _text) in ocr_result.items():
            candidate = Polygon(np.array(bbox).reshape((-1, 2)))
            if not candidate.is_valid:
                continue
            inter = candidate.intersection(target).area
            union = candidate.area + target.area - inter
            if union <= 0:
                # Two zero-area shapes: IoU is undefined, skip the candidate.
                continue
            iou = inter / union
            if best is None or iou >= best[0]:
                best = [iou, key]
        if best is None:
            return -1, -1
        return best

    def poly_to_rectangle(self, poly):
        """Convert a flat ``[x0, y0, x1, y1, ...]`` polygon to its bounding box.

        Returns:
            list: ``[xmin, ymin, xmax, ymax]``.
        """
        xs = poly[0::2]
        ys = poly[1::2]
        return [min(xs), min(ys), max(xs), max(ys)]

    # ------------------------------------------------------------------
    # OCR-based fields
    # ------------------------------------------------------------------
    def get_contract_no(self, page_num):
        """Read the contract number from the OCR boxes of a page.

        Args:
            page_num (str): page key into ``ocr_results``.

        Returns:
            dict: item whose ``words`` is the text after the last ``':'`` of
            the box containing ``'合同编号:'``.
        """
        return self._find_ocr_field(page_num, '合同编号:', ':')

    def get_vehicle_price(self, page_num='0'):
        """Read the purchased-vehicle price from the OCR boxes of a page."""
        return self._find_ocr_field(page_num, '所购车辆价格为人民币', '币')

    def get_vin(self, page_num='0'):
        """Read the vehicle identification number from the OCR boxes of a page."""
        return self._find_ocr_field(page_num, '车架号:', ':')

    def cn_char_filter(self, src_str):
        """Return only the Chinese characters of ``src_str``, in order."""
        return ''.join(self.cn_re.findall(src_str))

    # ------------------------------------------------------------------
    # PDF-text-based fields
    # ------------------------------------------------------------------
    def get_loan_principal(self, page_num='0'):
        """Extract the loan principal amounts from a page.

        Returns:
            tuple: ``(upper, lower, asp_1, asp_2)`` items - the amount in
            Chinese capital numerals, the amount in digits, and the two
            bracketed amounts found respectively above and below the
            ``'附加产品融资贷款本金总金额'`` anchor span.
        """
        chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖',
                            '拾', '佰', '仟', '万', '亿', '元', '角', '分', '零', '整']
        keyword_str = ''.join(chinese_keywords)
        upper = self._new_item()
        lower = self._new_item()
        asp_1 = self._new_item()
        asp_2 = self._new_item()
        anchor_bbox = None
        for bbox, text in self._iter_spans(page_num):
            if fuzz.ratio(keyword_str, self.cn_char_filter(text)) >= 10:
                # The checks below deliberately see the trimmed text, exactly
                # as the original implementation did.
                text = text.split(':')[-1].strip()
                upper['position'] = bbox
                upper['words'] = text
            if '小写:¥' in text:
                lower['position'] = bbox
                lower['words'] = text.split('¥')[-1].strip()
            if '附加产品融资贷款本金总金额' == text:
                anchor_bbox = bbox
        if anchor_bbox:
            anchor_y = np.mean(anchor_bbox[1::2])
            for bbox, text in self._iter_spans(page_num):
                if '人民币:小写:' not in text:
                    continue
                found = re.search(r'人民币:小写:\[(.*)\]', text)
                if found is None:
                    # Label present but no bracketed amount: nothing to read.
                    continue
                span_y = np.mean(bbox[1::2])
                if span_y < anchor_y:
                    asp_1['position'] = bbox
                    asp_1['words'] = found.group(1)
                if span_y > anchor_y:
                    asp_2['position'] = bbox
                    asp_2['words'] = found.group(1)
        return upper, lower, asp_1, asp_2

    def get_loan_term(self, page_num='0'):
        """Extract the loan term in months.

        The number is matched on the concatenated page text (the phrase may
        be split across spans); the position is that of the last span
        containing ``'<n>个月'``.
        """
        loan_term = self._new_item()
        found = re.search(r'贷款期限(\d+)个月', self._page_text(page_num))
        if found:
            words = found.group(1)
            needle = f'{words}个月'
            self._fill_from_span(loan_term, page_num, words,
                                 lambda text: needle in text)
        return loan_term

    def get_standard_rate(self, page_num='0'):
        """Extract the contract's standard annual interest rate from a page."""
        standard_rate = self._new_item()
        for bbox, text in self._iter_spans(page_num):
            found = re.search(r'本合同当期的标准利率为(\S+)%/年', text)
            if found:
                standard_rate['position'] = bbox
                standard_rate['words'] = found.group(1)
        return standard_rate

    def mergelist(self, text_list):
        """Merge each ``'所购'`` entry with a following Chinese continuation.

        While some element contains ``'所购'`` and the element after it
        contains at least one Chinese character, the last such pair is
        concatenated into one element.

        Returns:
            list: ``text_list`` itself when nothing was merged, otherwise a
            new list.
        """
        has_cn = re.compile("[\u4e00-\u9fa5]")
        while True:
            merge_at = -1
            # Stop one short of the end: the last element has no successor.
            for index in range(len(text_list) - 1):
                if '所购' in text_list[index] and \
                        has_cn.search(text_list[index + 1]):
                    merge_at = index
            if merge_at == -1:
                return text_list
            text_list = (text_list[:merge_at]
                         + [text_list[merge_at] + text_list[merge_at + 1]]
                         + text_list[merge_at + 2:])

    def get_asp_details(self, page_num):
        """Rebuild the add-on product financing table from OCR boxes.

        Starting from the ``'项目1'`` header box, the method repeatedly probes
        one row further down (1.4 x the current box height) for up to ten
        rows. A row containing ``'所购'`` starts a new table line whose other
        two cells are looked up under the ``'用途总金额2'`` and ``'贷款本金3'``
        headers; any other row is treated as a wrapped continuation and
        appended to the first cell of the previous line.

        Returns:
            dict: item whose ``words`` is the table (a list of rows);
            ``position`` is left ``None``. Cells that cannot be located are
            empty strings.
        """
        asp_details_table_term = self._new_item()
        asp_details_table = [['附加产品融资贷款本金总金额及贷款利率明细'],
                             ['项目1', '用途总金额2', '贷款本金3']]
        page_ocr = self.ocr_results.get(page_num, {})

        bbox_xm = None
        bbox_ytzje = None
        bbox_dkbj = None
        bbox_total = None
        for bbox, text in page_ocr.values():
            if text == '项目1':
                bbox_xm = bbox
            if text == '用途总金额2':
                bbox_ytzje = bbox
            if text == '贷款本金3':
                bbox_dkbj = bbox
            if text in ['附加产品融资贷款本', '附加产品融资贷款本金', '附加产品融资贷']:
                bbox_total = bbox

        def cell_at(column_bbox, row_bbox):
            """Return ``(bbox, text)`` of the box at a column/row crossing.

            The probe takes x coordinates from the column header and y
            coordinates from the row; ``(None, '')`` means nothing overlaps.
            """
            if column_bbox is None:
                return None, ''
            probe = [column_bbox[0], row_bbox[1], column_bbox[2], row_bbox[3],
                     column_bbox[4], row_bbox[5], column_bbox[6], row_bbox[7]]
            iou, key = self.get_top_iou(poly=probe, ocr_result=page_ocr)
            if not iou > 0:
                return None, ''
            hit_bbox, hit_text = page_ocr[key]
            return hit_bbox, hit_text

        if bbox_xm:
            for _ in range(10):
                row_height = abs(bbox_xm[1] - bbox_xm[-1])
                probe = np.array(bbox_xm).reshape((-1, 2))
                probe[:, 1] += int(row_height * 1.4)
                iou, key = self.get_top_iou(poly=probe, ocr_result=page_ocr)
                if not iou > 0:
                    break
                row_bbox, xm_text = page_ocr[key]
                bbox_xm = row_bbox
                # Handles a project description that wraps onto a second line.
                if '所购' not in xm_text:
                    asp_details_table[-1][0] += xm_text
                    continue
                ytzje_bbox, ytzje_text = cell_at(bbox_ytzje, row_bbox)
                # As in the original, the third column is probed at the
                # height of the second-column hit when there is one.
                row_for_dkbj = row_bbox if ytzje_bbox is None else ytzje_bbox
                _hit, dkbj_text = cell_at(bbox_dkbj, row_for_dkbj)
                if xm_text == ytzje_text:
                    # One OCR box covered both columns; the amount is the
                    # part after the last space.
                    parts = xm_text.rsplit(' ', 1)
                    if len(parts) == 2:
                        xm_text, ytzje_text = parts
                asp_details_table.append([xm_text, ytzje_text, dkbj_text])

        if bbox_total:
            _hit, total_text = cell_at(bbox_dkbj, bbox_total)
            asp_details_table.append(['附加产品融资贷款本金总金额:', '', total_text])

        asp_details_table_term['words'] = asp_details_table
        return asp_details_table_term

    def get_signature(self):
        """Return the last span on page ``'0'`` containing ``'签署日期'``."""
        signature = self._new_item()
        for bbox, text in self._iter_spans('0'):
            if '签署日期' in text:
                signature['words'] = text
                signature['position'] = bbox
        return signature

    def get_somebody(self, top, bottom):
        """Extract a party's name and ID between two headings on page ``'1'``.

        Args:
            top (str): text marking the upper boundary of the party section.
            bottom (str): text marking the lower boundary.

        Returns:
            tuple: ``(name_item, id_item)``.
        """
        _name = self._new_item()
        _id = self._new_item()
        # First pass: locate the vertical boundaries (bottom edge of the
        # last span containing each marker).
        y_top = 0
        y_bottom = 0
        for bbox, text in self._iter_spans('1'):
            if top in text:
                y_top = bbox[3]
            if bottom in text:
                y_bottom = bbox[3]
        # Second pass: read the labelled values between the boundaries.
        for bbox, text in self._iter_spans('1'):
            if not y_top < bbox[3] < y_bottom:
                continue
            if '姓名/名称' in text:
                _name['position'] = bbox
                _name['words'] = text.split(':')[-1]
            if '自然人身份证件号码/法人执照号码' in text:
                _id['position'] = bbox
                _id['words'] = text.split(':')[-1]
        return _name, _id

    def get_seller(self):
        """Extract the dealer name printed to the right of its label on page ``'1'``.

        The label is the last span equal to ``'经销商'`` or ``'车辆销售方'``;
        the value is the last qualifying span on the label's row.
        """
        seller = self._new_item()
        anchor_bbox = None
        for bbox, text in self._iter_spans('1'):
            if text in ['经销商', '车辆销售方']:
                anchor_bbox = bbox
        if anchor_bbox:
            for bbox, text in self._spans_right_of('1', anchor_bbox):
                seller['position'] = bbox
                seller['words'] = text
        return seller

    def get_cldls(self):
        """Extract the vehicle agent printed to the right of its label on page ``'2'``.

        The search for the ``'车辆代理商'`` label stops at the first line that
        contains it; the value is the first qualifying span on that row.
        """
        seller = self._new_item()
        anchor_bbox = None
        for spans in self._iter_text_lines('2'):
            for span in spans:
                if span['text'].strip() == '车辆代理商':
                    anchor_bbox = span['bbox']
            if anchor_bbox is not None:
                break
        if anchor_bbox:
            for bbox, text in self._spans_right_of('2', anchor_bbox):
                seller['position'] = bbox
                seller['words'] = text
                break
        return seller

    def get_borrower_collection_account(self):
        """Extract the borrower's collection account from page ``'1'``.

        Only the explicit (not "to be notified separately") layout is read.

        Returns:
            tuple: ``(account, account_name, account_bank)`` items.
        """
        account = self._new_item()
        account_name = self._new_item()
        account_bank = self._new_item()
        all_text = self._page_text('1')
        if '借款人收款账户' in all_text:
            # NOTE(review): the two replace() arguments look identical here;
            # one may originally have been a different whitespace character
            # (e.g. full-width or non-breaking space) - confirm.
            all_text = all_text.replace(' ', '').replace(' ', '')
            targets = ((account, r'账号:(.*?)户名'),
                       (account_name, r'户名:(.*?)开户行'),
                       (account_bank, r'开户行:(.*?)借款人'))
            for item, pattern in targets:
                found = re.findall(pattern, all_text)
                if found:
                    words = found[0]
                    self._fill_from_span(item, '1', words,
                                         lambda text, w=words: w in text)
        return account, account_name, account_bank

    def get_payback_account(self):
        """Extract the repayment account from page ``'1'``.

        Only the text after the last ``'(13) 还款账户'`` heading is parsed.

        Returns:
            tuple: ``(account, account_name, account_bank)`` items.
        """
        account = self._new_item()
        account_name = self._new_item()
        account_bank = self._new_item()
        all_text = self._page_text('1')
        if '(13) 还款账户' in all_text:
            all_text = all_text.split('(13) 还款账户')[-1]
            # NOTE(review): see get_borrower_collection_account about the
            # two apparently identical replace() calls - confirm.
            all_text = all_text.replace(' ', '').replace(' ', '')

            found = re.findall(r'账号:(.*?)户名', all_text)
            if found:
                words = found[0]
                self._fill_from_span(account, '1', words,
                                     lambda text, w=words: w in text)

            found = re.findall(r'户名:(.*?)开户行', all_text)
            if found:
                words = found[0]
                self._fill_from_span(account_name, '1', words,
                                     lambda text, w=words: w in text)

            found = re.findall(r'开户行:(.*?);', all_text)
            if found:
                words = found[0]
                needle = f'开户行:{words};'
                # The bank line is matched with its label and terminator so
                # that an identical bank name elsewhere is not picked up.
                self._fill_from_span(
                    account_bank, '1', words,
                    lambda text: needle in text.replace(' ', ''))
        return account, account_name, account_bank

    def get_repayment_schedule(self):
        """Rebuild the five-column repayment schedule table from page ``'2'``.

        Spans from the one equal to ``'序号'`` up to (not including) the one
        containing the closing remark are collected and grouped into rows of
        five cells.

        Returns:
            dict: item whose ``words`` is the list of rows, or ``None`` when
            no row was collected.
        """
        columns = 5
        repayment_schedule = self._new_item()
        cells = []
        in_table = False
        for _bbox, text in self._iter_spans('2'):
            if '序号' == text:
                in_table = True
            if '以上表格中所列的序号并非还款期数' in text:
                in_table = False
            if in_table:
                cells.append(text)
        rows = []
        for i in range(len(cells) // columns):
            row = cells[i * columns:(i + 1) * columns]
            if str(i + 1) == row[1]:
                break
            rows.append(row)
        if rows:
            repayment_schedule['words'] = rows
        return repayment_schedule

    # ------------------------------------------------------------------
    # Signature presence checks
    # ------------------------------------------------------------------
    def get_signature_role_1(self):
        """Check for a signature in the borrower (mortgagor) region."""
        return self._signature_region('借款人(抵押人)')

    def get_signature_role_2(self):
        """Check for a signature in the co-borrower (co-mortgagor) region."""
        return self._signature_region('共同借款人(共同抵押人)')

    def get_signature_role_3(self):
        """Check for a signature in the guarantor 1 region (first page ignored)."""
        return self._signature_region('保证人1', skip_first_page=True)

    def get_signature_role_4(self):
        """Check for a signature in the guarantor 2 region (first page ignored)."""
        return self._signature_region('保证人2', skip_first_page=True)

    def get_signature_role_5(self):
        """Check for a signature in the witness region (first page ignored)."""
        return self._signature_region('见证人签字', stop_keyword='年',
                                      skip_first_page=True)

    def get_last_page_signature(self, page_num, top, bottom):
        """Extract a signer name and signing date between two markers.

        Args:
            page_num (str): page key into ``pdf_info``.
            top (str): text whose span marks the upper boundary.
            bottom (str): text whose span marks the lower boundary.

        Returns:
            tuple: ``(signature_name, signature_date)`` items taken from the
            last ``'签署日期'`` span whose vertical centre lies between the
            boundaries: the name is the text before the first space, the
            date is the text after the last ``':'``.
        """
        signature_name = self._new_item()
        signature_date = self._new_item()
        anchor_top = None
        anchor_bottom = None
        for bbox, text in self._iter_spans(page_num):
            if top in text:
                anchor_top = bbox[1]
            if bottom in text:
                anchor_bottom = bbox[1]
        if anchor_top is not None and anchor_bottom is not None:
            for bbox, text in self._iter_spans(page_num):
                if '签署日期' in text and \
                        int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
                    signature_name['words'] = text.split(' ')[0]
                    signature_name['position'] = bbox
                    signature_date['words'] = text.split(':')[-1]
                    signature_date['position'] = bbox
        return signature_name, signature_date

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------
    def get_info(self):
        """Run every extractor and assemble the per-page result.

        The contract is treated as an ASP product when any OCR box on the
        first page contains ``'附加产品融资贷款本金总金额'``. Documents with
        more than eight OCR pages are not parsed and keep the blank template.

        Returns:
            dict: ``{"is_asp": bool, "page_info": <per-page result>}``.
        """
        for _bbox, text in self.ocr_results.get('0', {}).values():
            if '附加产品融资贷款本金总金额' in text:
                self.is_asp = True
        self.gen_init_result(self.is_asp)

        # In the 8.5 samples supplied by the customer, pages ran into each
        # other; such documents cannot be recognised for now.
        if len(self.ocr_results) <= 8:
            result = self.init_result

            # ---------------- Page 1 ----------------
            page_1 = result['page_1']
            page_1['合同编号'] = self.get_contract_no(page_num='0')
            page_1['所购车辆价格'] = self.get_vehicle_price()
            page_1['车架号'] = self.get_vin()
            # For ASP products the principal also carries the vehicle loan
            # principal and the add-on financing principal.
            upper, lower, asp_1, asp_2 = self.get_loan_principal()
            page_1['贷款本金金额']['大写'] = upper
            page_1['贷款本金金额']['小写'] = lower
            page_1['贷款本金金额']['车辆贷款本金金额'] = asp_1
            page_1['贷款本金金额']['附加产品融资贷款本金总金额'] = asp_2
            page_1['贷款期限'] = self.get_loan_term()
            page_1['附加产品融资贷款本金总金额明细'] = \
                self.get_asp_details(page_num='0')
            page_1['借款人签字及时间'] = self.get_signature()

            # ---------------- Page 2 ----------------
            page_2 = result['page_2']
            # NOTE(review): the contract number for page 2 is read from page
            # '0', unlike every other page - possibly a copy-paste slip;
            # left unchanged, confirm against real documents.
            page_2['合同编号'] = self.get_contract_no(page_num='0')

            borrower_name, borrower_id = self.get_somebody(
                top='借款人及抵押人:', bottom='共同借款人:')
            # Fallback kept for compatibility with the 8.1 layout.
            if borrower_name['words'] is None:
                borrower_name, borrower_id = self.get_somebody(
                    top='借款人及抵押人:', bottom='共同借款人及共同抵押人:')
            # Fallback for the layout where vehicle and loan are separated.
            if borrower_name['words'] is None:
                borrower_name, borrower_id = self.get_somebody(
                    top='借款人:', bottom='共同借款人及抵押人:')
            page_2['借款人及抵押人']['name'] = borrower_name
            page_2['借款人及抵押人']['id'] = borrower_id

            co_borrower_name, co_borrower_id = self.get_somebody(
                top='共同借款人:', bottom='保证人1:')
            page_2['共同借款人及共同抵押人']['name'] = co_borrower_name
            page_2['共同借款人及共同抵押人']['id'] = co_borrower_id

            first_guarantor_name, first_guarantor_id = self.get_somebody(
                top='保证人1:', bottom='保证人2:')
            page_2['保证人1']['name'] = first_guarantor_name
            page_2['保证人1']['id'] = first_guarantor_id

            second_guarantor_name, second_guarantor_id = self.get_somebody(
                top='保证人2:', bottom='第一章')
            page_2['保证人2']['name'] = second_guarantor_name
            page_2['保证人2']['id'] = second_guarantor_id

            page_2['所购车辆价格'] = self.get_vehicle_price(page_num='1')
            page_2['车架号'] = self.get_vin(page_num='1')
            page_2['经销商'] = self.get_seller()

            upper, lower, asp_1, asp_2 = self.get_loan_principal(page_num='1')
            page_2['贷款本金金额']['大写'] = upper
            page_2['贷款本金金额']['小写'] = lower
            page_2['贷款本金金额']['车辆贷款本金金额'] = asp_1
            page_2['贷款本金金额']['附加产品融资贷款本金总金额'] = asp_2

            page_2['贷款期限'] = self.get_loan_term(page_num='1')
            page_2['标准利率'] = self.get_standard_rate(page_num='1')

            # Borrower collection account, added in the 202212 release.
            account, account_name, account_bank = \
                self.get_borrower_collection_account()
            page_2['借款人收款账户']['账号'] = account
            page_2['借款人收款账户']['户名'] = account_name
            page_2['借款人收款账户']['开户行'] = account_bank

            account, account_name, account_bank = self.get_payback_account()
            page_2['还款账户']['账号'] = account
            page_2['还款账户']['户名'] = account_name
            page_2['还款账户']['开户行'] = account_bank

            # ---------------- Page 3 ----------------
            page_3 = result['page_3']
            page_3['合同编号'] = self.get_contract_no(page_num='2')
            page_3['还款计划表'] = self.get_repayment_schedule()
            page_3['车辆代理商'] = self.get_cldls()

            # ---------------- Page 4 ----------------
            page_4 = result['page_4']
            page_4['合同编号'] = self.get_contract_no(page_num='3')
            page_4['附加产品融资贷款本金总金额明细'] = \
                self.get_asp_details(page_num='3')

            # ---------------- Pages 5 and 6 ----------------
            result['page_5']['合同编号'] = self.get_contract_no(page_num='4')
            result['page_6']['合同编号'] = self.get_contract_no(page_num='5')

            # ---------------- Signature page ----------------
            if self.is_asp:
                result['page_7']['合同编号'] = \
                    self.get_contract_no(page_num='6')
                result['page_8']['合同编号'] = \
                    self.get_contract_no(page_num='7')
                self._fill_last_page_signatures('page_8', '7')
            else:
                result['page_7']['合同编号'] = \
                    self.get_contract_no(page_num='6')
                self._fill_last_page_signatures('page_7', '6')

        # Final output wrapper.
        new_results = {"is_asp": self.is_asp,
                       "page_info": self.init_result}
        return new_results