fix format
Showing
1 changed file
with
108 additions
and
76 deletions
| ... | @@ -3,7 +3,7 @@ | ... | @@ -3,7 +3,7 @@ |
| 3 | # @Email : 9428.al@gmail.com | 3 | # @Email : 9428.al@gmail.com |
| 4 | # @Create Date : 2021-07-20 16:42:41 | 4 | # @Create Date : 2021-07-20 16:42:41 |
| 5 | # @Last Modified : 2021-09-07 19:52:39 | 5 | # @Last Modified : 2021-09-07 19:52:39 |
| 6 | # @Description : | 6 | # @Description : |
| 7 | 7 | ||
| 8 | import re | 8 | import re |
| 9 | import numpy as np | 9 | import numpy as np |
| ... | @@ -17,107 +17,110 @@ class Finder: | ... | @@ -17,107 +17,110 @@ class Finder: |
| 17 | self.is_asp = False | 17 | self.is_asp = False |
| 18 | self.item = {"words": None, | 18 | self.item = {"words": None, |
| 19 | "position": None, | 19 | "position": None, |
| 20 | } | 20 | } |
| 21 | |||
| 21 | def gen_init_result(self, is_asp): | 22 | def gen_init_result(self, is_asp): |
| 22 | # 格式化算法输出 | 23 | # 格式化算法输出 |
| 23 | self.init_result = {"page_1": {"合同编号": self.item, | 24 | self.init_result = {"page_1": {"合同编号": self.item, |
| 24 | "所购车辆价格": self.item, | 25 | "所购车辆价格": self.item, |
| 25 | "车架号": self.item, | 26 | "车架号": self.item, |
| 26 | "贷款本金金额": {"大写": self.item, | 27 | "贷款本金金额": {"大写": self.item, |
| 27 | "小写": self.item, | 28 | "小写": self.item, |
| 28 | "车辆贷款本金金额": self.item, | 29 | "车辆贷款本金金额": self.item, |
| 29 | "附加产品融资贷款本金总金额": self.item, | 30 | "附加产品融资贷款本金总金额": self.item, |
| 30 | }, | 31 | }, |
| 31 | "贷款期限": self.item, | 32 | "贷款期限": self.item, |
| 32 | "附加产品融资贷款本金总金额明细": self.item, | 33 | "附加产品融资贷款本金总金额明细": self.item, |
| 33 | "借款人签字及时间": self.item, | 34 | "借款人签字及时间": self.item, |
| 34 | }, | 35 | }, |
| 35 | "page_2": {"合同编号": self.item, | 36 | "page_2": {"合同编号": self.item, |
| 36 | "借款人及抵押人": {"name": self.item, | 37 | "借款人及抵押人": {"name": self.item, |
| 38 | "id": self.item, | ||
| 39 | }, | ||
| 40 | "共同借款人及共同抵押人": {"name": self.item, | ||
| 37 | "id": self.item, | 41 | "id": self.item, |
| 38 | }, | 42 | }, |
| 39 | "共同借款人及共同抵押人": {"name": self.item, | ||
| 40 | "id": self.item, | ||
| 41 | }, | ||
| 42 | "保证人1": {"name": self.item, | 43 | "保证人1": {"name": self.item, |
| 43 | "id": self.item, | 44 | "id": self.item, |
| 44 | }, | 45 | }, |
| 45 | "保证人2": {"name": self.item, | 46 | "保证人2": {"name": self.item, |
| 46 | "id": self.item, | 47 | "id": self.item, |
| 47 | }, | 48 | }, |
| 48 | "所购车辆价格": self.item, | 49 | "所购车辆价格": self.item, |
| 49 | "车架号": self.item, | 50 | "车架号": self.item, |
| 50 | "经销商": self.item, | 51 | "经销商": self.item, |
| 51 | "贷款本金金额": {"大写": self.item, | 52 | "贷款本金金额": {"大写": self.item, |
| 52 | "小写": self.item, | 53 | "小写": self.item, |
| 53 | "车辆贷款本金金额": self.item, | 54 | "车辆贷款本金金额": self.item, |
| 54 | "附加产品融资贷款本金总金额": self.item, | 55 | "附加产品融资贷款本金总金额": self.item, |
| 55 | }, | 56 | }, |
| 56 | "贷款期限": self.item, | 57 | "贷款期限": self.item, |
| 57 | "还款账户": {"账号": self.item, | 58 | "还款账户": {"账号": self.item, |
| 58 | "户名": self.item, | 59 | "户名": self.item, |
| 59 | "开户行": self.item, | 60 | "开户行": self.item, |
| 60 | }, | 61 | }, |
| 61 | }, | 62 | }, |
| 62 | "page_3": {"合同编号": self.item, | 63 | "page_3": {"合同编号": self.item, |
| 63 | "还款计划表": self.item, | 64 | "还款计划表": self.item, |
| 64 | }, | 65 | }, |
| 65 | "page_4": {"合同编号": self.item, | 66 | "page_4": {"合同编号": self.item, |
| 66 | "附加产品融资贷款本金总金额明细": self.item, | 67 | "附加产品融资贷款本金总金额明细": self.item, |
| 67 | }, | 68 | }, |
| 68 | "page_5": {"合同编号": self.item, | 69 | "page_5": {"合同编号": self.item, |
| 69 | }, | 70 | }, |
| 70 | "page_6": {"合同编号": self.item, | 71 | "page_6": {"合同编号": self.item, |
| 71 | }, | 72 | }, |
| 72 | } | 73 | } |
| 73 | if self.is_asp == False: | 74 | if self.is_asp == False: |
| 74 | self.init_result["page_7"] = {"合同编号": self.item, | 75 | self.init_result["page_7"] = {"合同编号": self.item, |
| 75 | "主借人签字": {"签字": self.item, | 76 | "主借人签字": {"签字": self.item, |
| 76 | "日期": self.item, | 77 | "日期": self.item, |
| 77 | }, | 78 | }, |
| 78 | "共借人签字": {"签字": self.item, | 79 | "共借人签字": {"签字": self.item, |
| 79 | "日期": self.item, | 80 | "日期": self.item, |
| 80 | }, | 81 | }, |
| 81 | "保证人1签字": {"签字": self.item, | 82 | "保证人1签字": {"签字": self.item, |
| 82 | "日期": self.item, | 83 | "日期": self.item, |
| 83 | }, | 84 | }, |
| 84 | "保证人2签字": {"签字": self.item, | 85 | "保证人2签字": {"签字": self.item, |
| 85 | "日期": self.item, | 86 | "日期": self.item, |
| 86 | }, | 87 | }, |
| 87 | "见证人签字": {"签字": self.item, | 88 | "见证人签字": {"签字": self.item, |
| 88 | "日期": self.item, | 89 | "日期": self.item, |
| 89 | }, | 90 | }, |
| 90 | } | 91 | } |
| 91 | else: | 92 | else: |
| 92 | self.init_result["page_7"] = {"合同编号": self.item, | 93 | self.init_result["page_7"] = {"合同编号": self.item, |
| 93 | } | 94 | } |
| 94 | self.init_result["page_8"] = {"合同编号": self.item, | 95 | self.init_result["page_8"] = {"合同编号": self.item, |
| 95 | "主借人签字": {"签字": self.item, | 96 | "主借人签字": {"签字": self.item, |
| 96 | "日期": self.item, | 97 | "日期": self.item, |
| 97 | }, | 98 | }, |
| 98 | "共借人签字": {"签字": self.item, | 99 | "共借人签字": {"签字": self.item, |
| 99 | "日期": self.item, | 100 | "日期": self.item, |
| 100 | }, | 101 | }, |
| 101 | "保证人1签字": {"签字": self.item, | 102 | "保证人1签字": {"签字": self.item, |
| 102 | "日期": self.item, | 103 | "日期": self.item, |
| 103 | }, | 104 | }, |
| 104 | "保证人2签字": {"签字": self.item, | 105 | "保证人2签字": {"签字": self.item, |
| 105 | "日期": self.item, | 106 | "日期": self.item, |
| 106 | }, | 107 | }, |
| 107 | "见证人签字": {"签字": self.item, | 108 | "见证人签字": {"签字": self.item, |
| 108 | "日期": self.item, | 109 | "日期": self.item, |
| 109 | }, | 110 | }, |
| 110 | } | 111 | } |
| 112 | |||
| 111 | def poly_to_rectangle(self, poly): | 113 | def poly_to_rectangle(self, poly): |
| 112 | xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax = poly | 114 | xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax = poly |
| 113 | bbox = [xmin, ymin, xmax, ymax] | 115 | bbox = [xmin, ymin, xmax, ymax] |
| 114 | return bbox | 116 | return bbox |
| 117 | |||
| 115 | def get_contract_no(self, page_num): | 118 | def get_contract_no(self, page_num): |
| 116 | """传入页码,查看该页码右上角的编号 | 119 | """传入页码,查看该页码右上角的编号 |
| 117 | 120 | ||
| 118 | Args: | 121 | Args: |
| 119 | page_num (string): | 122 | page_num (string): |
| 120 | 123 | ||
| 121 | Returns: | 124 | Returns: |
| 122 | sting: | 125 | sting: |
| 123 | """ | 126 | """ |
| ... | @@ -133,6 +136,7 @@ class Finder: | ... | @@ -133,6 +136,7 @@ class Finder: |
| 133 | contract_no['words'] = words | 136 | contract_no['words'] = words |
| 134 | contract_no['position'] = location | 137 | contract_no['position'] = location |
| 135 | return contract_no | 138 | return contract_no |
| 139 | |||
| 136 | def get_vehicle_price(self, page_num='0'): | 140 | def get_vehicle_price(self, page_num='0'): |
| 137 | vehicle_price = self.item.copy() | 141 | vehicle_price = self.item.copy() |
| 138 | # vehicle_price['words'] = '' | 142 | # vehicle_price['words'] = '' |
| ... | @@ -145,6 +149,7 @@ class Finder: | ... | @@ -145,6 +149,7 @@ class Finder: |
| 145 | vehicle_price['words'] = words | 149 | vehicle_price['words'] = words |
| 146 | vehicle_price['position'] = location | 150 | vehicle_price['position'] = location |
| 147 | return vehicle_price | 151 | return vehicle_price |
| 152 | |||
| 148 | def get_vin(self, page_num='0'): | 153 | def get_vin(self, page_num='0'): |
| 149 | vin = self.item.copy() | 154 | vin = self.item.copy() |
| 150 | # vin['words'] = '' | 155 | # vin['words'] = '' |
| ... | @@ -157,6 +162,7 @@ class Finder: | ... | @@ -157,6 +162,7 @@ class Finder: |
| 157 | vin['words'] = words | 162 | vin['words'] = words |
| 158 | vin['position'] = location | 163 | vin['position'] = location |
| 159 | return vin | 164 | return vin |
| 165 | |||
| 160 | def get_loan_principal(self, page_num='0'): | 166 | def get_loan_principal(self, page_num='0'): |
| 161 | chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾', | 167 | chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾', |
| 162 | '佰', '仟', '万', '亿', '元', '角', '分', '零', '整'] | 168 | '佰', '仟', '万', '亿', '元', '角', '分', '零', '整'] |
| ... | @@ -197,6 +203,7 @@ class Finder: | ... | @@ -197,6 +203,7 @@ class Finder: |
| 197 | asp_2['position'] = bbox | 203 | asp_2['position'] = bbox |
| 198 | asp_2['words'] = words | 204 | asp_2['words'] = words |
| 199 | return upper, lower, asp_1, asp_2 | 205 | return upper, lower, asp_1, asp_2 |
| 206 | |||
| 200 | def get_loan_term(self, page_num='0'): | 207 | def get_loan_term(self, page_num='0'): |
| 201 | loan_term = self.item.copy() | 208 | loan_term = self.item.copy() |
| 202 | all_text = '' | 209 | all_text = '' |
| ... | @@ -220,18 +227,21 @@ class Finder: | ... | @@ -220,18 +227,21 @@ class Finder: |
| 220 | loan_term['position'] = bbox | 227 | loan_term['position'] = bbox |
| 221 | loan_term['words'] = words | 228 | loan_term['words'] = words |
| 222 | return loan_term | 229 | return loan_term |
| 230 | |||
| 223 | def mergelist(self, text_list): | 231 | def mergelist(self, text_list): |
| 224 | pattern = re.compile("[^\u4e00-\u9fa5]") # 匹配不是中文的其他字符 | 232 | pattern = re.compile("[^\u4e00-\u9fa5]") # 匹配不是中文的其他字符 |
| 225 | mergeindex = -1 | 233 | mergeindex = -1 |
| 226 | for index, i in enumerate(text_list): | 234 | for index, i in enumerate(text_list): |
| 227 | if '所购' in i and len(pattern.sub('', pattern.sub('', text_list[index+1]))) != 0: | 235 | if '所购' in i and len(pattern.sub('', pattern.sub('', text_list[index+1]))) != 0: |
| 228 | # if '所购' in i and '.00' not in text_list[index+1]: | 236 | # if '所购' in i and '.00' not in text_list[index+1]: |
| 229 | mergeindex = index | 237 | mergeindex = index |
| 230 | if mergeindex == -1: | 238 | if mergeindex == -1: |
| 231 | return text_list | 239 | return text_list |
| 232 | else: | 240 | else: |
| 233 | new_text_list = text_list[:mergeindex] + [text_list[mergeindex] + text_list[mergeindex+1]] + text_list[mergeindex+2:] | 241 | new_text_list = text_list[:mergeindex] + [ |
| 234 | return self.mergelist(new_text_list) | 242 | text_list[mergeindex] + text_list[mergeindex+1]] + text_list[mergeindex+2:] |
| 243 | return self.mergelist(new_text_list) | ||
| 244 | |||
| 235 | def get_asp_details(self, page_num): | 245 | def get_asp_details(self, page_num): |
| 236 | asp_details_table_term = self.item.copy() | 246 | asp_details_table_term = self.item.copy() |
| 237 | asp_details_table = [] | 247 | asp_details_table = [] |
| ... | @@ -262,6 +272,7 @@ class Finder: | ... | @@ -262,6 +272,7 @@ class Finder: |
| 262 | if len(asp_details_table) > 0: | 272 | if len(asp_details_table) > 0: |
| 263 | asp_details_table_term['words'] = asp_details_table | 273 | asp_details_table_term['words'] = asp_details_table |
| 264 | return asp_details_table_term | 274 | return asp_details_table_term |
| 275 | |||
| 265 | def get_signature(self): | 276 | def get_signature(self): |
| 266 | signature = self.item.copy() | 277 | signature = self.item.copy() |
| 267 | for block in self.pdf_info['0']['blocks']: | 278 | for block in self.pdf_info['0']['blocks']: |
| ... | @@ -275,6 +286,7 @@ class Finder: | ... | @@ -275,6 +286,7 @@ class Finder: |
| 275 | signature['words'] = words | 286 | signature['words'] = words |
| 276 | signature['position'] = bbox | 287 | signature['position'] = bbox |
| 277 | return signature | 288 | return signature |
| 289 | |||
| 278 | def get_somebody(self, top, bottom): | 290 | def get_somebody(self, top, bottom): |
| 279 | # 指定上下边界后,返回上下边界内的客户信息 | 291 | # 指定上下边界后,返回上下边界内的客户信息 |
| 280 | _name = self.item.copy() | 292 | _name = self.item.copy() |
| ... | @@ -309,6 +321,7 @@ class Finder: | ... | @@ -309,6 +321,7 @@ class Finder: |
| 309 | _id['position'] = bbox | 321 | _id['position'] = bbox |
| 310 | _id['words'] = words | 322 | _id['words'] = words |
| 311 | return _name, _id | 323 | return _name, _id |
| 324 | |||
| 312 | def get_seller(self): | 325 | def get_seller(self): |
| 313 | seller = self.item.copy() | 326 | seller = self.item.copy() |
| 314 | # 先找到 key | 327 | # 先找到 key |
| ... | @@ -330,11 +343,12 @@ class Finder: | ... | @@ -330,11 +343,12 @@ class Finder: |
| 330 | for line in block['lines']: | 343 | for line in block['lines']: |
| 331 | for span in line['spans']: | 344 | for span in line['spans']: |
| 332 | bbox, text = span['bbox'], span['text'] | 345 | bbox, text = span['bbox'], span['text'] |
| 333 | if anchor_bbox[2]<np.mean(bbox[::2])<half_width and \ | 346 | if anchor_bbox[2] < np.mean(bbox[::2]) < half_width and \ |
| 334 | anchor_bbox[1]<np.mean(bbox[1::2])<anchor_bbox[3]: | 347 | anchor_bbox[1] < np.mean(bbox[1::2]) < anchor_bbox[3]: |
| 335 | seller['position'] = bbox | 348 | seller['position'] = bbox |
| 336 | seller['words'] = text | 349 | seller['words'] = text |
| 337 | return seller | 350 | return seller |
| 351 | |||
| 338 | def get_payback_account(self): | 352 | def get_payback_account(self): |
| 339 | account = self.item.copy() | 353 | account = self.item.copy() |
| 340 | account_name = self.item.copy() | 354 | account_name = self.item.copy() |
| ... | @@ -387,6 +401,7 @@ class Finder: | ... | @@ -387,6 +401,7 @@ class Finder: |
| 387 | account_bank['position'] = bbox | 401 | account_bank['position'] = bbox |
| 388 | account_bank['words'] = words | 402 | account_bank['words'] = words |
| 389 | return account, account_name, account_bank | 403 | return account, account_name, account_bank |
| 404 | |||
| 390 | def get_repayment_schedule(self): | 405 | def get_repayment_schedule(self): |
| 391 | repayment_schedule = self.item.copy() | 406 | repayment_schedule = self.item.copy() |
| 392 | # 只看第二页 | 407 | # 只看第二页 |
| ... | @@ -416,6 +431,7 @@ class Finder: | ... | @@ -416,6 +431,7 @@ class Finder: |
| 416 | if len(repayment_schedule_table) > 0: | 431 | if len(repayment_schedule_table) > 0: |
| 417 | repayment_schedule['words'] = repayment_schedule_table | 432 | repayment_schedule['words'] = repayment_schedule_table |
| 418 | return repayment_schedule | 433 | return repayment_schedule |
| 434 | |||
| 419 | def get_signature_role_1(self): | 435 | def get_signature_role_1(self): |
| 420 | signature_role_1 = self.init_item.copy() | 436 | signature_role_1 = self.init_item.copy() |
| 421 | # 先定位签字区域 | 437 | # 先定位签字区域 |
| ... | @@ -445,11 +461,13 @@ class Finder: | ... | @@ -445,11 +461,13 @@ class Finder: |
| 445 | else: | 461 | else: |
| 446 | words = '无' | 462 | words = '无' |
| 447 | boxes = np.array(boxes).reshape((-1, 2)) | 463 | boxes = np.array(boxes).reshape((-1, 2)) |
| 448 | position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] | 464 | position = [min(boxes[:, 0]), min(boxes[:, 1]), |
| 465 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 449 | signature_role_1['page_num'] = page_num | 466 | signature_role_1['page_num'] = page_num |
| 450 | signature_role_1['position'] = position | 467 | signature_role_1['position'] = position |
| 451 | signature_role_1['words'] = words | 468 | signature_role_1['words'] = words |
| 452 | return signature_role_1 | 469 | return signature_role_1 |
| 470 | |||
| 453 | def get_signature_role_2(self): | 471 | def get_signature_role_2(self): |
| 454 | signature_role_2 = self.init_item.copy() | 472 | signature_role_2 = self.init_item.copy() |
| 455 | # 先定位签字区域 | 473 | # 先定位签字区域 |
| ... | @@ -479,11 +497,13 @@ class Finder: | ... | @@ -479,11 +497,13 @@ class Finder: |
| 479 | else: | 497 | else: |
| 480 | words = '无' | 498 | words = '无' |
| 481 | boxes = np.array(boxes).reshape((-1, 2)) | 499 | boxes = np.array(boxes).reshape((-1, 2)) |
| 482 | position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] | 500 | position = [min(boxes[:, 0]), min(boxes[:, 1]), |
| 501 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 483 | signature_role_2['page_num'] = page_num | 502 | signature_role_2['page_num'] = page_num |
| 484 | signature_role_2['position'] = position | 503 | signature_role_2['position'] = position |
| 485 | signature_role_2['words'] = words | 504 | signature_role_2['words'] = words |
| 486 | return signature_role_2 | 505 | return signature_role_2 |
| 506 | |||
| 487 | def get_signature_role_3(self): | 507 | def get_signature_role_3(self): |
| 488 | signature_role_3 = self.init_item.copy() | 508 | signature_role_3 = self.init_item.copy() |
| 489 | # 先定位签字区域 | 509 | # 先定位签字区域 |
| ... | @@ -513,11 +533,13 @@ class Finder: | ... | @@ -513,11 +533,13 @@ class Finder: |
| 513 | else: | 533 | else: |
| 514 | words = '无' | 534 | words = '无' |
| 515 | boxes = np.array(boxes).reshape((-1, 2)) | 535 | boxes = np.array(boxes).reshape((-1, 2)) |
| 516 | position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] | 536 | position = [min(boxes[:, 0]), min(boxes[:, 1]), |
| 537 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 517 | signature_role_3['page_num'] = page_num | 538 | signature_role_3['page_num'] = page_num |
| 518 | signature_role_3['position'] = position | 539 | signature_role_3['position'] = position |
| 519 | signature_role_3['words'] = words | 540 | signature_role_3['words'] = words |
| 520 | return signature_role_3 | 541 | return signature_role_3 |
| 542 | |||
| 521 | def get_signature_role_4(self): | 543 | def get_signature_role_4(self): |
| 522 | signature_role_4 = self.init_item.copy() | 544 | signature_role_4 = self.init_item.copy() |
| 523 | # 先定位签字区域 | 545 | # 先定位签字区域 |
| ... | @@ -547,11 +569,13 @@ class Finder: | ... | @@ -547,11 +569,13 @@ class Finder: |
| 547 | else: | 569 | else: |
| 548 | words = '无' | 570 | words = '无' |
| 549 | boxes = np.array(boxes).reshape((-1, 2)) | 571 | boxes = np.array(boxes).reshape((-1, 2)) |
| 550 | position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] | 572 | position = [min(boxes[:, 0]), min(boxes[:, 1]), |
| 573 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 551 | signature_role_4['page_num'] = page_num | 574 | signature_role_4['page_num'] = page_num |
| 552 | signature_role_4['position'] = position | 575 | signature_role_4['position'] = position |
| 553 | signature_role_4['words'] = words | 576 | signature_role_4['words'] = words |
| 554 | return signature_role_4 | 577 | return signature_role_4 |
| 578 | |||
| 555 | def get_signature_role_5(self): | 579 | def get_signature_role_5(self): |
| 556 | signature_role_5 = self.init_item.copy() | 580 | signature_role_5 = self.init_item.copy() |
| 557 | # 先定位签字区域 | 581 | # 先定位签字区域 |
| ... | @@ -582,11 +606,13 @@ class Finder: | ... | @@ -582,11 +606,13 @@ class Finder: |
| 582 | else: | 606 | else: |
| 583 | words = '无' | 607 | words = '无' |
| 584 | boxes = np.array(boxes).reshape((-1, 2)) | 608 | boxes = np.array(boxes).reshape((-1, 2)) |
| 585 | position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] | 609 | position = [min(boxes[:, 0]), min(boxes[:, 1]), |
| 610 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 586 | signature_role_5['page_num'] = page_num | 611 | signature_role_5['page_num'] = page_num |
| 587 | signature_role_5['position'] = position | 612 | signature_role_5['position'] = position |
| 588 | signature_role_5['words'] = words | 613 | signature_role_5['words'] = words |
| 589 | return signature_role_5 | 614 | return signature_role_5 |
| 615 | |||
| 590 | def get_last_page_signature(self, page_num, top, bottom): | 616 | def get_last_page_signature(self, page_num, top, bottom): |
| 591 | signature_name = self.item.copy() | 617 | signature_name = self.item.copy() |
| 592 | signature_date = self.item.copy() | 618 | signature_date = self.item.copy() |
| ... | @@ -610,7 +636,7 @@ class Finder: | ... | @@ -610,7 +636,7 @@ class Finder: |
| 610 | for line in block['lines']: | 636 | for line in block['lines']: |
| 611 | for span in line['spans']: | 637 | for span in line['spans']: |
| 612 | bbox, text = span['bbox'], span['text'] | 638 | bbox, text = span['bbox'], span['text'] |
| 613 | if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom): | 639 | if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom): |
| 614 | name = text.split(' ')[0] | 640 | name = text.split(' ')[0] |
| 615 | date = text.split(':')[-1] | 641 | date = text.split(':')[-1] |
| 616 | signature_name['words'] = name | 642 | signature_name['words'] = name |
| ... | @@ -618,10 +644,11 @@ class Finder: | ... | @@ -618,10 +644,11 @@ class Finder: |
| 618 | signature_date['words'] = date | 644 | signature_date['words'] = date |
| 619 | signature_date['position'] = bbox | 645 | signature_date['position'] = bbox |
| 620 | return signature_name, signature_date | 646 | return signature_name, signature_date |
| 647 | |||
| 621 | def get_info(self): | 648 | def get_info(self): |
| 622 | """ | 649 | """ |
| 623 | block['type'] == 0 : 表示该元素为图片 | 650 | block['type'] == 0 : 表示该元素为图片 |
| 624 | 651 | ||
| 625 | Returns: | 652 | Returns: |
| 626 | dict: Description | 653 | dict: Description |
| 627 | """ | 654 | """ |
| ... | @@ -672,22 +699,27 @@ class Finder: | ... | @@ -672,22 +699,27 @@ class Finder: |
| 672 | contract_no = self.get_contract_no(page_num='0') | 699 | contract_no = self.get_contract_no(page_num='0') |
| 673 | self.init_result['page_2']['合同编号'] = contract_no | 700 | self.init_result['page_2']['合同编号'] = contract_no |
| 674 | # 找借款人及抵押人(地址字段原本有空格) | 701 | # 找借款人及抵押人(地址字段原本有空格) |
| 675 | borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人:', bottom='共同借款人:') | 702 | borrower_name, borrower_id = self.get_somebody( |
| 703 | top='借款人及抵押人:', bottom='共同借款人:') | ||
| 676 | # 这是为了同时兼容 8.1 版本 | 704 | # 这是为了同时兼容 8.1 版本 |
| 677 | if borrower_name['words'] == None: | 705 | if borrower_name['words'] == None: |
| 678 | borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人:', bottom='共同借款人及共同抵押人:') | 706 | borrower_name, borrower_id = self.get_somebody( |
| 707 | top='借款人及抵押人:', bottom='共同借款人及共同抵押人:') | ||
| 679 | self.init_result['page_2']['借款人及抵押人']['name'] = borrower_name | 708 | self.init_result['page_2']['借款人及抵押人']['name'] = borrower_name |
| 680 | self.init_result['page_2']['借款人及抵押人']['id'] = borrower_id | 709 | self.init_result['page_2']['借款人及抵押人']['id'] = borrower_id |
| 681 | # 找共同借款人及共同抵押人 | 710 | # 找共同借款人及共同抵押人 |
| 682 | co_borrower_name, co_borrower_id = self.get_somebody(top='共同借款人:', bottom='保证人1:') | 711 | co_borrower_name, co_borrower_id = self.get_somebody( |
| 712 | top='共同借款人:', bottom='保证人1:') | ||
| 683 | self.init_result['page_2']['共同借款人及共同抵押人']['name'] = co_borrower_name | 713 | self.init_result['page_2']['共同借款人及共同抵押人']['name'] = co_borrower_name |
| 684 | self.init_result['page_2']['共同借款人及共同抵押人']['id'] = co_borrower_id | 714 | self.init_result['page_2']['共同借款人及共同抵押人']['id'] = co_borrower_id |
| 685 | # 保证人1 | 715 | # 保证人1 |
| 686 | first_guarantor_name, first_guarantor_id = self.get_somebody(top='保证人1:', bottom='保证人2:') | 716 | first_guarantor_name, first_guarantor_id = self.get_somebody( |
| 717 | top='保证人1:', bottom='保证人2:') | ||
| 687 | self.init_result['page_2']['保证人1']['name'] = first_guarantor_name | 718 | self.init_result['page_2']['保证人1']['name'] = first_guarantor_name |
| 688 | self.init_result['page_2']['保证人1']['id'] = first_guarantor_id | 719 | self.init_result['page_2']['保证人1']['id'] = first_guarantor_id |
| 689 | # 保证人2 | 720 | # 保证人2 |
| 690 | second_guarantor_name, second_guarantor_id = self.get_somebody(top='保证人2:', bottom='第一章') | 721 | second_guarantor_name, second_guarantor_id = self.get_somebody( |
| 722 | top='保证人2:', bottom='第一章') | ||
| 691 | self.init_result['page_2']['保证人2']['name'] = second_guarantor_name | 723 | self.init_result['page_2']['保证人2']['name'] = second_guarantor_name |
| 692 | self.init_result['page_2']['保证人2']['id'] = second_guarantor_id | 724 | self.init_result['page_2']['保证人2']['id'] = second_guarantor_id |
| 693 | # 所购车辆价格 | 725 | # 所购车辆价格 |
| ... | @@ -745,23 +777,23 @@ class Finder: | ... | @@ -745,23 +777,23 @@ class Finder: |
| 745 | contract_no = self.get_contract_no(page_num='6') | 777 | contract_no = self.get_contract_no(page_num='6') |
| 746 | self.init_result['page_7']['合同编号'] = contract_no | 778 | self.init_result['page_7']['合同编号'] = contract_no |
| 747 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | 779 | signature_name, signature_date = self.get_last_page_signature(page_num='6', |
| 748 | top='合同编号', bottom='共同借款人') | 780 | top='合同编号', bottom='共同借款人') |
| 749 | self.init_result['page_7']['主借人签字']['签字'] = signature_name | 781 | self.init_result['page_7']['主借人签字']['签字'] = signature_name |
| 750 | self.init_result['page_7']['主借人签字']['日期'] = signature_date | 782 | self.init_result['page_7']['主借人签字']['日期'] = signature_date |
| 751 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | 783 | signature_name, signature_date = self.get_last_page_signature(page_num='6', |
| 752 | top='共同借款人', bottom='保证人1') | 784 | top='共同借款人', bottom='保证人1') |
| 753 | self.init_result['page_7']['共借人签字']['签字'] = signature_name | 785 | self.init_result['page_7']['共借人签字']['签字'] = signature_name |
| 754 | self.init_result['page_7']['共借人签字']['日期'] = signature_date | 786 | self.init_result['page_7']['共借人签字']['日期'] = signature_date |
| 755 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | 787 | signature_name, signature_date = self.get_last_page_signature(page_num='6', |
| 756 | top='保证人1', bottom='保证人2') | 788 | top='保证人1', bottom='保证人2') |
| 757 | self.init_result['page_7']['保证人1签字']['签字'] = signature_name | 789 | self.init_result['page_7']['保证人1签字']['签字'] = signature_name |
| 758 | self.init_result['page_7']['保证人1签字']['日期'] = signature_date | 790 | self.init_result['page_7']['保证人1签字']['日期'] = signature_date |
| 759 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | 791 | signature_name, signature_date = self.get_last_page_signature(page_num='6', |
| 760 | top='保证人2', bottom='在本人面前亲笔签署本合同') | 792 | top='保证人2', bottom='在本人面前亲笔签署本合同') |
| 761 | self.init_result['page_7']['保证人2签字']['签字'] = signature_name | 793 | self.init_result['page_7']['保证人2签字']['签字'] = signature_name |
| 762 | self.init_result['page_7']['保证人2签字']['日期'] = signature_date | 794 | self.init_result['page_7']['保证人2签字']['日期'] = signature_date |
| 763 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | 795 | signature_name, signature_date = self.get_last_page_signature(page_num='6', |
| 764 | top='在本人面前亲笔签署本合同', bottom='以下无正文') | 796 | top='在本人面前亲笔签署本合同', bottom='以下无正文') |
| 765 | self.init_result['page_7']['见证人签字']['签字'] = signature_name | 797 | self.init_result['page_7']['见证人签字']['签字'] = signature_name |
| 766 | self.init_result['page_7']['见证人签字']['日期'] = signature_date | 798 | self.init_result['page_7']['见证人签字']['日期'] = signature_date |
| 767 | else: | 799 | else: |
| ... | @@ -774,27 +806,27 @@ class Finder: | ... | @@ -774,27 +806,27 @@ class Finder: |
| 774 | contract_no = self.get_contract_no(page_num='7') | 806 | contract_no = self.get_contract_no(page_num='7') |
| 775 | self.init_result['page_8']['合同编号'] = contract_no | 807 | self.init_result['page_8']['合同编号'] = contract_no |
| 776 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | 808 | signature_name, signature_date = self.get_last_page_signature(page_num='7', |
| 777 | top='合同编号', bottom='共同借款人') | 809 | top='合同编号', bottom='共同借款人') |
| 778 | self.init_result['page_8']['主借人签字']['签字'] = signature_name | 810 | self.init_result['page_8']['主借人签字']['签字'] = signature_name |
| 779 | self.init_result['page_8']['主借人签字']['日期'] = signature_date | 811 | self.init_result['page_8']['主借人签字']['日期'] = signature_date |
| 780 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | 812 | signature_name, signature_date = self.get_last_page_signature(page_num='7', |
| 781 | top='共同借款人', bottom='保证人1') | 813 | top='共同借款人', bottom='保证人1') |
| 782 | self.init_result['page_8']['共借人签字']['签字'] = signature_name | 814 | self.init_result['page_8']['共借人签字']['签字'] = signature_name |
| 783 | self.init_result['page_8']['共借人签字']['日期'] = signature_date | 815 | self.init_result['page_8']['共借人签字']['日期'] = signature_date |
| 784 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | 816 | signature_name, signature_date = self.get_last_page_signature(page_num='7', |
| 785 | top='保证人1', bottom='保证人2') | 817 | top='保证人1', bottom='保证人2') |
| 786 | self.init_result['page_8']['保证人1签字']['签字'] = signature_name | 818 | self.init_result['page_8']['保证人1签字']['签字'] = signature_name |
| 787 | self.init_result['page_8']['保证人1签字']['日期'] = signature_date | 819 | self.init_result['page_8']['保证人1签字']['日期'] = signature_date |
| 788 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | 820 | signature_name, signature_date = self.get_last_page_signature(page_num='7', |
| 789 | top='保证人2', bottom='在本人面前亲笔签署本合同') | 821 | top='保证人2', bottom='在本人面前亲笔签署本合同') |
| 790 | self.init_result['page_8']['保证人2签字']['签字'] = signature_name | 822 | self.init_result['page_8']['保证人2签字']['签字'] = signature_name |
| 791 | self.init_result['page_8']['保证人2签字']['日期'] = signature_date | 823 | self.init_result['page_8']['保证人2签字']['日期'] = signature_date |
| 792 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | 824 | signature_name, signature_date = self.get_last_page_signature(page_num='7', |
| 793 | top='在本人面前亲笔签署本合同', bottom='以下无正文') | 825 | top='在本人面前亲笔签署本合同', bottom='以下无正文') |
| 794 | self.init_result['page_8']['见证人签字']['签字'] = signature_name | 826 | self.init_result['page_8']['见证人签字']['签字'] = signature_name |
| 795 | self.init_result['page_8']['见证人签字']['日期'] = signature_date | 827 | self.init_result['page_8']['见证人签字']['日期'] = signature_date |
| 796 | # 重新定制输出 | 828 | # 重新定制输出 |
| 797 | new_results = {"is_asp": self.is_asp, | 829 | new_results = {"is_asp": self.is_asp, |
| 798 | "page_info": self.init_result | 830 | "page_info": self.init_result |
| 799 | } | 831 | } |
| 800 | return new_results | 832 | return new_results | ... | ... |
-
Please register or sign in to post a comment