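Fix bug: skip repayment-schedule spans whose bbox[0] lies outside the (left, right) bounds, and also match '年月日' in se_date_compare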
Showing
2 changed files
with
8 additions
and
1 deletion
| ... | @@ -431,6 +431,8 @@ class Finder: | ... | @@ -431,6 +431,8 @@ class Finder: |
| 431 | repayment_schedule_text_list = [] | 431 | repayment_schedule_text_list = [] |
| 432 | table = False | 432 | table = False |
| 433 | page = None | 433 | page = None |
| 434 | left = 0 | ||
| 435 | right = 0 | ||
| 434 | for pno in self.pdf_info: | 436 | for pno in self.pdf_info: |
| 435 | for block in self.pdf_info[pno]['blocks']: | 437 | for block in self.pdf_info[pno]['blocks']: |
| 436 | if block['type'] != 0: | 438 | if block['type'] != 0: |
| ... | @@ -438,6 +440,8 @@ class Finder: | ... | @@ -438,6 +440,8 @@ class Finder: |
| 438 | for line in block['lines']: | 440 | for line in block['lines']: |
| 439 | for span in line['spans']: | 441 | for span in line['spans']: |
| 440 | bbox, text = span['bbox'], span['text'] | 442 | bbox, text = span['bbox'], span['text'] |
| 443 | if '剩余融资' in text: | ||
| 444 | right = bbox[2] | ||
| 441 | if '以上表格中所列序号' in text: | 445 | if '以上表格中所列序号' in text: |
| 442 | table = False | 446 | table = False |
| 443 | if table == True: | 447 | if table == True: |
| ... | @@ -448,10 +452,13 @@ class Finder: | ... | @@ -448,10 +452,13 @@ class Finder: |
| 448 | if re.findall("\d+", text): | 452 | if re.findall("\d+", text): |
| 449 | if len(re.findall("\d+", text)) == 1: | 453 | if len(re.findall("\d+", text)) == 1: |
| 450 | continue | 454 | continue |
| 455 | if not left < bbox[0] < right: | ||
| 456 | continue | ||
| 451 | repayment_schedule_text_list.append(text) | 457 | repayment_schedule_text_list.append(text) |
| 452 | if '61.' in text: | 458 | if '61.' in text: |
| 453 | page = pno | 459 | page = pno |
| 454 | table = True | 460 | table = True |
| 461 | left = bbox[0] | ||
| 455 | # print("repayment_schedule_text_list = ", repayment_schedule_text_list) | 462 | # print("repayment_schedule_text_list = ", repayment_schedule_text_list) |
| 456 | # repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']] | 463 | # repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']] |
| 457 | repayment_schedule_table = [['序号', '租金']] | 464 | repayment_schedule_table = [['序号', '租金']] | ... | ... |
| ... | @@ -317,7 +317,7 @@ class Comparison: | ... | @@ -317,7 +317,7 @@ class Comparison: |
| 317 | 317 | ||
| 318 | def se_date_compare(self, input_str, ocr_str, **kwargs): | 318 | def se_date_compare(self, input_str, ocr_str, **kwargs): |
| 319 | if kwargs.get('long', False): | 319 | if kwargs.get('long', False): |
| 320 | if '长期' in ocr_str or '永久' in ocr_str or '***' in ocr_str or '至今' in ocr_str or '年—月—日' in ocr_str or '年 月 日' in ocr_str: | 320 | if '长期' in ocr_str or '永久' in ocr_str or '***' in ocr_str or '至今' in ocr_str or '年—月—日' in ocr_str or '年 月 日' in ocr_str or '年月日' in ocr_str: |
| 321 | if kwargs.get('today', False) or input_str in ['2099-12-31', '2099-01-01', '2999-12-31', '2999-01-01']: | 321 | if kwargs.get('today', False) or input_str in ['2099-12-31', '2099-01-01', '2999-12-31', '2999-01-01']: |
| 322 | return self.RESULT_Y | 322 | return self.RESULT_Y |
| 323 | else: | 323 | else: | ... | ... |
-
Please register or sign in to post a comment