special zfb
Showing
1 changed file
with
29 additions
and
6 deletions
... | @@ -63,8 +63,8 @@ class PDFHandler: | ... | @@ -63,8 +63,8 @@ class PDFHandler: |
63 | self.page_text_list = [] | 63 | self.page_text_list = [] |
64 | self.pdf_info = {} | 64 | self.pdf_info = {} |
65 | self.img_path_pno_list = [] | 65 | self.img_path_pno_list = [] |
66 | # 注意影响self.title_idx | ||
66 | self.ebank_title_list = [ | 67 | self.ebank_title_list = [ |
67 | # '微信支付交易明细证明', | ||
68 | '支付宝收支明细证明', | 68 | '支付宝收支明细证明', |
69 | '招商银行交易流水', | 69 | '招商银行交易流水', |
70 | '中国工商银行借记账户历史明细', | 70 | '中国工商银行借记账户历史明细', |
... | @@ -72,10 +72,13 @@ class PDFHandler: | ... | @@ -72,10 +72,13 @@ class PDFHandler: |
72 | '平安银行个人账户交易明细清单', | 72 | '平安银行个人账户交易明细清单', |
73 | '中国农业银行账戶活期交易明细清单', | 73 | '中国农业银行账戶活期交易明细清单', |
74 | '支付宝(中国)网络技术有限公司 交易流水证明', | 74 | '支付宝(中国)网络技术有限公司 交易流水证明', |
75 | '支付宝(中国)网络技术有限公司 交易流水证明' | 75 | '支付宝(中国)网络技术有限公司 交易流水证明', |
76 | # '微信支付交易明细证明', | ||
76 | ] | 77 | ] |
77 | self.page_count = None | 78 | self.page_count = None |
78 | self.metadata = None | 79 | self.metadata = None |
80 | self.title_idx = None | ||
81 | self.date_pattern = re.compile(r'^\d+ \d{4}(-)(1[0-2]|0?[1-9])\1(0?[1-9]|[1-2]\d|30|31)$') | ||
79 | 82 | ||
80 | @staticmethod | 83 | @staticmethod |
81 | def get_pwd_list(doc_name, pwd_list): | 84 | def get_pwd_list(doc_name, pwd_list): |
... | @@ -133,9 +136,28 @@ class PDFHandler: | ... | @@ -133,9 +136,28 @@ class PDFHandler: |
133 | y0 = y0 * height_scale | 136 | y0 = y0 * height_scale |
134 | x1 = x1 * width_scale | 137 | x1 = x1 * width_scale |
135 | y1 = y1 * height_scale | 138 | y1 = y1 * height_scale |
136 | rebuild_text_list.append( | 139 | |
137 | ((x0, y0, x1, y0, x1, y1, x0, y1), normalize('NFKC', text)) | 140 | # 支付宝 交易流水证明 '46428471991912802930901 2022-01-22' 切分日期 |
138 | ) | 141 | if self.title_idx in {6, 7} and self.date_pattern.match(text): |
142 | try: | ||
143 | split_x = x0 + ((x1 - x0) * (10 / len(text))) | ||
144 | |||
145 | date_str = text[-10:] | ||
146 | other_str = text[:-10] | ||
147 | rebuild_text_list.append( | ||
148 | ((split_x, y0, x1, y0, x1, y1, split_x, y1), normalize('NFKC', date_str)) | ||
149 | ) | ||
150 | rebuild_text_list.append( | ||
151 | ((x0, y0, split_x, y0, split_x, y1, x0, y1), normalize('NFKC', other_str)) | ||
152 | ) | ||
153 | except Exception as e: | ||
154 | rebuild_text_list.append( | ||
155 | ((x0, y0, x1, y0, x1, y1, x0, y1), normalize('NFKC', text)) | ||
156 | ) | ||
157 | else: | ||
158 | rebuild_text_list.append( | ||
159 | ((x0, y0, x1, y0, x1, y1, x0, y1), normalize('NFKC', text)) | ||
160 | ) | ||
139 | self.page_text_list[pno]['rebuild_text'] = rebuild_text_list | 161 | self.page_text_list[pno]['rebuild_text'] = rebuild_text_list |
140 | except Exception as e: | 162 | except Exception as e: |
141 | pass | 163 | pass |
... | @@ -305,8 +327,9 @@ class PDFHandler: | ... | @@ -305,8 +327,9 @@ class PDFHandler: |
305 | 327 | ||
306 | def title_is_ebank(self, char): | 328 | def title_is_ebank(self, char): |
307 | new_char = normalize('NFKC', char) | 329 | new_char = normalize('NFKC', char) |
308 | for title in self.ebank_title_list: | 330 | for title_idx, title in enumerate(self.ebank_title_list): |
309 | if new_char.find(title) != -1: | 331 | if new_char.find(title) != -1: |
332 | self.title_idx = title_idx | ||
310 | return True | 333 | return True |
311 | return False | 334 | return False |
312 | 335 | ... | ... |
-
Please register or sign in to post a comment