Merge branch 'feature/license' into feature/mssql
Showing 7 changed files with 1641 additions and 233 deletions
1 | import copy | ||
2 | |||
1 | PAGE_DEFAULT = 1 | 3 | PAGE_DEFAULT = 1 |
2 | PAGE_SIZE_DEFAULT = 10 | 4 | PAGE_SIZE_DEFAULT = 10 |
3 | 5 | ||
... | @@ -53,15 +55,19 @@ TRANS_MAP = { | ... | @@ -53,15 +55,19 @@ TRANS_MAP = { |
53 | 'L': "1", | 55 | 'L': "1", |
54 | 56 | ||
55 | 'A': "4", | 57 | 'A': "4", |
58 | |||
56 | 's': "5", | 59 | 's': "5", |
57 | 'S': "5", | 60 | 'S': "5", |
61 | |||
58 | 'b': "6", | 62 | 'b': "6", |
63 | |||
59 | 'g': "9", | 64 | 'g': "9", |
60 | 'E': "9", | 65 | 'E': "9", |
66 | |||
61 | 'B': "13", | 67 | 'B': "13", |
62 | } | 68 | } |
63 | TRANS = str.maketrans(TRANS_MAP) | 69 | TRANS = str.maketrans(TRANS_MAP) |
64 | ERROR_CHARS = {'.', ':', ':', '•', '·'} | 70 | ERROR_CHARS = {'.', '。', ':', ':', '•', '·', ',', ','} |
65 | SKIP_IMG_SHEET_NAME = '未处理图片' | 71 | SKIP_IMG_SHEET_NAME = '未处理图片' |
66 | SKIP_IMG_SHEET_HEADER = ('页码', '序号') | 72 | SKIP_IMG_SHEET_HEADER = ('页码', '序号') |
67 | 73 | ||
... | @@ -70,12 +76,34 @@ UNKNOWN_CARD = '未知卡号' | ... | @@ -70,12 +76,34 @@ UNKNOWN_CARD = '未知卡号' |
70 | UNKNOWN_ROLE = '未知户名' | 76 | UNKNOWN_ROLE = '未知户名' |
71 | DATE_FORMAT = ['%Y年%m月%d日', '%Y/%m/%d', '%Y-%m-%d', '%Y%m%d'] | 77 | DATE_FORMAT = ['%Y年%m月%d日', '%Y/%m/%d', '%Y-%m-%d', '%Y%m%d'] |
72 | 78 | ||
73 | AMOUNT_COL_TITLE_SET = {"交易金额", "金额", "收入/支出金额", "发生额"} | ||
74 | OVERAGE_COL_TITLE_SET = {"账户余额", "余额"} | ||
75 | PROOF_COL_TITLE = '核对结果' | 79 | PROOF_COL_TITLE = '核对结果' |
76 | PROOF_RES = ('对', '错') | 80 | PROOF_RES = ('对', '错') |
77 | META_SHEET_TITLE = '关键信息提取和展示' | 81 | META_SHEET_TITLE = '关键信息提取和展示' |
78 | 82 | ||
83 | SUMMARY_KEY = 'summary_col' | ||
84 | DATE_KEY = 'date_col' | ||
85 | AMOUNT_KEY = 'amount_col' | ||
86 | OVER_KEY = 'over_col' | ||
87 | IMCOME_KEY = 'income_col' | ||
88 | OUTLAY_KEY = 'outlay_col' | ||
89 | BORROW_KEY = 'borrow_col' | ||
90 | MIN_ROW_KEY = 'min_row' | ||
91 | FIND_COUNT_KEY = 'find_count' | ||
92 | FIND_COL_KEY = 'find_col' | ||
93 | HEADER_KEY = 'header' | ||
94 | |||
95 | KEY_LIST = [SUMMARY_KEY, DATE_KEY, OVER_KEY, BORROW_KEY, AMOUNT_KEY, IMCOME_KEY, OUTLAY_KEY] | ||
96 | |||
97 | CLASSIFY_MAP = { | ||
98 | SUMMARY_KEY: 5, | ||
99 | DATE_KEY: 0, | ||
100 | AMOUNT_KEY: 2, | ||
101 | OVER_KEY: 3, | ||
102 | IMCOME_KEY: 11, | ||
103 | OUTLAY_KEY: 12, | ||
104 | BORROW_KEY: 10, | ||
105 | } | ||
106 | |||
79 | FIXED_HEADERS = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号', | 107 | FIXED_HEADERS = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号', |
80 | '对方开户行', '核对结果', '借贷', '收入', '支出') | 108 | '对方开户行', '核对结果', '借贷', '收入', '支出') |
81 | FIXED_COL_AMOUNT = len(FIXED_HEADERS) | 109 | FIXED_COL_AMOUNT = len(FIXED_HEADERS) |
... | @@ -103,36 +131,60 @@ OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取 | ... | @@ -103,36 +131,60 @@ OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取 |
103 | 131 | ||
104 | # ------------------普通打印-全格线-------------------------------------------------------------------------------------- | 132 | # ------------------普通打印-全格线-------------------------------------------------------------------------------------- |
105 | HEADERS_MAPPING = {} | 133 | HEADERS_MAPPING = {} |
134 | |||
135 | # 借贷 | ||
136 | HEADERS_MAPPING.update( | ||
137 | { | ||
138 | '借贷': BORROW_KEY, | ||
139 | '借贷状态': BORROW_KEY, | ||
140 | '收/支': BORROW_KEY, | ||
141 | } | ||
142 | ) | ||
143 | |||
144 | # 收入 | ||
145 | HEADERS_MAPPING.update( | ||
146 | { | ||
147 | '收入金额': IMCOME_KEY, | ||
148 | '收入': IMCOME_KEY, | ||
149 | '存入': IMCOME_KEY, | ||
150 | '存入金额(贷)': IMCOME_KEY, | ||
151 | '存入金额(贷)': IMCOME_KEY, | ||
152 | } | ||
153 | ) | ||
154 | |||
155 | # 支出 | ||
156 | HEADERS_MAPPING.update( | ||
157 | { | ||
158 | '支出金额': OUTLAY_KEY, | ||
159 | '支出': OUTLAY_KEY, | ||
160 | '支取金额(借)': OUTLAY_KEY, | ||
161 | '支取金额(借)': OUTLAY_KEY, | ||
162 | } | ||
163 | ) | ||
164 | |||
165 | |||
106 | # 横版-表格-中国银行(不规则) | 166 | # 横版-表格-中国银行(不规则) |
107 | HEADERS_MAPPING.update( | 167 | HEADERS_MAPPING.update( |
108 | { | 168 | { |
109 | '记账日期': BASE_HEADERS_MAPPING['记账日期'], | 169 | '记账日期': DATE_KEY, |
110 | '记账时间': BASE_HEADERS_MAPPING['记账时间'], | 170 | '金额': AMOUNT_KEY, |
111 | '金额': BASE_HEADERS_MAPPING['金额'], | 171 | '余额': OVER_KEY, |
112 | '余额': BASE_HEADERS_MAPPING['余额'], | 172 | '附言': SUMMARY_KEY, |
113 | '交易名称': BASE_HEADERS_MAPPING['交易名称'], | ||
114 | '附言': BASE_HEADERS_MAPPING['附言'], | ||
115 | '对方账户名': BASE_HEADERS_MAPPING['对方账户名'], | ||
116 | '对方卡号/账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
117 | '对方开户行': BASE_HEADERS_MAPPING['对方开户行'], | ||
118 | } | 173 | } |
119 | ) | 174 | ) |
120 | # 横版-表格-农业银行-中国农业银行个人账户明细 | 175 | # 横版-表格-农业银行-中国农业银行个人账户明细 |
121 | HEADERS_MAPPING.update( | 176 | HEADERS_MAPPING.update( |
122 | { | 177 | { |
123 | '交易日期': BASE_HEADERS_MAPPING['记账日期'], | 178 | '交易日期': DATE_KEY, |
124 | # '存入': BASE_HEADERS_MAPPING['金额'], | 179 | # '存入': AMOUNT_KEY, |
125 | '对方账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | 180 | '摘要': SUMMARY_KEY, |
126 | '对方名称': BASE_HEADERS_MAPPING['对方账户名'], | ||
127 | '摘要': BASE_HEADERS_MAPPING['附言'], | ||
128 | } | 181 | } |
129 | ) | 182 | ) |
130 | # 横版-表格-北京银行 | 183 | # 横版-表格-北京银行 |
131 | HEADERS_MAPPING.update( | 184 | HEADERS_MAPPING.update( |
132 | { | 185 | { |
133 | '业务摘要': BASE_HEADERS_MAPPING['附言'], | 186 | '业务摘要': SUMMARY_KEY, |
134 | '发生额': BASE_HEADERS_MAPPING['金额'], | 187 | '发生额': AMOUNT_KEY, |
135 | '对方户名': BASE_HEADERS_MAPPING['对方账户名'], | ||
136 | } | 188 | } |
137 | ) | 189 | ) |
138 | # 横版-表格-工商银行 借记卡账户历史明细清单 | 190 | # 横版-表格-工商银行 借记卡账户历史明细清单 |
... | @@ -142,8 +194,8 @@ HEADERS_MAPPING.update( | ... | @@ -142,8 +194,8 @@ HEADERS_MAPPING.update( |
142 | # 工商银行历史明细(申请单号:20042501303039397888) | 194 | # 工商银行历史明细(申请单号:20042501303039397888) |
143 | HEADERS_MAPPING.update( | 195 | HEADERS_MAPPING.update( |
144 | { | 196 | { |
145 | '收入/支出金额': BASE_HEADERS_MAPPING['金额'], | 197 | '收入/支出金额': AMOUNT_KEY, |
146 | '工作日期': BASE_HEADERS_MAPPING['记账日期'], | 198 | '工作日期': DATE_KEY, |
147 | } | 199 | } |
148 | ) | 200 | ) |
149 | 201 | ||
... | @@ -153,26 +205,23 @@ HEADERS_MAPPING.update( | ... | @@ -153,26 +205,23 @@ HEADERS_MAPPING.update( |
153 | # 竖版-表格-建设银行-个人活期账户交易明细 CH-B005832604 (2) | 205 | # 竖版-表格-建设银行-个人活期账户交易明细 CH-B005832604 (2) |
154 | HEADERS_MAPPING.update( | 206 | HEADERS_MAPPING.update( |
155 | { | 207 | { |
156 | '交易金额': BASE_HEADERS_MAPPING['金额'], | 208 | '交易金额': AMOUNT_KEY, |
157 | '账户余额': BASE_HEADERS_MAPPING['余额'], | 209 | '账户余额': OVER_KEY, |
158 | '对方账号与户名': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
159 | } | 210 | } |
160 | ) | 211 | ) |
161 | # 微信 | 212 | # 微信 |
162 | HEADERS_MAPPING.update( | 213 | HEADERS_MAPPING.update( |
163 | { | 214 | { |
164 | '交易时间': BASE_HEADERS_MAPPING['记账时间'], | 215 | '交易类型': SUMMARY_KEY, |
165 | '交易类型': BASE_HEADERS_MAPPING['附言'], | 216 | '金额(元)': AMOUNT_KEY, |
166 | '金额(元)': BASE_HEADERS_MAPPING['金额'], | 217 | '金额(元)': AMOUNT_KEY, |
167 | '金额(元)': BASE_HEADERS_MAPPING['金额'], | ||
168 | '交易对方': BASE_HEADERS_MAPPING['对方账户名'], | ||
169 | } | 218 | } |
170 | ) | 219 | ) |
171 | # 支付宝 | 220 | # 支付宝 |
172 | HEADERS_MAPPING.update( | 221 | HEADERS_MAPPING.update( |
173 | { | 222 | { |
174 | '时间': BASE_HEADERS_MAPPING['记账日期'], | 223 | '时间': DATE_KEY, |
175 | '名称/备注': BASE_HEADERS_MAPPING['附言'], | 224 | '名称/备注': SUMMARY_KEY, |
176 | } | 225 | } |
177 | ) | 226 | ) |
178 | 227 | ||
... | @@ -182,33 +231,28 @@ HEADERS_MAPPING.update( | ... | @@ -182,33 +231,28 @@ HEADERS_MAPPING.update( |
182 | # 竖版-无表格-农业银行CH-B008805428 | 231 | # 竖版-无表格-农业银行CH-B008805428 |
183 | HEADERS_MAPPING.update( | 232 | HEADERS_MAPPING.update( |
184 | { | 233 | { |
185 | '摘要/附言': BASE_HEADERS_MAPPING['附言'], | 234 | '摘要/附言': SUMMARY_KEY, |
186 | '交易地点/对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
187 | } | 235 | } |
188 | ) | 236 | ) |
189 | # 农业银行-窄页 | 237 | # 农业银行-窄页 |
190 | HEADERS_MAPPING.update( | 238 | |
191 | { | ||
192 | '交易对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
193 | } | ||
194 | ) | ||
195 | # 竖版-特殊-农商行 | 239 | # 竖版-特殊-农商行 |
196 | HEADERS_MAPPING.update( | 240 | HEADERS_MAPPING.update( |
197 | { | 241 | { |
198 | '交易发生额': BASE_HEADERS_MAPPING['金额'], | 242 | '交易发生额': AMOUNT_KEY, |
199 | } | 243 | } |
200 | ) | 244 | ) |
201 | # 横版-特殊-中信银行-账户交易明细 | 245 | # 横版-特殊-中信银行-账户交易明细 |
202 | HEADERS_MAPPING.update( | 246 | HEADERS_MAPPING.update( |
203 | { | 247 | { |
204 | '对方银行': BASE_HEADERS_MAPPING['对方开户行'], | 248 | '交易摘要': SUMMARY_KEY, |
205 | '交易摘要': BASE_HEADERS_MAPPING['附言'], | ||
206 | } | 249 | } |
207 | ) | 250 | ) |
208 | # 平安电子账单 | 251 | # 平安电子账单 |
209 | HEADERS_MAPPING.update( | 252 | HEADERS_MAPPING.update( |
210 | { | 253 | { |
211 | '借贷发生额(借:-贷:+)': BASE_HEADERS_MAPPING['金额'], | 254 | '借贷发生额(借:-贷:+)': AMOUNT_KEY, |
255 | '借贷发生额(借:-贷:+)': AMOUNT_KEY, | ||
212 | } | 256 | } |
213 | ) | 257 | ) |
214 | 258 | ||
... | @@ -218,7 +262,7 @@ HEADERS_MAPPING.update( | ... | @@ -218,7 +262,7 @@ HEADERS_MAPPING.update( |
218 | # 竖版-无表格-招商银行账户历史交易明细表 | 262 | # 竖版-无表格-招商银行账户历史交易明细表 |
219 | HEADERS_MAPPING.update( | 263 | HEADERS_MAPPING.update( |
220 | { | 264 | { |
221 | '联机余额': BASE_HEADERS_MAPPING['余额'], | 265 | '联机余额': OVER_KEY, |
222 | } | 266 | } |
223 | ) | 267 | ) |
224 | # 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户 | 268 | # 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户 |
... | @@ -226,28 +270,23 @@ HEADERS_MAPPING.update( | ... | @@ -226,28 +270,23 @@ HEADERS_MAPPING.update( |
226 | # 竖版-无表格-邮储银行-电子章 邮储银行 账户对账单 | 270 | # 竖版-无表格-邮储银行-电子章 邮储银行 账户对账单 |
227 | HEADERS_MAPPING.update( | 271 | HEADERS_MAPPING.update( |
228 | { | 272 | { |
229 | '交易金额(元)': BASE_HEADERS_MAPPING['金额'], | 273 | '交易金额(元)': AMOUNT_KEY, |
230 | '交易金额(元)': BASE_HEADERS_MAPPING['金额'], | 274 | '交易金额(元)': AMOUNT_KEY, |
231 | '账户余额(元)': BASE_HEADERS_MAPPING['余额'], | 275 | '账户余额(元)': OVER_KEY, |
232 | '账户余额(元)': BASE_HEADERS_MAPPING['余额'], | 276 | '账户余额(元)': OVER_KEY, |
233 | '对手方户名': BASE_HEADERS_MAPPING['对方账户名'], | ||
234 | '对手方账户': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
235 | } | 277 | } |
236 | ) | 278 | ) |
237 | # 横版-无表格-广发银行-账户交易历史 --> 已废弃 | 279 | # 横版-无表格-广发银行-账户交易历史 --> 已废弃 |
238 | # 竖版-无表格-广发银行-账户交易历史 --> 已废弃 | 280 | # 竖版-无表格-广发银行-账户交易历史 --> 已废弃 |
239 | HEADERS_MAPPING.update( | 281 | HEADERS_MAPPING.update( |
240 | { | 282 | { |
241 | '会计日期': BASE_HEADERS_MAPPING['记账日期'], | 283 | '会计日期': DATE_KEY, |
242 | '对手户名': BASE_HEADERS_MAPPING['对方账户名'], | ||
243 | '对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
244 | } | 284 | } |
245 | ) | 285 | ) |
246 | # 招行电子账单 TODO 有英文,需测试 | 286 | # 招行电子账单 TODO 有英文,需测试 |
247 | HEADERS_MAPPING.update( | 287 | HEADERS_MAPPING.update( |
248 | { | 288 | { |
249 | '对手信息': BASE_HEADERS_MAPPING['对方账户名'], | 289 | '摘要代码': SUMMARY_KEY, |
250 | '摘要代码': BASE_HEADERS_MAPPING['附言'], | ||
251 | } | 290 | } |
252 | ) | 291 | ) |
253 | # 横版-无表格-民生银行-中国民生银行个人账户对账单(客户卡号) | 292 | # 横版-无表格-民生银行-中国民生银行个人账户对账单(客户卡号) |
... | @@ -255,46 +294,36 @@ HEADERS_MAPPING.update( | ... | @@ -255,46 +294,36 @@ HEADERS_MAPPING.update( |
255 | # 横版-无表格-民生银行 | 294 | # 横版-无表格-民生银行 |
256 | HEADERS_MAPPING.update( | 295 | HEADERS_MAPPING.update( |
257 | { | 296 | { |
258 | '摘要信息': BASE_HEADERS_MAPPING['附言'], | 297 | '摘要信息': SUMMARY_KEY, |
259 | '对方行名': BASE_HEADERS_MAPPING['对方开户行'], | ||
260 | } | 298 | } |
261 | ) | 299 | ) |
262 | # 竖版-无表格-农业银行整数 | 300 | # 竖版-无表格-农业银行整数 |
263 | # 竖版-无表格-农业银行-中国农业银行银行卡交易明细清单 | 301 | # 竖版-无表格-农业银行-中国农业银行银行卡交易明细清单 |
264 | HEADERS_MAPPING.update( | 302 | |
265 | { | ||
266 | '对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
267 | } | ||
268 | ) | ||
269 | # 竖版-无表格-农业银行-中国农业银行银行卡活期存折交易明细清单.pdf | 303 | # 竖版-无表格-农业银行-中国农业银行银行卡活期存折交易明细清单.pdf |
270 | # 竖版-无表格-农业银行-扩张.pdf | 304 | # 竖版-无表格-农业银行-扩张.pdf |
271 | # 竖版-无表格-农业银行-缩进.pdf | 305 | # 竖版-无表格-农业银行-缩进.pdf |
272 | HEADERS_MAPPING.update( | 306 | HEADERS_MAPPING.update( |
273 | { | 307 | { |
274 | '日期': BASE_HEADERS_MAPPING['记账日期'], | 308 | '日期': DATE_KEY, |
275 | '短摘要': BASE_HEADERS_MAPPING['附言'], | 309 | '短摘要': SUMMARY_KEY, |
276 | '本次余额': BASE_HEADERS_MAPPING['余额'], | 310 | '本次余额': OVER_KEY, |
277 | } | 311 | } |
278 | ) | 312 | ) |
279 | # 竖版-无表格-农业银行-无标题(对手帐号) | 313 | # 竖版-无表格-农业银行-无标题(对手帐号) |
280 | HEADERS_MAPPING.update( | 314 | HEADERS_MAPPING.update( |
281 | { | 315 | { |
282 | '交易后余额': BASE_HEADERS_MAPPING['余额'], | 316 | '交易后余额': OVER_KEY, |
283 | '对手帐号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
284 | } | 317 | } |
285 | ) | 318 | ) |
286 | # 竖版-无表格-农商行(非常规) | 319 | # 竖版-无表格-农商行(非常规) |
287 | HEADERS_MAPPING.update( | 320 | HEADERS_MAPPING.update( |
288 | { | 321 | { |
289 | '交易说明': BASE_HEADERS_MAPPING['附言'], | 322 | '交易说明': SUMMARY_KEY, |
290 | } | 323 | } |
291 | ) | 324 | ) |
292 | # 竖版-无表格-工商银行 抬头三行 活期历史明细清单 | 325 | # 竖版-无表格-工商银行 抬头三行 活期历史明细清单 |
293 | HEADERS_MAPPING.update( | 326 | |
294 | { | ||
295 | '对方账户': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
296 | } | ||
297 | ) | ||
298 | 327 | ||
299 | # -----------针式打印-全格线-------------------------------------------------------------------------------------------- | 328 | # -----------针式打印-全格线-------------------------------------------------------------------------------------------- |
300 | # 竖版-表格-建设银行-中国建设银行活期账户交易明细 | 329 | # 竖版-表格-建设银行-中国建设银行活期账户交易明细 |
... | @@ -302,25 +331,19 @@ HEADERS_MAPPING.update( | ... | @@ -302,25 +331,19 @@ HEADERS_MAPPING.update( |
302 | # 竖版-表格-建设银行-对私活期账户明细- (1).pdf | 331 | # 竖版-表格-建设银行-对私活期账户明细- (1).pdf |
303 | HEADERS_MAPPING.update( | 332 | HEADERS_MAPPING.update( |
304 | { | 333 | { |
305 | '帐户余额': BASE_HEADERS_MAPPING['余额'], | 334 | '帐户余额': OVER_KEY, |
306 | '对方帐户名称': BASE_HEADERS_MAPPING['对方账户名'], | ||
307 | } | 335 | } |
308 | ) | 336 | ) |
309 | # 竖版-特殊-交通银行 零售客户交易清单 5000以上交易记录 | 337 | # 竖版-特殊-交通银行 零售客户交易清单 5000以上交易记录 |
310 | HEADERS_MAPPING.update( | 338 | HEADERS_MAPPING.update( |
311 | { | 339 | { |
312 | '交易日期 记账日期': BASE_HEADERS_MAPPING['记账日期'], | 340 | '交易日期 记账日期': DATE_KEY, |
313 | } | 341 | } |
314 | ) | 342 | ) |
315 | 343 | ||
316 | # ----------针式打印-部分格线------------------------------------------------------------------------------------------ | 344 | # ----------针式打印-部分格线------------------------------------------------------------------------------------------ |
317 | # 竖版-特殊-邮储银行-一本通绿卡通交易明细(客户) | 345 | # 竖版-特殊-邮储银行-一本通绿卡通交易明细(客户) |
318 | # 竖版-特殊-邮储银行-账户交易明细(客户) | 346 | # 竖版-特殊-邮储银行-账户交易明细(客户) |
319 | HEADERS_MAPPING.update( | ||
320 | { | ||
321 | '对方账号/卡号/汇票号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
322 | } | ||
323 | ) | ||
324 | 347 | ||
325 | # -------------------------------------------------------------------------------------------------------------------- | 348 | # -------------------------------------------------------------------------------------------------------------------- |
326 | 349 | ||
... | @@ -432,63 +455,6 @@ HEADERS_MAPPING.update( | ... | @@ -432,63 +455,6 @@ HEADERS_MAPPING.update( |
432 | 455 | ||
433 | OTHER_TUPLE = (None, None, None, None, None, None, None, None, None, None, None, None, None) | 456 | OTHER_TUPLE = (None, None, None, None, None, None, None, None, None, None, None, None, None) |
434 | 457 | ||
435 | # { | ||
436 | # "0":"其他", | ||
437 | # "1":"普通打印-全表格-中国农业银行", | ||
438 | # "2":"普通打印-全表格-中国银行", | ||
439 | # "3":"普通打印-全表格-北京银行", | ||
440 | # "4":"普通打印-全表格-工商银行", | ||
441 | # "5":"普通打印-全表格-建设银行", | ||
442 | # "6":"普通打印-全表格-微信账单", | ||
443 | # "7":"普通打印-全表格-支付宝账单", | ||
444 | # "8":"普通打印-无格线-中国邮政储蓄银行", | ||
445 | |||
446 | # "9":"普通打印-无格线-交通银行", | ||
447 | # "10":"普通打印-无格线-农业银行整数", | ||
448 | # "11":"普通打印-无格线-农业银行银行活期扩张缩进", | ||
449 | # "12":"普通打印-无格线-招商银行", | ||
450 | # "13":"普通打印-无格线-招行电子账单", | ||
451 | # "14":"普通打印-无格线-民生银行", | ||
452 | |||
453 | # "15":"普通打印-部分格线-横版-中信银行", | ||
454 | # "16":"普通打印-部分格线-竖版-中国农业银行分账户窄页", | ||
455 | # "17":"普通打印-部分格线-竖版-农业银行", | ||
456 | # "18":"普通打印-部分格线-竖版-农业银行银行卡交易明细", | ||
457 | # "19":"普通打印-部分格线-竖版-平安电子账单", | ||
458 | |||
459 | # "20":"针式打印-全格线-建设银行", | ||
460 | # "21":"针式打印-部分格线-竖版-邮储银行账户交易", | ||
461 | # "22":"针式打印-部分格线-邮储银行一本通绿卡" | ||
462 | # } | ||
463 | |||
464 | # CLASSIFY_LIST = [ | ||
465 | # ('其他', OTHER_TUPLE), | ||
466 | # ('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)), | ||
467 | # ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)), | ||
468 | # ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)), | ||
469 | # ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)), | ||
470 | # ('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)), | ||
471 | # ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)), | ||
472 | # ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)), | ||
473 | # | ||
474 | # ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)), | ||
475 | # ('农业银行', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)), | ||
476 | # ('农业银行', (1, 2, None, None, None, None, None, None, None, None, None, None, None)), | ||
477 | # ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)), | ||
478 | # ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)), | ||
479 | # ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)), | ||
480 | # | ||
481 | # ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)), | ||
482 | # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | ||
483 | # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | ||
484 | # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | ||
485 | # ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)), | ||
486 | # | ||
487 | # ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)), | ||
488 | # ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | ||
489 | # ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | ||
490 | # ] | ||
491 | |||
492 | # "4":"普通打印-全表格-中国银行", | 458 | # "4":"普通打印-全表格-中国银行", |
493 | # "5":"普通打印-全表格-农业银行-10列", | 459 | # "5":"普通打印-全表格-农业银行-10列", |
494 | # "6":"普通打印-全表格-农业银行-10列-1", | 460 | # "6":"普通打印-全表格-农业银行-10列-1", |
... | @@ -563,6 +529,49 @@ CLASSIFY_LIST = [ | ... | @@ -563,6 +529,49 @@ CLASSIFY_LIST = [ |
563 | ('其他', OTHER_TUPLE), | 529 | ('其他', OTHER_TUPLE), |
564 | ] | 530 | ] |
565 | 531 | ||
532 | CLASSIFY_HEADER_LIST = [ | ||
533 | OTHER_TUPLE, | ||
534 | OTHER_TUPLE, | ||
535 | OTHER_TUPLE, | ||
536 | OTHER_TUPLE, | ||
537 | ('记账日期', '记账时间', '币别', '金额', '余额', '交易名称', '渠道', '网点名称', '附言', '对方账户名', '对方卡号/账号', '对方开户行'), | ||
538 | ('交易日期', '交易网点', '存入', '支出', '余额', '对方账号', '对方名称', '摘要', '渠道', '附言'), | ||
539 | ('序号', '日期', '摘要', '交易金额', '余额', '对方账号', '对方名称', '交易地点', '渠道', '附言'), | ||
540 | ('交易日期', '摘要', '交易金额', '余额', '交易渠道', '交易网点', '对方账号', '对方名称', '附言'), | ||
541 | ('交易日期', '业务摘要', '收/支', '发生额', '余额', '对方户名', '对方账号', '交易渠道'), | ||
542 | ('交易日期', '账号', '储种', '序号', '币种', '钞汇', '摘要', '地区', '收入/支出金额', '余额', '渠道'), | ||
543 | ('交易日期', '账号', '储种', '序号', '币种', '钞汇', '摘要', '地区', '收入/支出金额', '余额', '对方户名', '对方账号', '渠道'), | ||
544 | (None, '摘要', '交易日期', '交易金额', '账户余额', '商户/网点号及其名称', '对方账号与户名'), | ||
545 | ('交易单号', '交易时间', '交易类型', '收/支/其他', '交易方式', '金额(元)', '交易对方', '商户单号'), | ||
546 | ('流水号', '时间', '名称/备注', '收入', '支出', '账户余额', '资金渠道'), | ||
547 | |||
548 | ('交易日期', '记账日期', '交易地点', '交易类型', '借贷状态', '交易金额', '余额'), | ||
549 | ('交易日期', '交易类型', '交易金额(元)', '账户余额(元)', '操作柜员'), | ||
550 | ('交易日期', '交易类型', '交易币种', '交易金额(元)', '账户余额(元)', '对手方户名', '对手方账户', '收支类型'), | ||
551 | ('日期', '时间', '日志号', '短摘要', '交易金额', '本次余额', '交易网点', '渠道', '附言'), | ||
552 | ('交易日期', '摘要/附言', '交易金额', '对方账号和户名'), | ||
553 | ('记账日期', '货币', '交易金额', '联机余额', '冲补账', '交易摘要'), | ||
554 | ('记账日期', '货币', '交易金额', '联机余额', '交易摘要', '对手信息'), | ||
555 | ('凭证类型', '凭证号码', '交易时间', '摘要', '交易金额', '账户余额', '现转标志', '交易渠道', '交易机构', '对方户名', '对方行名'), | ||
556 | |||
557 | ('交易日期', '交易摘要', '收入金额', '支出金额', '账户余额', '对方户名', '对方账号', '对方银行', '交易流水号'), | ||
558 | ('交易日期', '摘要/附言', '交易金额', '余额', '交易地点/对方账号和户名'), | ||
559 | ('日期', '地点', '摘要', '存入', '支出', '余额', '对方账号', '对方户名'), | ||
560 | ('日期', '摘要', '交易金额', '余额', '地点', '交易对手账号', '对方户名'), | ||
561 | ('序号', '交易日期', '交易网点', '摘要', '借贷发生额(借:-贷:+)', '账户余额'), | ||
562 | ('序号', '摘要', '币别', '钞汇', '交易日期', '交易金额', '账户余额', '交易地点附言', '对方账号与户名'), | ||
563 | OTHER_TUPLE, | ||
564 | OTHER_TUPLE, | ||
565 | OTHER_TUPLE, | ||
566 | OTHER_TUPLE, | ||
567 | OTHER_TUPLE, | ||
568 | OTHER_TUPLE, | ||
569 | OTHER_TUPLE, | ||
570 | ('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'), | ||
571 | ('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'), | ||
572 | OTHER_TUPLE, | ||
573 | ] | ||
574 | |||
566 | # ----------license相关------------------------------------------------------------------------------------------------ | 575 | # ----------license相关------------------------------------------------------------------------------------------------ |
567 | 576 | ||
568 | # "0":"AVT Invioce", | 577 | # "0":"AVT Invioce", |
... | @@ -603,9 +612,9 @@ RP_FIELD_ORDER_1 = IC_FIELD_ORDER_1 | ... | @@ -603,9 +612,9 @@ RP_FIELD_ORDER_1 = IC_FIELD_ORDER_1 |
603 | VAT_CN_NAME = 'VAT普票' | 612 | VAT_CN_NAME = 'VAT普票' |
604 | VAT_CLASSIFY = 0 | 613 | VAT_CLASSIFY = 0 |
605 | VAT_FIELD_ORDER = (('发票代码', '发票代码'), | 614 | VAT_FIELD_ORDER = (('发票代码', '发票代码'), |
606 | ('发票代码(开具)', '发票代码(开具)'), | 615 | ('发票代码_开具', '发票代码(开具)'), |
607 | ('发票号码', '发票号码'), | 616 | ('发票号码', '发票号码'), |
608 | ('发票号码(开具)', '发票号码(开具)'), | 617 | ('发票号码_开具', '发票号码(开具)'), |
609 | ('开票日期', '开票日期'), | 618 | ('开票日期', '开票日期'), |
610 | ('校验码', '校验码'), | 619 | ('校验码', '校验码'), |
611 | ('货物或应税劳务、服务名称', '货物或应税劳务、服务名称'), | 620 | ('货物或应税劳务、服务名称', '货物或应税劳务、服务名称'), |
... | @@ -622,7 +631,7 @@ VAT_FIELD_ORDER = (('发票代码', '发票代码'), | ... | @@ -622,7 +631,7 @@ VAT_FIELD_ORDER = (('发票代码', '发票代码'), |
622 | ('销方纳税人识别号', '销售方纳税人识别号'), | 631 | ('销方纳税人识别号', '销售方纳税人识别号'), |
623 | ('销方地址、电话', '销售方地址、电话'), | 632 | ('销方地址、电话', '销售方地址、电话'), |
624 | ('销方开户行及账号', '销售方开户行及账号'), | 633 | ('销方开户行及账号', '销售方开户行及账号'), |
625 | ('销售方:(章)', '销售方:(章)'), | 634 | ('下盖章', '销售方:(章)'), |
626 | ('备注', '备注'),) | 635 | ('备注', '备注'),) |
627 | # 机动车登记证书 | 636 | # 机动车登记证书 |
628 | MVC_CN_NAME = '机动车登记证书' | 637 | MVC_CN_NAME = '机动车登记证书' |
... | @@ -856,3 +865,11 @@ LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER) | ... | @@ -856,3 +865,11 @@ LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER) |
856 | OTHER_CLASSIFY_SET = {OTHER_CLASSIFY} | 865 | OTHER_CLASSIFY_SET = {OTHER_CLASSIFY} |
857 | LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY} | 866 | LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY} |
858 | LICENSE_CLASSIFY_SET_2 = {BL_CLASSIFY, UCI_CLASSIFY, EEP_CLASSIFY, DL_CLASSIFY, PP_CLASSIFY, BC_CLASSIFY} | 867 | LICENSE_CLASSIFY_SET_2 = {BL_CLASSIFY, UCI_CLASSIFY, EEP_CLASSIFY, DL_CLASSIFY, PP_CLASSIFY, BC_CLASSIFY} |
868 | |||
869 | WECHART_CLASSIFY = 12 | ||
870 | WECHART_HEADERS_MAPPING = copy.deepcopy(HEADERS_MAPPING) | ||
871 | WECHART_HEADERS_MAPPING.update( | ||
872 | { | ||
873 | '交易时间': DATE_KEY, | ||
874 | } | ||
875 | ) | ... | ... |
src/apps/doc/consts_bak.py
0 → 100644
1 | PAGE_DEFAULT = 1 | ||
2 | PAGE_SIZE_DEFAULT = 10 | ||
3 | |||
4 | FIXED_APPLICATION_ID_PREFIX = 'CH-S' | ||
5 | |||
6 | DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACT MANAGEMENT'] | ||
7 | DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT'] | ||
8 | |||
9 | HIL_PREFIX = 'HIL' | ||
10 | AFC_PREFIX = 'AFC' | ||
11 | SPLIT_STR = '_' | ||
12 | BUSINESS_TYPE_LIST = [HIL_PREFIX, AFC_PREFIX] | ||
13 | HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'} | ||
14 | |||
15 | # -------EDMS相关--------------------------------------------------------------------------------------------------- | ||
16 | |||
17 | SESSION_PREFIX = 'FHLSID' | ||
18 | CUSTOM_CLIENT = 'CustomClient' | ||
19 | FIXED_TOKEN = '00000000-0000-0000-0000-000000000000' | ||
20 | FIXED_FILE_SIZE = 0 | ||
21 | DOWNLOAD_ACTION_TYPE = 'Downloaded' | ||
22 | |||
23 | DOC_SCHEMA_ID_FILL = { | ||
24 | 'ACCEPTANCE': (1, 'DFE-AutoFilingScript'), | ||
25 | 'SETTLEMENT': (20, 'DFE-AutoFilingScript'), | ||
26 | 'CONTRACT MANAGEMENT': (86, 'Schema-Based') | ||
27 | } | ||
28 | BUSINESS_TYPE_DICT = { | ||
29 | HIL_PREFIX: 'CO00002', | ||
30 | AFC_PREFIX: 'CO00001' | ||
31 | } | ||
32 | DOC_SCHEMA_TYPE = 'ElectronicRecord' | ||
33 | APPLICATION_ID_META_FIELD_id = 1 | ||
34 | DEALER_CODE_META_FIELD_id = 13 | ||
35 | BUSINESS_TYPE_META_FIELD_id = 93 | ||
36 | DEALER_CODE = 'ocr_situ_group' | ||
37 | |||
38 | RETRY_TIMES = 3 | ||
39 | |||
40 | # ---------银行流水模板相关-------------------------------------------------------------------------------------------- | ||
41 | |||
42 | TRANS_MAP = { | ||
43 | 'C': "0", | ||
44 | 'c': "0", | ||
45 | '(': "0", | ||
46 | 'o': "0", | ||
47 | 'O': "0", | ||
48 | 'D': "0", | ||
49 | |||
50 | '[': "1", | ||
51 | ']': "1", | ||
52 | 'l': "1", | ||
53 | 'L': "1", | ||
54 | |||
55 | 'A': "4", | ||
56 | |||
57 | 's': "5", | ||
58 | 'S': "5", | ||
59 | |||
60 | 'b': "6", | ||
61 | |||
62 | 'g': "9", | ||
63 | 'E': "9", | ||
64 | |||
65 | 'B': "13", | ||
66 | } | ||
67 | TRANS = str.maketrans(TRANS_MAP) | ||
68 | ERROR_CHARS = {'.', '。', ':', ':', '•', '·', ',', ','} | ||
69 | SKIP_IMG_SHEET_NAME = '未处理图片' | ||
70 | SKIP_IMG_SHEET_HEADER = ('页码', '序号') | ||
71 | |||
72 | CARD_RATIO = 0.9 | ||
73 | UNKNOWN_CARD = '未知卡号' | ||
74 | UNKNOWN_ROLE = '未知户名' | ||
75 | DATE_FORMAT = ['%Y年%m月%d日', '%Y/%m/%d', '%Y-%m-%d', '%Y%m%d'] | ||
76 | |||
77 | PROOF_COL_TITLE = '核对结果' | ||
78 | PROOF_RES = ('对', '错') | ||
79 | META_SHEET_TITLE = '关键信息提取和展示' | ||
80 | |||
81 | FIXED_HEADERS = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号', | ||
82 | '对方开户行', '核对结果', '借贷', '收入', '支出') | ||
83 | FIXED_COL_AMOUNT = len(FIXED_HEADERS) | ||
84 | BASE_HEADERS_MAPPING = {label: idx + 1 for idx, label in enumerate(FIXED_HEADERS)} | ||
85 | BORROW_HEADER_COL = BASE_HEADERS_MAPPING['借贷'] | ||
86 | INCOME_HEADER_COL = BASE_HEADERS_MAPPING['收入'] | ||
87 | OUTLAY_HEADER_COL = BASE_HEADERS_MAPPING['支出'] | ||
88 | RESULT_HEADER_COL = BASE_HEADERS_MAPPING['核对结果'] | ||
89 | BORROW_IDX = BORROW_HEADER_COL - 1 | ||
90 | INCOME_IDX = INCOME_HEADER_COL - 1 | ||
91 | OUTLAY_IDX = OUTLAY_HEADER_COL - 1 | ||
92 | SUMMARY_IDX = FIXED_HEADERS.index('附言') | ||
93 | DATE_IDX = FIXED_HEADERS.index('记账日期') | ||
94 | AMOUNT_IDX = FIXED_HEADERS.index('金额') | ||
95 | OVER_IDX = FIXED_HEADERS.index('余额') | ||
96 | RESULT_IDX = FIXED_HEADERS.index('核对结果') | ||
97 | # '借贷': ('贷', '借'), # 竖版-无表格-广发银行 | ||
98 | # '借贷状态': ('贷', '借'), # 竖版-特殊-交通银行 | ||
99 | # '收/支': ('收入', '支出'), # 横版-表格-北京银行 | ||
100 | BORROW_HEADERS_SET = {'借贷', '借贷状态', '收/支'} | ||
101 | BORROW_INCOME_SET = {'贷', '收入'} | ||
102 | BORROW_OUTLAY_SET = {'借', '支出'} | ||
103 | INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额(贷)'} | ||
104 | OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取金额(借)'} | ||
105 | |||
106 | # ------------------普通打印-全格线-------------------------------------------------------------------------------------- | ||
107 | HEADERS_MAPPING = {} | ||
108 | # 横版-表格-中国银行(不规则) | ||
109 | HEADERS_MAPPING.update( | ||
110 | { | ||
111 | '记账日期': BASE_HEADERS_MAPPING['记账日期'], | ||
112 | '记账时间': BASE_HEADERS_MAPPING['记账时间'], | ||
113 | '金额': BASE_HEADERS_MAPPING['金额'], | ||
114 | '余额': BASE_HEADERS_MAPPING['余额'], | ||
115 | '交易名称': BASE_HEADERS_MAPPING['交易名称'], | ||
116 | '附言': BASE_HEADERS_MAPPING['附言'], | ||
117 | '对方账户名': BASE_HEADERS_MAPPING['对方账户名'], | ||
118 | '对方卡号/账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
119 | '对方开户行': BASE_HEADERS_MAPPING['对方开户行'], | ||
120 | } | ||
121 | ) | ||
122 | # 横版-表格-农业银行-中国农业银行个人账户明细 | ||
123 | HEADERS_MAPPING.update( | ||
124 | { | ||
125 | '交易日期': BASE_HEADERS_MAPPING['记账日期'], | ||
126 | # '存入': BASE_HEADERS_MAPPING['金额'], | ||
127 | '对方账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
128 | '对方名称': BASE_HEADERS_MAPPING['对方账户名'], | ||
129 | '摘要': BASE_HEADERS_MAPPING['附言'], | ||
130 | } | ||
131 | ) | ||
132 | # 横版-表格-北京银行 | ||
133 | HEADERS_MAPPING.update( | ||
134 | { | ||
135 | '业务摘要': BASE_HEADERS_MAPPING['附言'], | ||
136 | '发生额': BASE_HEADERS_MAPPING['金额'], | ||
137 | '对方户名': BASE_HEADERS_MAPPING['对方账户名'], | ||
138 | } | ||
139 | ) | ||
140 | # 横版-表格-工商银行 借记卡账户历史明细清单 | ||
141 | # 横版-表格-工商银行-机打验证码 借记卡账户历史明细清单 | ||
142 | # 横版-表格-工商银行CH-B008802400 | ||
143 | # 横版-表格-工商银行 工资明细清单 | ||
144 | # 工商银行历史明细(申请单号:20042501303039397888) | ||
145 | HEADERS_MAPPING.update( | ||
146 | { | ||
147 | '收入/支出金额': BASE_HEADERS_MAPPING['金额'], | ||
148 | '工作日期': BASE_HEADERS_MAPPING['记账日期'], | ||
149 | } | ||
150 | ) | ||
151 | |||
152 | # 横版-表格-建设银行-个人活期账户交易明细 | ||
153 | # 竖版-表格-建设银行-个人活期账户交易明细 CH-B005832604 | ||
154 | # 竖版-表格-建设银行-工资账单CH-B008786812 | ||
155 | # 竖版-表格-建设银行-个人活期账户交易明细 CH-B005832604 (2) | ||
156 | HEADERS_MAPPING.update( | ||
157 | { | ||
158 | '交易金额': BASE_HEADERS_MAPPING['金额'], | ||
159 | '账户余额': BASE_HEADERS_MAPPING['余额'], | ||
160 | '对方账号与户名': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
161 | } | ||
162 | ) | ||
163 | # 微信 | ||
164 | HEADERS_MAPPING.update( | ||
165 | { | ||
166 | '交易时间': BASE_HEADERS_MAPPING['记账时间'], | ||
167 | '交易类型': BASE_HEADERS_MAPPING['附言'], | ||
168 | '金额(元)': BASE_HEADERS_MAPPING['金额'], | ||
169 | '金额(元)': BASE_HEADERS_MAPPING['金额'], | ||
170 | '交易对方': BASE_HEADERS_MAPPING['对方账户名'], | ||
171 | } | ||
172 | ) | ||
173 | # 支付宝 | ||
174 | HEADERS_MAPPING.update( | ||
175 | { | ||
176 | '时间': BASE_HEADERS_MAPPING['记账日期'], | ||
177 | '名称/备注': BASE_HEADERS_MAPPING['附言'], | ||
178 | } | ||
179 | ) | ||
180 | |||
181 | # ------------普通打印-部分格线------------------------------------------------------------------------------------------- | ||
182 | |||
183 | # 竖版-无表格-农业银行 | ||
184 | # 竖版-无表格-农业银行CH-B008805428 | ||
185 | HEADERS_MAPPING.update( | ||
186 | { | ||
187 | '摘要/附言': BASE_HEADERS_MAPPING['附言'], | ||
188 | '交易地点/对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
189 | } | ||
190 | ) | ||
191 | # 农业银行-窄页 | ||
192 | HEADERS_MAPPING.update( | ||
193 | { | ||
194 | '交易对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
195 | } | ||
196 | ) | ||
197 | # 竖版-特殊-农商行 | ||
198 | HEADERS_MAPPING.update( | ||
199 | { | ||
200 | '交易发生额': BASE_HEADERS_MAPPING['金额'], | ||
201 | } | ||
202 | ) | ||
203 | # 横版-特殊-中信银行-账户交易明细 | ||
204 | HEADERS_MAPPING.update( | ||
205 | { | ||
206 | '对方银行': BASE_HEADERS_MAPPING['对方开户行'], | ||
207 | '交易摘要': BASE_HEADERS_MAPPING['附言'], | ||
208 | } | ||
209 | ) | ||
210 | # 平安电子账单 | ||
211 | HEADERS_MAPPING.update( | ||
212 | { | ||
213 | '借贷发生额(借:-贷:+)': BASE_HEADERS_MAPPING['金额'], | ||
214 | } | ||
215 | ) | ||
216 | |||
217 | # ------------普通打印-无格线-------------------------------------------------------------------------------------------- | ||
218 | |||
219 | # 竖版-无表格-招商银行(略歪) | ||
220 | # 竖版-无表格-招商银行账户历史交易明细表 | ||
221 | HEADERS_MAPPING.update( | ||
222 | { | ||
223 | '联机余额': BASE_HEADERS_MAPPING['余额'], | ||
224 | } | ||
225 | ) | ||
226 | # 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户 | ||
227 | # 竖版-无表格-邮储银行 账户对账单 | ||
228 | # 竖版-无表格-邮储银行-电子章 邮储银行 账户对账单 | ||
229 | HEADERS_MAPPING.update( | ||
230 | { | ||
231 | '交易金额(元)': BASE_HEADERS_MAPPING['金额'], | ||
232 | '交易金额(元)': BASE_HEADERS_MAPPING['金额'], | ||
233 | '账户余额(元)': BASE_HEADERS_MAPPING['余额'], | ||
234 | '账户余额(元)': BASE_HEADERS_MAPPING['余额'], | ||
235 | '对手方户名': BASE_HEADERS_MAPPING['对方账户名'], | ||
236 | '对手方账户': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
237 | } | ||
238 | ) | ||
239 | # 横版-无表格-广发银行-账户交易历史 --> 已废弃 | ||
240 | # 竖版-无表格-广发银行-账户交易历史 --> 已废弃 | ||
241 | HEADERS_MAPPING.update( | ||
242 | { | ||
243 | '会计日期': BASE_HEADERS_MAPPING['记账日期'], | ||
244 | '对手户名': BASE_HEADERS_MAPPING['对方账户名'], | ||
245 | '对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
246 | } | ||
247 | ) | ||
248 | # 招行电子账单 TODO 有英文,需测试 | ||
249 | HEADERS_MAPPING.update( | ||
250 | { | ||
251 | '对手信息': BASE_HEADERS_MAPPING['对方账户名'], | ||
252 | '摘要代码': BASE_HEADERS_MAPPING['附言'], | ||
253 | } | ||
254 | ) | ||
255 | # 横版-无表格-民生银行-中国民生银行个人账户对账单(客户卡号) | ||
256 | # 横版-无表格-民生银行-无标题(客户账户) | ||
257 | # 横版-无表格-民生银行 | ||
258 | HEADERS_MAPPING.update( | ||
259 | { | ||
260 | '摘要信息': BASE_HEADERS_MAPPING['附言'], | ||
261 | '对方行名': BASE_HEADERS_MAPPING['对方开户行'], | ||
262 | } | ||
263 | ) | ||
264 | # 竖版-无表格-农业银行整数 | ||
265 | # 竖版-无表格-农业银行-中国农业银行银行卡交易明细清单 | ||
266 | HEADERS_MAPPING.update( | ||
267 | { | ||
268 | '对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
269 | } | ||
270 | ) | ||
271 | # 竖版-无表格-农业银行-中国农业银行银行卡活期存折交易明细清单.pdf | ||
272 | # 竖版-无表格-农业银行-扩张.pdf | ||
273 | # 竖版-无表格-农业银行-缩进.pdf | ||
274 | HEADERS_MAPPING.update( | ||
275 | { | ||
276 | '日期': BASE_HEADERS_MAPPING['记账日期'], | ||
277 | '短摘要': BASE_HEADERS_MAPPING['附言'], | ||
278 | '本次余额': BASE_HEADERS_MAPPING['余额'], | ||
279 | } | ||
280 | ) | ||
281 | # 竖版-无表格-农业银行-无标题(对手帐号) | ||
282 | HEADERS_MAPPING.update( | ||
283 | { | ||
284 | '交易后余额': BASE_HEADERS_MAPPING['余额'], | ||
285 | '对手帐号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
286 | } | ||
287 | ) | ||
288 | # 竖版-无表格-农商行(非常规) | ||
289 | HEADERS_MAPPING.update( | ||
290 | { | ||
291 | '交易说明': BASE_HEADERS_MAPPING['附言'], | ||
292 | } | ||
293 | ) | ||
294 | # 竖版-无表格-工商银行 抬头三行 活期历史明细清单 | ||
295 | HEADERS_MAPPING.update( | ||
296 | { | ||
297 | '对方账户': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
298 | } | ||
299 | ) | ||
300 | |||
301 | # -----------针式打印-全格线-------------------------------------------------------------------------------------------- | ||
302 | # 竖版-表格-建设银行-中国建设银行活期账户交易明细 | ||
303 | # 竖版-表格-建设银行-中国建设银行活期账户明细清单 | ||
304 | # 竖版-表格-建设银行-对私活期账户明细- (1).pdf | ||
305 | HEADERS_MAPPING.update( | ||
306 | { | ||
307 | '帐户余额': BASE_HEADERS_MAPPING['余额'], | ||
308 | '对方帐户名称': BASE_HEADERS_MAPPING['对方账户名'], | ||
309 | } | ||
310 | ) | ||
311 | # 竖版-特殊-交通银行 零售客户交易清单 5000以上交易记录 | ||
312 | HEADERS_MAPPING.update( | ||
313 | { | ||
314 | '交易日期 记账日期': BASE_HEADERS_MAPPING['记账日期'], | ||
315 | } | ||
316 | ) | ||
317 | |||
318 | # ----------针式打印-部分格线------------------------------------------------------------------------------------------ | ||
319 | # 竖版-特殊-邮储银行-一本通绿卡通交易明细(客户) | ||
320 | # 竖版-特殊-邮储银行-账户交易明细(客户) | ||
321 | HEADERS_MAPPING.update( | ||
322 | { | ||
323 | '对方账号/卡号/汇票号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
324 | } | ||
325 | ) | ||
326 | |||
327 | # -------------------------------------------------------------------------------------------------------------------- | ||
328 | |||
329 | # ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号', '对方开户行', '核对结果', '借贷', '收入', '支出') | ||
330 | # CLASSIFY_LIST = [ | ||
331 | # # --------------普通打印:全格线--------------------------------- | ||
332 | # # 中国银行:记账日期 记账时间 币别 金额 余额 交易名称 渠道 网点名称 附言 对方账户名 对方卡号/账号 对方开户行 | ||
333 | # ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)), # 横版-表格-中国银行(不规则) | ||
334 | # | ||
335 | # # 农业银行:交易日期 交易网点 存入 支出 余额 对方账号 对方名称 摘要 渠道 附言 | ||
336 | # ('农业银行-10', (1, None, None, 5, None, 8, 7, 6, None, None, None, 3, 4)), # 横版-表格-农业银行-中国农业银行个人账户明细 | ||
337 | # | ||
338 | # # 农业银行:序号 日期 摘要 交易金额 余额 对方账号 对方名称 交易地点 渠道 附言 | ||
339 | # ('农业银行-10-1', (2, None, 4, 5, None, 3, 7, 6, None, None, None, None, None)), | ||
340 | # | ||
341 | # # 农业银行:交易日期 摘要 交易金额 余额 交易渠道 交易网点 对方账号 对方名称 附言 | ||
342 | # ('农业银行-9', (1, None, 3, 4, None, 2, 8, 7, None, None, None, None, None)), | ||
343 | # | ||
344 | # # 北京银行:交易日期 业务摘要 收/支 发生额 余额 对方户名 对方账号 交易渠道 | ||
345 | # ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)), # 横版-表格-北京银行 | ||
346 | # | ||
347 | # # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 渠道 | ||
348 | # ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)), | ||
349 | # | ||
350 | # # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 对方户名 对方账号 渠道 | ||
351 | # ('工商银行-电子账单', (1, None, 9, 10, None, 7, 11, 12, None, None, None, None, None)), | ||
352 | # | ||
353 | # # 建设银行:空 摘要 交易日期 交易金额 账户余额 商户/网点号及其名称 对方账号与户名 --> 竖版-表格-建设银行 | ||
354 | # # 序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名 --> 横版-表格-建设银行 | ||
355 | # ('建设银行-竖版', (3, None, 4, 5, None, 2, None, 7, None, None, None, None, None)), | ||
356 | # ('建设银行-横版', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)), | ||
357 | # | ||
358 | # # 微信:交易单号 交易时间 交易类型 收/支/其他 交易方式 金额(元) 交易对方 商户单号 | ||
359 | # ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)), | ||
360 | # | ||
361 | # # 支付宝:流水号 时间 名称/备注 收入 支出 账户余额 资金渠道 | ||
362 | # ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)), | ||
363 | # | ||
364 | # # -----------------普通打印:部分格线-------------------------------- | ||
365 | # | ||
366 | # # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名 | ||
367 | # ('农业银行-5', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | ||
368 | # | ||
369 | # # 农业银行:日期 地点 摘要 存入 支出 余额 对方账号 对方户名 | ||
370 | # ('农业银行-8', (1, None, None, 6, None, 3, 8, 7, None, None, None, 4, 5)), | ||
371 | |||
372 | # # 农业银行:日期 摘要 交易金额 余额 地点 交易对手账号 对方户名 | ||
373 | # ('农业银行-窄页', (1, None, 3, 4, None, 2, 7, 6, None, None, None, None, None)), | ||
374 | # | ||
375 | # # 农商行:交易日期 交易发生额 账户余额 对方账号 对方户名 摘要 备注 | ||
376 | # ('农商行', (1, None, 2, 3, None, 6, 5, 4, None, None, None, None, None)), | ||
377 | # | ||
378 | # # 中信银行:交易日期 交易摘要 收入金额 支出金额 账户余额 对方户名 对方账号 对方银行 交易流水号 | ||
379 | # ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)), | ||
380 | # | ||
381 | # # 平安电子账单:序号 交易日期 交易网点 摘要 借贷发生额(借:-贷:+) 账户余额 | ||
382 | # ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)), | ||
383 | |||
384 | # # 建设银行:序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名 | ||
385 | # ('建设银行-电子账单', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)), | ||
386 | # | ||
387 | # # -----------------普通打印:无格线------------------------------------- | ||
388 | # | ||
389 | # # 招商银行:记账日期 货币 交易金额 联机余额 冲补账 交易摘要 | ||
390 | # ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)), | ||
391 | # | ||
392 | # # 邮储银行:交易日期、交易类型 交易币种 交易金额(元) 账户余额(元) [对手方户名 对手方账户 收支类型] --> 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户 | ||
393 | # # 交易日期、交易类型 交易金额(元) 账户余额(元) 操作柜员 --> 竖版-无表格-邮储银行 账户对账单 | ||
394 | # ('邮储银行-8', (1, None, 4, 5, None, 2, 6, 7, None, None, None, None, None)), | ||
395 | # ('邮储银行-5', (1, None, 3, 4, None, 2, None, None, None, None, None, None, None)), | ||
396 | # | ||
397 | # # 工商银行电子版:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 [对方户名 对方账号] 渠道 | ||
398 | # ('工商银行电子版', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)), | ||
399 | # | ||
400 | # # 招商银行电子版:记账日期 货币 交易金额 联机余额 交易摘要 对手信息 | ||
401 | # ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)), | ||
402 | # | ||
403 | # # 民生银行:凭证类型 凭证号码 摘要信息 交易时间 交易金额 账户余额 现转标志 交易渠道 交易机构 对方户名 对方行名 --> 横版-无表格-民生银行-中国民生银行个人账户对账单(客户卡号) | ||
404 | # # 凭证类型 凭证号码 交易时间 摘要 交易金额 账户余额 现转标志 交易渠道 交易机构 对方户名 对方行名 --> 横版-无表格-民生银行 | ||
405 | # ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)), | ||
406 | # | ||
407 | # # 农业银行:交易日期 摘要/附言 交易金额 对方账号和户名 | ||
408 | # ('农业银行-整数', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)), | ||
409 | # | ||
410 | # # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名 | ||
411 | # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | ||
412 | # | ||
413 | # # 农业银行:日期、时间、短摘要、交易金额、本次余额、交易网点、渠道、附言 | ||
414 | # # 农业银行:日期、时间、日志号、短摘要、交易金额、本次余额、交易网点、渠道、附言 | ||
415 | # ('农业银行', (1, 2, 4, 5, None, 3, None, None, None, None, None, None, None)), | ||
416 | # ('农业银行-扩张缩进', (1, 2, 5, 6, None, 4, None, None, None, None, None, None, None)), | ||
417 | # | ||
418 | # # 交通银行:交易日期 记账日期、交易地点、交易类型、借贷状态、交易金额、余额 | ||
419 | # ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)), | ||
420 | # | ||
421 | # | ||
422 | # # ================针式打印:有格线=================== | ||
423 | # | ||
424 | # # 建设银行: 摘要、交易日期、交易金额、账户余额、商户/网点号及其名称、对方账号、对方户名 --> 竖版-表格-建设银行-中国建设银行活期账户明细清单 | ||
425 | # # 交易日期、摘要、 币种、 钞汇、 交易金额、 帐户余额、对方账号、 对方帐户名称 --> 竖版-表格-建设银行-对私活期账户明细- (1) | ||
426 | # ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)), | ||
427 | # | ||
428 | # | ||
429 | # # ================针式打印:无格线=================== | ||
430 | # | ||
431 | # # 邮储银行:序号、交易日期、交易渠道、摘要、交易金额、账户余额、对方账号/卡号/汇票号、原子账号、交易机构名称 | ||
432 | # ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | ||
433 | # ] | ||
434 | |||
435 | OTHER_TUPLE = (None, None, None, None, None, None, None, None, None, None, None, None, None) | ||
436 | |||
437 | # { | ||
438 | # "0":"其他", | ||
439 | # "1":"普通打印-全表格-中国农业银行", | ||
440 | # "2":"普通打印-全表格-中国银行", | ||
441 | # "3":"普通打印-全表格-北京银行", | ||
442 | # "4":"普通打印-全表格-工商银行", | ||
443 | # "5":"普通打印-全表格-建设银行", | ||
444 | # "6":"普通打印-全表格-微信账单", | ||
445 | # "7":"普通打印-全表格-支付宝账单", | ||
446 | # "8":"普通打印-无格线-中国邮政储蓄银行", | ||
447 | |||
448 | # "9":"普通打印-无格线-交通银行", | ||
449 | # "10":"普通打印-无格线-农业银行整数", | ||
450 | # "11":"普通打印-无格线-农业银行银行活期扩张缩进", | ||
451 | # "12":"普通打印-无格线-招商银行", | ||
452 | # "13":"普通打印-无格线-招行电子账单", | ||
453 | # "14":"普通打印-无格线-民生银行", | ||
454 | |||
455 | # "15":"普通打印-部分格线-横版-中信银行", | ||
456 | # "16":"普通打印-部分格线-竖版-中国农业银行分账户窄页", | ||
457 | # "17":"普通打印-部分格线-竖版-农业银行", | ||
458 | # "18":"普通打印-部分格线-竖版-农业银行银行卡交易明细", | ||
459 | # "19":"普通打印-部分格线-竖版-平安电子账单", | ||
460 | |||
461 | # "20":"针式打印-全格线-建设银行", | ||
462 | # "21":"针式打印-部分格线-竖版-邮储银行账户交易", | ||
463 | # "22":"针式打印-部分格线-邮储银行一本通绿卡" | ||
464 | # } | ||
465 | |||
466 | # CLASSIFY_LIST = [ | ||
467 | # ('其他', OTHER_TUPLE), | ||
468 | # ('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)), | ||
469 | # ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)), | ||
470 | # ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)), | ||
471 | # ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)), | ||
472 | # ('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)), | ||
473 | # ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)), | ||
474 | # ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)), | ||
475 | # | ||
476 | # ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)), | ||
477 | # ('农业银行', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)), | ||
478 | # ('农业银行', (1, 2, None, None, None, None, None, None, None, None, None, None, None)), | ||
479 | # ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)), | ||
480 | # ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)), | ||
481 | # ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)), | ||
482 | # | ||
483 | # ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)), | ||
484 | # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | ||
485 | # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | ||
486 | # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | ||
487 | # ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)), | ||
488 | # | ||
489 | # ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)), | ||
490 | # ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | ||
491 | # ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | ||
492 | # ] | ||
493 | |||
494 | # "4":"普通打印-全表格-中国银行", | ||
495 | # "5":"普通打印-全表格-农业银行-10列", | ||
496 | # "6":"普通打印-全表格-农业银行-10列-1", | ||
497 | # "7":"普通打印-全表格-农业银行-9列", | ||
498 | # "8":"普通打印-全表格-北京银行", | ||
499 | # "9":"普通打印-全表格-工商银行", | ||
500 | # "10":"普通打印-全表格-工商银行-电子账单", | ||
501 | # "11":"普通打印-全表格-建设银行", | ||
502 | # "12":"普通打印-全表格-微信账单", | ||
503 | # "13":"普通打印-全表格-支付宝账单", | ||
504 | |||
505 | # "14":"普通打印-无格线-交通银行", | ||
506 | # "15":"普通打印-无格线-储蓄银行-5列", | ||
507 | # "16":"普通打印-无格线-储蓄银行-8列", | ||
508 | # "17":"普通打印-无格线-农业银行-扩张缩进", | ||
509 | # "18":"普通打印-无格线-农业银行-整数", | ||
510 | # "19":"普通打印-无格线-招商银行", | ||
511 | # "20":"普通打印-无格线-招商银行-电子账单", | ||
512 | # "21":"普通打印-无格线-民生银行", | ||
513 | |||
514 | # "22":"普通打印-部分格线-横版-中信银行", | ||
515 | # "23":"普通打印-部分格线-竖版-农业银行-5列", | ||
516 | # "24":"普通打印-部分格线-竖版-农业银行-8列", | ||
517 | # "25":"普通打印-部分格线-竖版-农业银行-窄页", | ||
518 | # "26":"普通打印-部分格线-竖版-平安电子账单", | ||
519 | # "27":"普通打印-部分格线-竖版-建设银行-电子账单", | ||
520 | |||
521 | # "34":"针式打印-全格线-建设银行", | ||
522 | # "35":"针式打印-部分格线-竖版-邮储银行", | ||
523 | # "36":"针式打印-部分格线-竖版-邮储银行-绿卡", | ||
524 | |||
525 | CLASSIFY_LIST = [ | ||
526 | ('其他', OTHER_TUPLE), | ||
527 | ('其他', OTHER_TUPLE), | ||
528 | ('其他', OTHER_TUPLE), | ||
529 | ('其他', OTHER_TUPLE), | ||
530 | ('普通打印-全表格-中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)), | ||
531 | ('普通打印-全表格-农业银行-10列', (1, None, None, 5, None, 8, 7, 6, None, None, None, 3, 4)), | ||
532 | ('普通打印-全表格-农业银行-10列-1', (2, None, 4, 5, None, 3, 7, 6, None, None, None, None, None)), | ||
533 | ('普通打印-全表格-农业银行-9列', (1, None, 3, 4, None, 2, 8, 7, None, None, None, None, None)), | ||
534 | ('普通打印-全表格-北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)), | ||
535 | ('普通打印-全表格-工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)), | ||
536 | ('普通打印-全表格-工商银行-电子账单', (1, None, 9, 10, None, 7, 11, 12, None, None, None, None, None)), | ||
537 | ('普通打印-全表格-建设银行', (3, None, 4, 5, None, 2, None, 7, None, None, None, None, None)), | ||
538 | ('普通打印-全表格-微信账单', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)), | ||
539 | ('普通打印-全表格-支付宝账单', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)), | ||
540 | |||
541 | ('普通打印-无格线-交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)), | ||
542 | ('普通打印-无格线-储蓄银行-5列', (1, None, 3, 4, None, 2, None, None, None, None, None, None, None)), | ||
543 | ('普通打印-无格线-储蓄银行-8列', (1, None, 4, 5, None, 2, 6, 7, None, None, None, None, None)), | ||
544 | ('普通打印-无格线-农业银行-扩张缩进', (1, 2, 5, 6, None, 4, None, None, None, None, None, None, None)), | ||
545 | ('普通打印-无格线-农业银行-整数', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)), | ||
546 | ('普通打印-无格线-招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)), | ||
547 | ('普通打印-无格线-招商银行-电子账单', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)), | ||
548 | ('普通打印-无格线-民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)), | ||
549 | |||
550 | ('普通打印-部分格线-横版-中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)), | ||
551 | ('普通打印-部分格线-竖版-农业银行-5列', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | ||
552 | ('普通打印-部分格线-竖版-农业银行-8列', (1, None, None, 6, None, 3, 8, 7, None, None, None, 4, 5)), | ||
553 | ('普通打印-部分格线-竖版-农业银行-窄页', (1, None, 3, 4, None, 2, 7, 6, None, None, None, None, None)), | ||
554 | ('普通打印-部分格线-竖版-平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)), | ||
555 | ('普通打印-部分格线-竖版-建设银行-电子账单', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)), | ||
556 | ('其他', OTHER_TUPLE), | ||
557 | ('其他', OTHER_TUPLE), | ||
558 | ('其他', OTHER_TUPLE), | ||
559 | ('其他', OTHER_TUPLE), | ||
560 | ('其他', OTHER_TUPLE), | ||
561 | ('其他', OTHER_TUPLE), | ||
562 | ('针式打印-全格线-建设银行', OTHER_TUPLE), | ||
563 | ('针式打印-部分格线-竖版-邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | ||
564 | ('针式打印-部分格线-竖版-邮储银行-绿卡', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | ||
565 | ('其他', OTHER_TUPLE), | ||
566 | ] | ||
567 | |||
568 | # ----------license相关------------------------------------------------------------------------------------------------ | ||
569 | |||
570 | # "0":"AVT Invioce", | ||
571 | # "1":"二手车发票", | ||
572 | # "2":"其他", | ||
573 | # "3":"护照", | ||
574 | # "28":"机动车登记证", | ||
575 | # "29":"机动车销售统一发票", | ||
576 | # "30":"港澳通行证", | ||
577 | # "31":"营业执照", | ||
578 | # "32":"行驶证", | ||
579 | # "33":"身份证", | ||
580 | # "37":"银行卡" | ||
581 | |||
582 | # 其他 | ||
583 | OTHER_CLASSIFY = 2 | ||
584 | |||
585 | # 身份证 | ||
586 | IC_CN_NAME = '身份证' | ||
587 | IC_CLASSIFY = 33 | ||
588 | IC_FIELD_ORDER_0 = (('姓名', '姓名'), | ||
589 | ('公民身份号码', '公民身份号码'), | ||
590 | ('出生年月', '出生年月'), | ||
591 | ('住址', '住址'), | ||
592 | ('性别', '性别'), | ||
593 | ('民族', '民族'),) | ||
594 | IC_FIELD_ORDER_1 = (('有效期限', '有效期限'), ('签发机关', '签发机关'),) | ||
595 | # 居住证 | ||
596 | RP_CN_NAME = '居住证' | ||
597 | RP_CLASSIFY = 10087 | ||
598 | RP_FIELD_ORDER_0 = (('姓名', '姓名'), | ||
599 | ('公民身份号码', '公民身份号码'), | ||
600 | ('出生年月', '出生年月'), | ||
601 | ('住址', '住址'), | ||
602 | ('性别', '性别'),) | ||
603 | RP_FIELD_ORDER_1 = IC_FIELD_ORDER_1 | ||
604 | # 增值税发票 | ||
605 | VAT_CN_NAME = 'VAT普票' | ||
606 | VAT_CLASSIFY = 0 | ||
607 | VAT_FIELD_ORDER = (('发票代码', '发票代码'), | ||
608 | ('发票代码(开具)', '发票代码(开具)'), | ||
609 | ('发票号码', '发票号码'), | ||
610 | ('发票号码(开具)', '发票号码(开具)'), | ||
611 | ('开票日期', '开票日期'), | ||
612 | ('校验码', '校验码'), | ||
613 | ('货物或应税劳务、服务名称', '货物或应税劳务、服务名称'), | ||
614 | ('金额合计', '开具金额合计(不含税)'), | ||
615 | ('税率', '税率'), | ||
616 | ('税额合计', '税额合计'), | ||
617 | ('价税合计小写', '价税合计(小写)'), | ||
618 | ('价税合计大写', '价税合计(大写)'), | ||
619 | ('购方名称', '购买方名称'), | ||
620 | ('购方纳税人识别号', '购买方纳税人识别号'), | ||
621 | ('购方地址、电话', '购买方地址、电话'), | ||
622 | ('购方开户行及账号', '购买方开户行及账号'), | ||
623 | ('销方名称', '销售方名称'), | ||
624 | ('销方纳税人识别号', '销售方纳税人识别号'), | ||
625 | ('销方地址、电话', '销售方地址、电话'), | ||
626 | ('销方开户行及账号', '销售方开户行及账号'), | ||
627 | ('销售方:(章)', '销售方:(章)'), | ||
628 | ('备注', '备注'),) | ||
629 | # 机动车登记证书 | ||
630 | MVC_CN_NAME = '机动车登记证书' | ||
631 | MVC_CLASSIFY = 28 | ||
632 | MVC_CLASSIFY_SE = 10086 | ||
633 | MVC_FIELD_ORDER_1_2 = (('1.机动车所有人/身份证名称/号码', '机动车所有人/身份证明名称/号码'), | ||
634 | ('3.登记日期', '登记日期'), | ||
635 | ('9.车辆识别代号/车架号', '车辆识别代号/车架号'), | ||
636 | ('32.车辆出厂日期', '车辆出厂日期'), | ||
637 | ('34.发证日期', '发证日期'), | ||
638 | ('30.使用性质', '使用性质'), | ||
639 | ('31.车辆获得方式', '车辆获得方式'), | ||
640 | ('4.机动车登记编号', '机动车登记编号'), | ||
641 | ('空行占位', None), | ||
642 | ('5.车辆类型', '车辆类型'), | ||
643 | ('6.车辆品牌', '车辆品牌'), | ||
644 | ('7.车辆型号', '车辆型号'), | ||
645 | ('8.车身颜色', '车身颜色'), | ||
646 | ('10.国产/进口', '国产/进口'), | ||
647 | ('11.发动机号', '发动机号'), | ||
648 | ('12.发动机型号', '发动机型号'), | ||
649 | ('15.制造厂名称', '制造厂名称'), | ||
650 | ('2.登记机关', '登记机关'), | ||
651 | ('编号', '机动车登记证书编号'),) | ||
652 | MVC_FIELD_ORDER_3_4 = ( | ||
653 | ('姓名/名称', '姓名/名称'), | ||
654 | ('身份证明名称/号码', '身份证明名称/号码'), | ||
655 | ('转移登记日期', '转移登记日期'), | ||
656 | ) | ||
657 | MVC_SE_FIELD_ORDER_1_2 = (('9.车辆识别代号/车架号', '车辆识别代号/车架号'), | ||
658 | ('1.机动车所有人/身份证名称/号码', '机动车所有人/身份证明名称/号码'), | ||
659 | ('空行占位', None), | ||
660 | ('3.登记日期', '登记日期'), | ||
661 | ('32.车辆出厂日期', '车辆出厂日期'), | ||
662 | ('34.发证日期', '发证日期'), | ||
663 | ('30.使用性质', '使用性质'), | ||
664 | ('31.车辆获得方式', '车辆获得方式'), | ||
665 | ('5.车辆类型', '车辆类型'), | ||
666 | ('6.车辆品牌', '车辆品牌'), | ||
667 | ('7.车辆型号', '车辆型号'), | ||
668 | ('8.车身颜色', '车身颜色'), | ||
669 | ('10.国产/进口', '国产/进口'), | ||
670 | ('11.发动机号', '发动机号'), | ||
671 | ('12.发动机型号', '发动机型号'), | ||
672 | ('13.燃料种类', '燃料种类'), | ||
673 | ('14.排量/功率', '排量/功率'), | ||
674 | ('15.制造厂名称', '制造厂名称'), | ||
675 | ('16.转向形式', '转向形式'), | ||
676 | ('17.轮距', '轮距'), | ||
677 | ('18.轮胎数', '轮胎数'), | ||
678 | ('19.轮胎规格', '轮胎规格'), | ||
679 | ('20.钢板弹簧片数', '钢板弹簧片数'), | ||
680 | ('21.轴距', '轴距'), | ||
681 | ('22.轴数', '轴数'), | ||
682 | ('23.外廓尺寸', '外廓尺寸'), | ||
683 | ('24.货厢内部尺寸', '货厢内部尺寸'), | ||
684 | ('25.总质量', '总质量'), | ||
685 | ('26.核定载质量', '核定载质量'), | ||
686 | ('27.核定载客', '核定载客'), | ||
687 | ('28.准牵引总质量', '准牵引总质量'), | ||
688 | ('29.驾驶室载客', '驾驶室载客'), | ||
689 | ('2.登记机关', '登记机关'), | ||
690 | ('4.机动车登记编号', '机动车登记编号'), | ||
691 | ('编号', '机动车登记证书编号'),) | ||
692 | MVC_SE_FIELD_ORDER_3_4 = ( | ||
693 | ('姓名/名称', '姓名/名称'), | ||
694 | ('身份证明名称/号码', '身份证明名称/号码'), | ||
695 | ('转移登记日期', '转移登记日期'), | ||
696 | ) | ||
697 | # 机动车销售统一发票 | ||
698 | MVI_CN_NAME = '机动车销售统一发票' | ||
699 | MVI_CLASSIFY = 29 | ||
700 | MVI_FIELD_ORDER = (('发票代码', '发票代码'), | ||
701 | ('发票号码', '发票号码'), | ||
702 | ('开票日期', '开票日期'), | ||
703 | ('不含税价', '不含税价'), | ||
704 | ('发票类型', '发票联'), | ||
705 | ('购方名称', '购买方名称'), | ||
706 | ('购买方身份证号或组织机构代码', '购买方证件号码'), | ||
707 | ('纳税人识别号', '纳税人识别号'), # nodo | ||
708 | ('车辆识别代码', '车架号'), | ||
709 | ('价税合计小写', '价税合计小写'), | ||
710 | ('销方名称', '销货单位名称'), | ||
711 | ('增值税税额', '增值税税额'), | ||
712 | ('增值税税率', '增值税税率'), # nodo | ||
713 | ('发票章有无', '发票章有无'), # nodo 全国统一发票监制章 销售单位章 | ||
714 | ('价税合计大写', '价税合计大写'), # nodo | ||
715 | ('', None), | ||
716 | ('发动机号码', '发动机号'), | ||
717 | ('车辆类型', '车辆类型'), # nodo | ||
718 | ('厂牌型号', '厂牌型号'), # nodo | ||
719 | ('产地', '产地'), # nodo | ||
720 | ('合格证号', '合格证号'), # nodo | ||
721 | ('进口证明书号', '进口证明书号'), # nodo | ||
722 | ('商检单号', '商检单号'), # nodo | ||
723 | ('电话', '电话'), # nodo | ||
724 | ('销方纳税人识别号', '销货方纳税人识别号'), | ||
725 | ('账号', '账号'), # nodo | ||
726 | ('地址', '地址'), # nodo | ||
727 | ('开户银行', '开户银行'), # nodo | ||
728 | ('主管税务机关及代码', '主管税务机关及代码'), # nodo | ||
729 | ('吨位', '吨位'), # nodo | ||
730 | ('限乘人数', '限乘人数'),) # nodo | ||
731 | IC_PID = VAT_PID = MVC_PID = MVI_PID = None | ||
732 | |||
733 | # 营业执照 | ||
734 | BL_CN_NAME = '营业执照' | ||
735 | BL_CLASSIFY = 31 | ||
736 | BL_PID = 41 | ||
737 | BL_FIELD_ORDER = (('注册号', '统一社会信用代码'), | ||
738 | ('企业名称', '名称'), | ||
739 | ('企业类型', '类型'), | ||
740 | ('经营者姓名', '法定代表人'), | ||
741 | ('成立日期', '成立日期'), | ||
742 | ('营业期限', '营业期限'), | ||
743 | ('注册资本', '注册资本'), | ||
744 | ('地址', '住所'), | ||
745 | ('经营范围', '经营范围'),) | ||
746 | # 二手车发票 | ||
747 | UCI_CN_NAME = '二手车发票' | ||
748 | UCI_CLASSIFY = 1 | ||
749 | UCI_PID = 60 | ||
750 | UCI_FIELD_ORDER = (('发票代码', '发票代码'), | ||
751 | ('发票号码', '发票号码'), | ||
752 | ('开票日期', '开票日期'), | ||
753 | ('车价合计', '车价合计小写'), | ||
754 | ('发票联', '发票联'), | ||
755 | ('购方单位', '买方单位/个人'), | ||
756 | ('购方号码', '买方单位代码/身份证号码'), | ||
757 | ('车架号码', '车架号'), | ||
758 | ('车价合计大写', '车价合计大写'), | ||
759 | ('二手车市场', '二手车市场'), | ||
760 | ('发票章有无', '发票章有无'), | ||
761 | ('空行占位', None), | ||
762 | ('车牌照号', '车牌照号'), | ||
763 | ('登记证号', '登记证号'), | ||
764 | ('购方地址', '买方单位/住址'), | ||
765 | ('车辆类型', '车辆类型'), | ||
766 | ('厂牌型号', '厂牌型号'), | ||
767 | ('车管所名称', '转入地车辆管理所名称'), | ||
768 | ('销方名称', '卖方单位/个人'), | ||
769 | ('销方号码', '卖方单位代码/身份证号码'), | ||
770 | ('销方地址', '卖方单位/个人住址'),) | ||
771 | # 港澳台通行证 | ||
772 | EEP_CN_NAME = '港澳台通行证' | ||
773 | EEP_CLASSIFY = 30 | ||
774 | EEP_PID = 1018 | ||
775 | EEP_FIELD_ORDER = (('中文名', '姓名'), # 英文名 | ||
776 | ('证件号码', '证件号码'), | ||
777 | ('签发次数', '换证次数(签发次数)'), | ||
778 | ('有效期限', '有效期限'), | ||
779 | ('出生日期', '出生日期'), | ||
780 | ('性别', '性别'), | ||
781 | ('签发机关', '签发机关'), | ||
782 | ('签发地点', '签发地点'),) | ||
783 | # 行驶证 | ||
784 | DL_CN_NAME = '行驶证' | ||
785 | DL_CLASSIFY = 32 | ||
786 | DL_PID = 5 | ||
787 | DL_FIELD_ORDER_0 = (('号牌号码', '1 号牌号码'), | ||
788 | ('所有人', '3 所有人'), | ||
789 | ('使用性质', '5 使用性质'), | ||
790 | ('车辆识别代码', '7 车辆识别代号'), | ||
791 | ('注册日期', '9 注册日期'), | ||
792 | ('发证日期', '10 发证日期'), | ||
793 | ('车辆类型', '2 车辆类型'), | ||
794 | ('地址', '4 住址'), | ||
795 | ('品牌型号', '6 品牌型号'), | ||
796 | ('发动机号', '8 发动机号码'),) | ||
# Vehicle licence (行驶证): second ordered field layout, selected through
# FIELD_ORDER_MAP by the presence of the '档案编号' field.
# NOTE(review): each pair is presumably (recognised field key, output column
# label) -- confirm against the code that consumes these tuples.
DL_FIELD_ORDER_1 = (
    ('号牌号码', '1 号牌号码'),
    ('档案编号', '11 档案编号'),
    ('核定载人数', '12 核定载人数'),
    ('总质量', '13 总质量'),
    ('整备质量', '14 整备质量'),
    # Label corrected from '15 核对载质量' so that it agrees with its own key,
    # like every other pair in this tuple.
    ('核定载质量', '15 核定载质量'),
    ('外廓尺寸', '16 外廓尺寸'),
    ('准牵引总质量', '17 准牵引总质量'),
)
805 | # 护照 | ||
806 | PP_CN_NAME = '护照' | ||
807 | PP_CLASSIFY = 3 | ||
808 | PP_PID = 8 | ||
809 | PP_FIELD_ORDER = (('类型', '类型/Type'), | ||
810 | ('英文姓名', '姓名/Name'), | ||
811 | ('护照号码', '护照号码/Passport No'), | ||
812 | ('有效期至', '有效期至/Date of expiry'), | ||
813 | ('签发日期', '签发日期/Date of issue'), | ||
814 | ('国家码', '国家码/Country Code'), | ||
815 | ('性别', '性别/Sex'), | ||
816 | ('国籍', '国籍/Nationality'), | ||
817 | ('出生日期', '出生日期/Date of birth'), | ||
818 | ('出生地点', '出生地点/Place of birth'), | ||
819 | ('签发地点', '签发地点/Place of issue'),) | ||
820 | # 银行卡 | ||
821 | BC_CN_NAME = '银行卡' | ||
822 | BC_CLASSIFY = 37 | ||
823 | BC_PID = 4 | ||
824 | # BC_FIELD = (('CardNum', '银行卡号'), | ||
825 | # ('BankName', '发卡行名称'), | ||
826 | # ('CardName', '银行卡名称'), | ||
827 | # ('BankCode', '发卡行代号'), | ||
828 | # ('CardType', '银行卡类型'), | ||
829 | # ('Date', '日期')) | ||
830 | BC_FIELD_ORDER = (('BankName', '发卡行名称'), | ||
831 | ('CardNum', '银行卡号'), | ||
832 | ('CardType', '银行卡类型'),) | ||
833 | |||
834 | SUCCESS_CODE_SET = {'0', 0} | ||
835 | |||
836 | FIELD_ORDER_MAP = { | ||
837 | IC_CLASSIFY: ('有效期限', IC_FIELD_ORDER_1, IC_FIELD_ORDER_0), | ||
838 | RP_CLASSIFY: ('有效期限', RP_FIELD_ORDER_1, RP_FIELD_ORDER_0), | ||
839 | DL_CLASSIFY: ('档案编号', DL_FIELD_ORDER_1, DL_FIELD_ORDER_0), | ||
840 | MVC_CLASSIFY: ('转移登记日期', MVC_FIELD_ORDER_3_4, MVC_FIELD_ORDER_1_2), | ||
841 | MVC_CLASSIFY_SE: ('转移登记日期', MVC_SE_FIELD_ORDER_3_4, MVC_SE_FIELD_ORDER_1_2) | ||
842 | } | ||
843 | |||
844 | LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, False, False)), | ||
845 | (IC_CLASSIFY, (IC_PID, IC_CN_NAME, None, True, False)), | ||
846 | (RP_CLASSIFY, (None, RP_CN_NAME, None, True, False)), | ||
847 | (BC_CLASSIFY, (BC_PID, BC_CN_NAME, BC_FIELD_ORDER, False, False)), | ||
848 | (BL_CLASSIFY, (BL_PID, BL_CN_NAME, BL_FIELD_ORDER, False, False)), | ||
849 | (UCI_CLASSIFY, (UCI_PID, UCI_CN_NAME, UCI_FIELD_ORDER, False, False)), | ||
850 | (EEP_CLASSIFY, (EEP_PID, EEP_CN_NAME, EEP_FIELD_ORDER, False, False)), | ||
851 | (DL_CLASSIFY, (DL_PID, DL_CN_NAME, None, True, False)), | ||
852 | (PP_CLASSIFY, (PP_PID, PP_CN_NAME, PP_FIELD_ORDER, False, False)), | ||
853 | (MVC_CLASSIFY, (MVC_PID, MVC_CN_NAME, None, True, True)), | ||
854 | (VAT_CLASSIFY, (VAT_PID, VAT_CN_NAME, VAT_FIELD_ORDER, False, False))) | ||
855 | |||
856 | LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER) | ||
857 | |||
858 | OTHER_CLASSIFY_SET = {OTHER_CLASSIFY} | ||
859 | LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY} | ||
860 | LICENSE_CLASSIFY_SET_2 = {BL_CLASSIFY, UCI_CLASSIFY, EEP_CLASSIFY, DL_CLASSIFY, PP_CLASSIFY, BC_CLASSIFY} |
... | @@ -440,6 +440,19 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -440,6 +440,19 @@ class Command(BaseCommand, LoggerMixin): |
440 | merged_bs_summary = {} | 440 | merged_bs_summary = {} |
441 | card_num = 1 | 441 | card_num = 1 |
442 | for role_dict in unknown_summary.values(): | 442 | for role_dict in unknown_summary.values(): |
443 | if len(role_dict) == 2 and consts.UNKNOWN_ROLE in role_dict: | ||
444 | summary_dict = role_dict.pop(consts.UNKNOWN_ROLE, {}) | ||
445 | for summary in role_dict.values(): | ||
446 | summary_dict['confidence'].extend(summary['confidence']) | ||
447 | summary_dict['role'] = summary['role'] | ||
448 | summary_dict['code'].extend(summary['code']) | ||
449 | summary_dict['print_time'].extend(summary['print_time']) | ||
450 | summary_dict['start_date'].extend(summary['start_date']) | ||
451 | summary_dict['end_date'].extend(summary['end_date']) | ||
452 | summary_dict['sheet'].extend(summary['sheet']) | ||
453 | card = '{0}_{1}'.format(consts.UNKNOWN_CARD, card_num) | ||
454 | merged_bs_summary[card] = summary_dict | ||
455 | else: | ||
443 | for summary in role_dict.values(): | 456 | for summary in role_dict.values(): |
444 | card = '{0}_{1}'.format(consts.UNKNOWN_CARD, card_num) | 457 | card = '{0}_{1}'.format(consts.UNKNOWN_CARD, card_num) |
445 | card_num += 1 | 458 | card_num += 1 |
... | @@ -460,6 +473,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -460,6 +473,7 @@ class Command(BaseCommand, LoggerMixin): |
460 | for role, summary in classify_summary.items(): | 473 | for role, summary in classify_summary.items(): |
461 | if one_card or role in card_summary['role_set']: | 474 | if one_card or role in card_summary['role_set']: |
462 | merge_role.append(role) | 475 | merge_role.append(role) |
476 | card_summary['confidence'].extend(summary['confidence']) | ||
463 | card_summary['sheet'].extend(summary['sheet']) | 477 | card_summary['sheet'].extend(summary['sheet']) |
464 | card_summary['code'].extend(summary['code']) | 478 | card_summary['code'].extend(summary['code']) |
465 | card_summary['print_time'].extend(summary['print_time']) | 479 | card_summary['print_time'].extend(summary['print_time']) | ... | ... |
... | @@ -18,7 +18,7 @@ class DocHandler: | ... | @@ -18,7 +18,7 @@ class DocHandler: |
18 | 18 | ||
19 | def get_doc_list(self, doc_queryset, business_type): | 19 | def get_doc_list(self, doc_queryset, business_type): |
20 | for doc_dict in doc_queryset: | 20 | for doc_dict in doc_queryset: |
21 | if doc_dict['status'] != DocStatus.COMPLETE.value: | 21 | if doc_dict['status'] not in [DocStatus.COMPLETE.value, DocStatus.UPLOAD_FAILED.value]: |
22 | continue | 22 | continue |
23 | doc_id = doc_dict.get('id') | 23 | doc_id = doc_dict.get('id') |
24 | doc_dict['pdf_link'] = self.get_link(doc_id, business_type) | 24 | doc_dict['pdf_link'] = self.get_link(doc_id, business_type) | ... | ... |
... | @@ -30,51 +30,118 @@ class BSWorkbook(Workbook): | ... | @@ -30,51 +30,118 @@ class BSWorkbook(Workbook): |
30 | self.MAX_MEAN = 31 | 30 | self.MAX_MEAN = 31 |
31 | 31 | ||
32 | @staticmethod | 32 | @staticmethod |
33 | def sheet_prune(ws, classify): | 33 | def header_collect(ws, sheet_header_info, header_info, max_column_list, classify): |
34 | ws.insert_cols(1, amount=consts.FIXED_COL_AMOUNT) | 34 | # sheet_header_info = { |
35 | moved_col_set = set() | 35 | # 'sheet_name': { |
36 | header_col_set = set() | 36 | # 'summary_col': 1, |
37 | # 根据第一行关键词排列 | 37 | # 'date_col': 1, |
38 | for col in range(consts.FIXED_COL_AMOUNT + 1, ws.max_column + 1): | 38 | # 'amount_col': 1, |
39 | header_value = ws.cell(1, col).value | 39 | # 'over_col': 1, |
40 | # 'income_col': 1, | ||
41 | # 'outlay_col': 1, | ||
42 | # 'borrow_col': 1, | ||
43 | # 'min_row': 2, | ||
44 | # 'find_count': 3, | ||
45 | # 'find_col': {1}, | ||
46 | # 'header': ('日期', '金额') | ||
47 | # } | ||
48 | # } | ||
49 | |||
50 | # header_info = { | ||
51 | # 'summary_col': { | ||
52 | # 5: 2, | ||
53 | # 3: 1, | ||
54 | # }, | ||
55 | # 'date_col': {}, | ||
56 | # 'amount_col': {}, | ||
57 | # 'over_col': {}, | ||
58 | # 'income_col': {}, | ||
59 | # 'outlay_col': {}, | ||
60 | # 'borrow_col': {}, | ||
61 | # } | ||
62 | |||
63 | # 第一行关键词 | ||
64 | find_count = 0 | ||
65 | for first_row in ws.iter_rows(max_row=1, min_row=1, values_only=True): | ||
66 | sheet_header_info.setdefault(ws.title, {}).setdefault(consts.HEADER_KEY, first_row) | ||
67 | for idx, header_value in enumerate(first_row): | ||
68 | if classify == consts.WECHART_CLASSIFY: | ||
69 | header_col = consts.WECHART_HEADERS_MAPPING.get(header_value) | ||
70 | else: | ||
40 | header_col = consts.HEADERS_MAPPING.get(header_value) | 71 | header_col = consts.HEADERS_MAPPING.get(header_value) |
41 | if header_col is not None and header_col not in header_col_set: | 72 | if header_col is not None: |
42 | letter = get_column_letter(col) | 73 | find_count += 1 |
43 | ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=header_col - col) | 74 | sheet_header_info.setdefault(ws.title, {}).setdefault(header_col, idx) |
44 | moved_col_set.add(col) | 75 | find_col_set = sheet_header_info.setdefault(ws.title, {}).setdefault(consts.FIND_COL_KEY, set()) |
45 | header_col_set.add(header_col) | 76 | find_col_set.add(idx) |
46 | elif header_value in consts.BORROW_HEADERS_SET: | 77 | col_count = header_info.setdefault(header_col, {}).get(idx) |
47 | letter = get_column_letter(col) | 78 | header_info.setdefault(header_col, {})[idx] = 1 if col_count is None else col_count+1 |
48 | ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=consts.BORROW_HEADER_COL - col) | 79 | |
49 | moved_col_set.add(col) | 80 | sheet_header_info.setdefault(ws.title, {}).setdefault(consts.FIND_COUNT_KEY, find_count) |
50 | header_col_set.add(consts.BORROW_HEADER_COL) | 81 | min_row = 1 if find_count == 0 else 2 |
51 | elif header_value in consts.INCOME_HEADERS_SET: | 82 | sheet_header_info.setdefault(ws.title, {}).setdefault(consts.MIN_ROW_KEY, min_row) |
52 | letter = get_column_letter(col) | 83 | max_column_list.append(ws.max_column) |
53 | ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=consts.INCOME_HEADER_COL - col) | 84 | |
54 | moved_col_set.add(col) | 85 | @staticmethod |
55 | header_col_set.add(consts.INCOME_HEADER_COL) | 86 | def header_statistics(sheet_header_info, header_info, classify): |
56 | elif header_value in consts.OUTLAY_HEADERS_SET: | 87 | # statistics_header_info = { |
57 | letter = get_column_letter(col) | 88 | # SUMMARY_KEY: 2, |
58 | ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=consts.OUTLAY_HEADER_COL - col) | 89 | # DATE_KEY: 3, |
59 | moved_col_set.add(col) | 90 | # AMOUNT_KEY: 4, |
60 | header_col_set.add(consts.OUTLAY_HEADER_COL) | 91 | # OVER_KEY: 5, |
61 | 92 | # IMCOME_KEY: 6, | |
62 | # 缺失表头再次查找 | 93 | # OUTLAY_KEY: 7, |
63 | for header_col in range(1, consts.FIXED_COL_AMOUNT + 1): | 94 | # BORROW_KEY: 8, |
64 | if header_col in header_col_set or header_col == consts.RESULT_HEADER_COL: | 95 | # 'header': ('日期', '金额') |
65 | continue | 96 | # } |
66 | fix_col = consts.CLASSIFY_LIST[classify][1][header_col - 1] | 97 | statistics_header_info = {} |
67 | if fix_col is None: | 98 | sheet_order_list = sorted(sheet_header_info, reverse=True, |
99 | key=lambda x: sheet_header_info[x][consts.FIND_COUNT_KEY]) | ||
100 | best_sheet_info = sheet_header_info.get(sheet_order_list[0]) | ||
101 | if best_sheet_info.get(consts.FIND_COUNT_KEY, 0) == 0: | ||
102 | for key, value in consts.CLASSIFY_MAP.items(): | ||
103 | col = consts.CLASSIFY_LIST[classify][1][value] | ||
104 | statistics_header_info[key] = col - 1 if isinstance(col, int) else None | ||
105 | statistics_header_info[consts.HEADER_KEY] = consts.CLASSIFY_HEADER_LIST[classify] | ||
106 | else: | ||
107 | find_col_set = best_sheet_info.get(consts.FIND_COL_KEY, set()) | ||
108 | # SUMMARY_KEY DATE_KEY OVER_KEY BORROW_KEY | ||
109 | for key in consts.KEY_LIST: | ||
110 | col = best_sheet_info.get(key) | ||
111 | if col is None: | ||
112 | col_dict = header_info.get(key, {}) | ||
113 | for idx in sorted(col_dict, key=lambda x: col_dict[x], reverse=True): | ||
114 | if idx in find_col_set: | ||
68 | continue | 115 | continue |
69 | fix_col = fix_col + consts.FIXED_COL_AMOUNT | 116 | col = idx |
70 | if fix_col in moved_col_set: | 117 | find_col_set.add(col) |
71 | break | 118 | break |
72 | letter = get_column_letter(fix_col) | 119 | else: |
73 | ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=header_col - fix_col) | 120 | fixed_col = consts.CLASSIFY_LIST[classify][1][consts.CLASSIFY_MAP[key]] |
121 | if fixed_col not in find_col_set and isinstance(fixed_col, int): | ||
122 | col = fixed_col - 1 | ||
123 | find_col_set.add(col) | ||
124 | statistics_header_info[key] = col | ||
125 | statistics_header_info[consts.HEADER_KEY] = best_sheet_info.get(consts.HEADER_KEY) | ||
126 | return statistics_header_info | ||
74 | 127 | ||
75 | ws.delete_cols(consts.FIXED_COL_AMOUNT + 1, amount=ws.max_column) | 128 | @staticmethod |
76 | min_row = 1 if len(moved_col_set) == 0 else 2 | 129 | def get_data_col_min_row(sheet, sheet_header_info, header_info, classify): |
77 | return min_row | 130 | date_col = sheet_header_info.get(sheet, {}).get(consts.DATE_KEY) |
131 | if date_col is None: | ||
132 | date_col_dict = header_info.get(consts.DATE_KEY, {}) | ||
133 | find_col_set = sheet_header_info.get(sheet, {}).get(consts.FIND_COL_KEY, set()) | ||
134 | for idx in sorted(date_col_dict, key=lambda x: date_col_dict[x], reverse=True): | ||
135 | if idx in find_col_set: | ||
136 | continue | ||
137 | date_col = idx | ||
138 | break | ||
139 | else: | ||
140 | fixed_col = consts.CLASSIFY_LIST[classify][1][consts.CLASSIFY_MAP[consts.DATE_KEY]] | ||
141 | if fixed_col not in find_col_set and isinstance(fixed_col, int): | ||
142 | date_col = fixed_col - 1 | ||
143 | min_row = sheet_header_info.get(sheet, {}).get(consts.MIN_ROW_KEY, 2) | ||
144 | return date_col, min_row | ||
78 | 145 | ||
79 | @staticmethod | 146 | @staticmethod |
80 | def month_split(dti, date_list, date_statistics): | 147 | def month_split(dti, date_list, date_statistics): |
... | @@ -122,8 +189,14 @@ class BSWorkbook(Workbook): | ... | @@ -122,8 +189,14 @@ class BSWorkbook(Workbook): |
122 | reverse_trend = -1 | 189 | reverse_trend = -1 |
123 | return reverse_trend | 190 | return reverse_trend |
124 | 191 | ||
125 | def sheet_split(self, ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics): | 192 | def sheet_split(self, ws, date_col, min_row, month_mapping, reverse_trend_list, date_list, date_statistics): |
126 | for date_tuple_src in ws.iter_cols(min_col=1, max_col=1, min_row=min_row, values_only=True): | 193 | if date_col is None: |
194 | # month_info process | ||
195 | month_info = month_mapping.setdefault('xxxx-xx', []) | ||
196 | month_info.append((ws.title, min_row, ws.max_row, 0)) | ||
197 | return | ||
198 | date_col = date_col + 1 | ||
199 | for date_tuple_src in ws.iter_cols(min_col=date_col, max_col=date_col, min_row=min_row, values_only=True): | ||
127 | date_tuple = [date[:10] if isinstance(date, str) else date for date in date_tuple_src] | 200 | date_tuple = [date[:10] if isinstance(date, str) else date for date in date_tuple_src] |
128 | dt_array, tz_parsed = tslib.array_to_datetime( | 201 | dt_array, tz_parsed = tslib.array_to_datetime( |
129 | np.array(date_tuple, copy=False, dtype=np.object_), | 202 | np.array(date_tuple, copy=False, dtype=np.object_), |
... | @@ -199,11 +272,12 @@ class BSWorkbook(Workbook): | ... | @@ -199,11 +272,12 @@ class BSWorkbook(Workbook): |
199 | return amount_str | 272 | return amount_str |
200 | # 1.替换 | 273 | # 1.替换 |
201 | res_str = amount_str.translate(consts.TRANS) | 274 | res_str = amount_str.translate(consts.TRANS) |
202 | # 2.删除多余的- | 275 | # 2.首字符处理 |
203 | res_str = res_str[0] + res_str[1:].replace('-', '') | 276 | first_char = res_str[0] |
204 | # 3.首字符处理 | 277 | if first_char in consts.ERROR_CHARS: |
205 | if res_str[0] in consts.ERROR_CHARS: | 278 | first_char = '-' |
206 | res_str = '-{0}'.format(res_str[1:]) | 279 | # 3.删除多余的- |
280 | res_str = first_char + res_str[1:].replace('-', '') | ||
207 | # 4.逗号与句号处理 | 281 | # 4.逗号与句号处理 |
208 | if len(res_str) >= 4: | 282 | if len(res_str) >= 4: |
209 | period_idx = len(res_str) - 3 | 283 | period_idx = len(res_str) - 3 |
... | @@ -213,90 +287,132 @@ class BSWorkbook(Workbook): | ... | @@ -213,90 +287,132 @@ class BSWorkbook(Workbook): |
213 | res_str = '{0}.{1}'.format(res_str[:period_idx], res_str[period_idx + 1:]) | 287 | res_str = '{0}.{1}'.format(res_str[:period_idx], res_str[period_idx + 1:]) |
214 | return res_str | 288 | return res_str |
215 | 289 | ||
216 | def build_month_sheet(self, card, month_mapping, ms, is_reverse): | 290 | def build_month_sheet(self, ms, card, month_mapping, is_reverse, statistics_header_info, max_column): |
291 | summary_cell_idx = statistics_header_info.get(consts.SUMMARY_KEY) | ||
292 | date_cell_idx = statistics_header_info.get(consts.DATE_KEY) | ||
293 | amount_cell_idx = statistics_header_info.get(consts.AMOUNT_KEY) # None or src or append | ||
294 | over_cell_idx = statistics_header_info.get(consts.OVER_KEY) | ||
295 | income_cell_idx = statistics_header_info.get(consts.IMCOME_KEY) | ||
296 | outlay_cell_idx = statistics_header_info.get(consts.OUTLAY_KEY) | ||
297 | borrow_cell_idx = statistics_header_info.get(consts.BORROW_KEY) | ||
298 | header = list(statistics_header_info.get(consts.HEADER_KEY)) | ||
299 | src_header_len = len(header) | ||
300 | if max_column > src_header_len: | ||
301 | for i in range(max_column - src_header_len): | ||
302 | header.append(None) | ||
303 | |||
304 | add_col = ['核对结果'] | ||
305 | if amount_cell_idx is None: | ||
306 | if income_cell_idx is not None or outlay_cell_idx is not None: | ||
307 | add_col = ['金额', '核对结果'] | ||
308 | amount_cell_idx = len(header) | ||
309 | header.extend(add_col) | ||
310 | result_idx = len(header) - 1 | ||
311 | |||
217 | tmp_ws = self.create_sheet('tmp_ws') | 312 | tmp_ws = self.create_sheet('tmp_ws') |
218 | for month in sorted(month_mapping.keys()): | 313 | for month in sorted(month_mapping.keys()): |
219 | # 3.1.拷贝数据 | 314 | # 3.1.拷贝数据 |
220 | parts = month_mapping.get(month) | 315 | parts = month_mapping.get(month) |
221 | new_ws = self.create_sheet('{0}({1})'.format(month, card[-6:])) | 316 | new_ws = self.create_sheet('{0}({1})'.format(month, card[-6:])) |
222 | new_ws.append(consts.FIXED_HEADERS) | 317 | new_ws.append(header) |
223 | for part in parts: | 318 | for part in parts: |
224 | ws = self.get_sheet_by_name(part[0]) | 319 | ws = self.get_sheet_by_name(part[0]) |
225 | for row_value in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True): | 320 | for row_value in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True): |
321 | if any(row_value): | ||
226 | new_ws.append(row_value) | 322 | new_ws.append(row_value) |
227 | # 3.2.提取信息、高亮 | 323 | # 3.2.提取信息、高亮 |
228 | amount_mapping = {} | 324 | amount_mapping = {} |
229 | amount_fill_row = set() | 325 | amount_fill_row = set() |
326 | |||
230 | for rows in new_ws.iter_rows(min_row=2): | 327 | for rows in new_ws.iter_rows(min_row=2): |
231 | summary_cell = rows[consts.SUMMARY_IDX] | 328 | # TODO 删除空行 |
232 | date_cell = rows[consts.DATE_IDX] | 329 | summary_cell = None if summary_cell_idx is None else rows[summary_cell_idx] |
233 | amount_cell = rows[consts.AMOUNT_IDX] | 330 | date_cell = None if date_cell_idx is None else rows[date_cell_idx] |
234 | row = summary_cell.row | 331 | amount_cell = None if amount_cell_idx is None else rows[amount_cell_idx] |
332 | over_cell = None if over_cell_idx is None else rows[over_cell_idx] | ||
333 | income_cell = None if income_cell_idx is None else rows[income_cell_idx] | ||
334 | outlay_cell = None if outlay_cell_idx is None else rows[outlay_cell_idx] | ||
335 | borrow_cell = None if borrow_cell_idx is None else rows[borrow_cell_idx] | ||
336 | |||
337 | summary_cell_value = None if summary_cell is None else summary_cell.value | ||
338 | date_cell_value = None if date_cell is None else date_cell.value | ||
339 | amount_cell_value = None if amount_cell is None else amount_cell.value | ||
340 | over_cell_value = None if over_cell is None else over_cell.value | ||
341 | income_cell_value = None if income_cell is None else income_cell.value | ||
342 | outlay_cell_value = None if outlay_cell is None else outlay_cell.value | ||
343 | borrow_cell_value = None if borrow_cell is None else borrow_cell.value | ||
344 | |||
345 | # row = summary_cell.row | ||
346 | if summary_cell is not None: | ||
235 | # 关键词1提取 | 347 | # 关键词1提取 |
236 | if summary_cell.value in self.interest_keyword: | 348 | if summary_cell_value in self.interest_keyword: |
237 | ms.append((summary_cell.value, date_cell.value, amount_cell.value)) | 349 | ms.append((summary_cell_value, date_cell_value, amount_cell_value)) |
238 | # 关键词2提取至临时表 | 350 | # 关键词2提取至临时表 |
239 | elif summary_cell.value in self.salary_keyword: | 351 | elif summary_cell_value in self.salary_keyword: |
240 | tmp_ws.append((summary_cell.value, date_cell.value, amount_cell.value)) | 352 | tmp_ws.append((summary_cell_value, date_cell_value, amount_cell_value)) |
241 | # 贷款关键词高亮 | 353 | # 贷款关键词高亮 |
242 | elif summary_cell.value in self.loan_keyword: | 354 | elif summary_cell_value in self.loan_keyword: |
243 | summary_cell.fill = self.loan_fill | 355 | summary_cell.fill = self.loan_fill |
244 | 356 | ||
245 | amount_error = False | ||
246 | # 3.3.余额转数值 | 357 | # 3.3.余额转数值 |
247 | over_cell = rows[consts.OVER_IDX] | 358 | over_success = False |
359 | if over_cell is not None: | ||
248 | try: | 360 | try: |
249 | over_cell.value = locale.atof(self.amount_format(over_cell.value)) | 361 | over_cell.value = locale.atof(self.amount_format(over_cell_value)) |
250 | except Exception as e: | 362 | except Exception as e: |
251 | amount_error = True | 363 | pass |
252 | else: | 364 | else: |
365 | over_success = True | ||
253 | over_cell.number_format = numbers.FORMAT_NUMBER_00 | 366 | over_cell.number_format = numbers.FORMAT_NUMBER_00 |
254 | 367 | ||
255 | # 3.4.金额转数值 | 368 | # 3.4.金额转数值 |
369 | amount_success = False | ||
370 | if amount_cell is not None: | ||
256 | try: | 371 | try: |
257 | try: | 372 | try: |
258 | amount_cell.value = locale.atof(self.amount_format(amount_cell.value)) | 373 | amount_cell.value = locale.atof(self.amount_format(amount_cell_value)) |
259 | except Exception as e: | 374 | except Exception as e: |
260 | try: | 375 | try: |
261 | amount_cell.value = locale.atof(self.amount_format(rows[consts.INCOME_IDX].value)) | 376 | amount_cell.value = locale.atof(self.amount_format(income_cell_value)) |
262 | if amount_cell.value == 0: | 377 | if amount_cell.value == 0: |
263 | raise | 378 | raise |
264 | elif amount_cell.value < 0: | 379 | elif amount_cell.value < 0: |
265 | amount_cell.value = -amount_cell.value | 380 | amount_cell.value = -amount_cell.value |
266 | except Exception as e: | 381 | except Exception as e: |
267 | amount_cell.value = locale.atof(self.amount_format(rows[consts.OUTLAY_IDX].value)) | 382 | amount_cell.value = locale.atof(self.amount_format(outlay_cell_value)) |
268 | if amount_cell.value > 0: | 383 | if amount_cell.value > 0: |
269 | amount_cell.value = -amount_cell.value | 384 | amount_cell.value = -amount_cell.value |
270 | except Exception as e: | 385 | except Exception as e: |
271 | amount_error = True | 386 | pass |
272 | else: | 387 | else: |
273 | if rows[consts.BORROW_IDX].value in consts.BORROW_OUTLAY_SET: | 388 | amount_success = True |
389 | if borrow_cell_value in consts.BORROW_OUTLAY_SET: | ||
274 | amount_cell.value = -amount_cell.value | 390 | amount_cell.value = -amount_cell.value |
275 | amount_cell.number_format = numbers.FORMAT_NUMBER_00 | 391 | amount_cell.number_format = numbers.FORMAT_NUMBER_00 |
392 | if date_cell is not None: | ||
276 | same_amount_mapping = amount_mapping.get(date_cell.value, {}) | 393 | same_amount_mapping = amount_mapping.get(date_cell.value, {}) |
277 | fill_rows = same_amount_mapping.get(-amount_cell.value) | 394 | fill_rows = same_amount_mapping.get(-amount_cell.value) |
278 | if fill_rows: | 395 | if fill_rows: |
279 | amount_fill_row.add(row) | 396 | amount_fill_row.add(amount_cell.row) |
280 | amount_fill_row.update(fill_rows) | 397 | amount_fill_row.update(fill_rows) |
281 | amount_mapping.setdefault(date_cell.value, {}).setdefault( | 398 | amount_mapping.setdefault(date_cell.value, {}).setdefault( |
282 | amount_cell.value, []).append(row) | 399 | amount_cell.value, []).append(amount_cell.row) |
283 | 400 | ||
284 | # 3.5.核对结果 | 401 | # 3.5.核对结果 |
285 | if row > 2 and not amount_error: | 402 | if amount_success and over_success and amount_cell.row > 2: |
403 | amount_col_letter = get_column_letter(amount_cell_idx + 1) | ||
404 | over_col_letter = get_column_letter(over_cell_idx + 1) | ||
286 | if is_reverse: | 405 | if is_reverse: |
287 | rows[consts.RESULT_IDX].value = '=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'.format( | 406 | rows[result_idx].value = '=IF({2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5}")'.format( |
288 | row - 1, row, *self.proof_res) | 407 | amount_cell.row - 1, amount_cell.row, over_col_letter, amount_col_letter, *self.proof_res) |
289 | else: | 408 | else: |
290 | rows[consts.RESULT_IDX].value = '=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'.format( | 409 | rows[result_idx].value = '=IF({2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5}")'.format( |
291 | row, row - 1, *self.proof_res) | 410 | amount_cell.row, amount_cell.row - 1, over_col_letter, amount_col_letter, *self.proof_res) |
292 | |||
293 | # 删除金额辅助列 | ||
294 | new_ws.delete_cols(consts.BORROW_HEADER_COL, amount=new_ws.max_column) | ||
295 | 411 | ||
296 | # 3.6.同一天相同进出账高亮 | 412 | # 3.6.同一天相同进出账高亮 |
297 | del amount_mapping | 413 | del amount_mapping |
298 | for row in amount_fill_row: | 414 | for row in amount_fill_row: |
299 | new_ws[row][consts.AMOUNT_IDX].fill = self.amount_fill | 415 | new_ws[row][amount_cell_idx].fill = self.amount_fill |
300 | 416 | ||
301 | # 关键词2信息提取 | 417 | # 关键词2信息提取 |
302 | ms.append(self.blank_row) | 418 | ms.append(self.blank_row) |
... | @@ -319,21 +435,29 @@ class BSWorkbook(Workbook): | ... | @@ -319,21 +435,29 @@ class BSWorkbook(Workbook): |
319 | # } | 435 | # } |
320 | # } | 436 | # } |
321 | for card, summary in bs_summary.items(): | 437 | for card, summary in bs_summary.items(): |
322 | # 1.原表修剪、排列、按照月份分割 | 438 | # 1.原表表头收集、按照月份分割 |
439 | # 1.1 总结首行信息 | ||
440 | classify = summary.get('classify', 0) | ||
441 | sheet_header_info = {} | ||
442 | header_info = {} | ||
443 | max_column_list = [] | ||
444 | for sheet in summary.get('sheet', []): | ||
445 | ws = self.get_sheet_by_name(sheet) | ||
446 | self.header_collect(ws, sheet_header_info, header_info, max_column_list, classify) | ||
447 | statistics_header_info = self.header_statistics(sheet_header_info, header_info, classify) | ||
448 | max_column = max(max_column_list) | ||
449 | |||
450 | # 1.2.按月份分割 min_row 正文第一行 date_col 日期行 | ||
323 | start_date = summary.get('start_date') | 451 | start_date = summary.get('start_date') |
324 | end_date = summary.get('end_date') | 452 | end_date = summary.get('end_date') |
325 | date_statistics = False | 453 | date_statistics = True if start_date is None or end_date is None else False # 用于判断是否需要收集各表中日期 |
326 | if start_date is None or end_date is None: | 454 | date_list = [] # 用于收集各表中日期 |
327 | date_statistics = True | 455 | month_mapping = {} # 用于创建月份表 |
328 | date_list = [] | 456 | reverse_trend_list = [] # 用于判断倒序与正序 |
329 | month_mapping = {} | ||
330 | reverse_trend_list = [] | ||
331 | for sheet in summary.get('sheet', []): | 457 | for sheet in summary.get('sheet', []): |
332 | ws = self.get_sheet_by_name(sheet) | 458 | ws = self.get_sheet_by_name(sheet) |
333 | # 1.1.删除多余列、排列 | 459 | date_col, min_row = self.get_data_col_min_row(sheet, sheet_header_info, header_info, classify) |
334 | min_row = self.sheet_prune(ws, summary.get('classify', 0)) | 460 | self.sheet_split(ws, date_col, min_row, month_mapping, reverse_trend_list, date_list, date_statistics) |
335 | # 1.2.按月份分割 | ||
336 | self.sheet_split(ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics) | ||
337 | 461 | ||
338 | if date_statistics is True and len(date_list) > 1: | 462 | if date_statistics is True and len(date_list) > 1: |
339 | start_date = min(date_list) if start_date is None else start_date | 463 | start_date = min(date_list) if start_date is None else start_date |
... | @@ -353,7 +477,7 @@ class BSWorkbook(Workbook): | ... | @@ -353,7 +477,7 @@ class BSWorkbook(Workbook): |
353 | for month_list in month_mapping.values(): | 477 | for month_list in month_mapping.values(): |
354 | month_list.sort(key=lambda x: x[-1], reverse=is_reverse) | 478 | month_list.sort(key=lambda x: x[-1], reverse=is_reverse) |
355 | 479 | ||
356 | self.build_month_sheet(card, month_mapping, ms, is_reverse) | 480 | self.build_month_sheet(ms, card, month_mapping, is_reverse, statistics_header_info, max_column) |
357 | 481 | ||
358 | # 4.删除原表 | 482 | # 4.删除原表 |
359 | for sheet in summary.get('sheet'): | 483 | for sheet in summary.get('sheet'): | ... | ... |
src/apps/doc/ocr/wb_bak.py
0 → 100644
1 | import locale | ||
2 | import numpy as np | ||
3 | from pandas._libs import tslib | ||
4 | from pandas._libs.tslibs.nattype import NaTType | ||
5 | from pandas.core.indexes.datetimes import DatetimeIndex | ||
6 | from openpyxl import Workbook | ||
7 | from openpyxl.styles import Border, Side, PatternFill, numbers | ||
8 | from openpyxl.utils import get_column_letter | ||
9 | from apps.doc import consts | ||
10 | |||
11 | |||
12 | class BSWorkbook(Workbook): | ||
13 | |||
def __init__(self, interest_keyword, salary_keyword, loan_keyword, *args, **kwargs):
    """Create the workbook and store the keyword sets and layout constants.

    :param interest_keyword: summaries whose rows are copied to the meta sheet.
    :param salary_keyword: summaries whose rows are staged in a temporary
        sheet and appended to the meta sheet afterwards.
    :param loan_keyword: summaries whose cell gets the loan highlight fill.
    :param args: forwarded unchanged to the parent workbook constructor.
    :param kwargs: forwarded unchanged to the parent workbook constructor.
    """
    super().__init__(*args, **kwargs)
    # locale.atof is used later to parse amounts; pin the numeric locale.
    locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8')

    # Keyword collections supplied by the caller.
    self.interest_keyword = interest_keyword
    self.salary_keyword = salary_keyword
    self.loan_keyword = loan_keyword

    # Meta sheet title and the fixed rows written into it.
    self.meta_sheet_title = '关键信息提取和展示'
    self.blank_row = (None,)
    self.code_header = ('页数', '电子回单验证码')
    self.date_header = ('打印时间', '起始日期', '终止日期', '流水区间结果')
    self.keyword_header = ('关键词', '记账日期', '金额')

    # Labels inserted into the balance check formula (match / mismatch).
    self.proof_res = ('对', '错')

    # Highlight fills for loan keywords and for offsetting same-day amounts.
    self.loan_fill = PatternFill("solid", fgColor="00FFCC00")
    self.amount_fill = PatternFill("solid", fgColor="00FFFF00")

    self.MAX_MEAN = 31
31 | |||
@staticmethod
def sheet_prune(ws, classify):
    """Rearrange the columns of *ws* into the fixed layout and trim the rest.

    ``consts.FIXED_COL_AMOUNT`` empty columns are inserted on the left, the
    columns whose first-row value is a known header are moved into their
    fixed slots, slots that are still empty are filled from the per-classify
    default column positions, and every column to the right of the fixed
    area is deleted.

    :param ws: worksheet to rearrange in place.
    :param classify: index into ``consts.CLASSIFY_LIST`` selecting the
        default column positions.
    :return: ``2`` when at least one column was matched by its first-row
        value, otherwise ``1`` (presumably the first data row - confirm
        against the caller).
    """
    fixed_amount = consts.FIXED_COL_AMOUNT
    ws.insert_cols(1, amount=fixed_amount)
    moved_col_set = set()
    header_col_set = set()

    def relocate(src_col, dst_col):
        # Shift the whole column (row 1 .. max_row) from src_col to dst_col.
        letter = get_column_letter(src_col)
        ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=dst_col - src_col)

    # Header sets consulted, in this order, when the direct mapping does not
    # apply (unknown header, or its slot is already taken).
    fallback_targets = (
        (consts.BORROW_HEADERS_SET, consts.BORROW_HEADER_COL),
        (consts.INCOME_HEADERS_SET, consts.INCOME_HEADER_COL),
        (consts.OUTLAY_HEADERS_SET, consts.OUTLAY_HEADER_COL),
    )

    # Pass 1: place columns according to the keyword found in the first row.
    for col in range(fixed_amount + 1, ws.max_column + 1):
        header_value = ws.cell(1, col).value
        target_col = consts.HEADERS_MAPPING.get(header_value)
        if target_col is None or target_col in header_col_set:
            target_col = next(
                (dst for names, dst in fallback_targets if header_value in names),
                None,
            )
            if target_col is None:
                continue
        relocate(col, target_col)
        moved_col_set.add(col)
        header_col_set.add(target_col)

    # Pass 2: for slots that are still missing, fall back to the default
    # column position configured for this classify.
    for header_col in range(1, fixed_amount + 1):
        if header_col in header_col_set or header_col == consts.RESULT_HEADER_COL:
            continue
        fix_col = consts.CLASSIFY_LIST[classify][1][header_col - 1]
        if fix_col is None:
            continue
        fix_col = fix_col + fixed_amount
        if fix_col in moved_col_set:
            # The default source column was already consumed in pass 1;
            # the remaining slots are not processed either.
            break
        relocate(fix_col, header_col)

    ws.delete_cols(fixed_amount + 1, amount=ws.max_column)
    return 2 if moved_col_set else 1
78 | |||
@staticmethod
def month_split(dti, date_list, date_statistics):
    """Find the positions in *dti* where the ``YYYY-MM`` label changes.

    :param dti: datetime index; entries whose formatted label is a float
        (the form missing dates take after ``strftime``) are skipped.
    :param date_list: list extended in place with the first and the last
        valid date of *dti* when *date_statistics* is true.
    :param date_statistics: whether to collect dates into *date_list*.
    :return: ``(month_list, idx_list)`` - the month labels in order of
        appearance and the start position of each one. The first month
        always starts at position 0.
    """
    months = []
    starts = []
    previous = None
    for position, label in enumerate(dti.strftime('%Y-%m')):
        if isinstance(label, float):
            continue
        if label == previous:
            continue
        months.append(label)
        if previous is None:
            if date_statistics:
                date_list.append(dti[position].date())
            # Leading rows without a date are attributed to the first month.
            position = 0
        starts.append(position)
        previous = label

    if date_statistics:
        # Walk backwards to the last entry that holds a real timestamp.
        for position in reversed(range(len(dti))):
            stamp = dti[position]
            if isinstance(stamp, NaTType):
                continue
            date_list.append(stamp.date())
            break
    return months, starts
102 | |||
@staticmethod
def get_reverse_trend(day_idx, idx_list):
    """Vote on whether the day numbers run in descending order.

    Consecutive valid days are compared; a decrease counts ``+1`` and an
    increase counts ``-1``. Positions listed in *idx_list* restart the
    comparison, so the step into them is never counted.

    :param day_idx: iterable of day-of-month numbers; NaN entries are skipped.
    :param idx_list: positions at which a new segment starts.
    :return: ``1`` if decreases dominate, ``-1`` if increases dominate,
        ``0`` otherwise.
    """
    score = 0
    last_day = None
    for position, day in enumerate(day_idx):
        if np.isnan(day):
            continue
        if position in idx_list or last_day is None:
            last_day = day
            continue
        if day < last_day:
            score += 1
        elif day > last_day:
            score -= 1
        else:
            continue
        last_day = day
    # Collapse the tally to its sign.
    return (score > 0) - (score < 0)
124 | |||
def sheet_split(self, ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics):
    """Split the rows of *ws* into per-month ranges using its first column.

    :param ws: worksheet whose column 1 holds the dates.
    :param month_mapping: dict updated in place; maps a ``YYYY-MM`` label
        to a list of ``(sheet title, first row, last row, mean day)``
        tuples. Sheets without any parsable date go under ``'xxxx-xx'``
        with a mean day of ``0``.
    :param reverse_trend_list: list that receives this sheet's ordering
        vote from ``get_reverse_trend`` (only when dates were found).
    :param min_row: first worksheet row that holds data.
    :param date_list: passed through to ``month_split``.
    :param date_statistics: passed through to ``month_split``.
    """
    for date_tuple_src in ws.iter_cols(min_col=1, max_col=1, min_row=min_row, values_only=True):
        # Only the first 10 characters of string cells are handed to the parser.
        date_tuple = [date[:10] if isinstance(date, str) else date for date in date_tuple_src]
        # np.asarray instead of np.array(..., copy=False): on NumPy 2.x the
        # latter raises ValueError because a list always has to be copied.
        # NOTE(review): array_to_datetime is a private pandas API and its
        # keyword set differs between pandas versions - confirm against the
        # pinned pandas release.
        dt_array, _ = tslib.array_to_datetime(
            np.asarray(date_tuple, dtype=np.object_),
            errors="coerce",
            utc=False,
            dayfirst=False,
            yearfirst=False,
            require_iso8601=True,
        )
        dti = DatetimeIndex(dt_array, tz=None, name=None)

        month_list, idx_list = self.month_split(dti, date_list, date_statistics)

        if len(month_list) == 0:
            # No usable date in this sheet: keep all rows in a placeholder month.
            month_mapping.setdefault('xxxx-xx', []).append((ws.title, min_row, ws.max_row, 0))
            continue

        reverse_trend_list.append(self.get_reverse_trend(dti.day, idx_list))

        day_idx = dti.day
        last_position = len(idx_list) - 1
        for i, item in enumerate(month_list):
            start = idx_list[i]
            if i == last_position:
                # The last month runs to the end of the sheet.
                day_mean = np.mean(day_idx[start:].dropna())
                last_row = ws.max_row
            else:
                stop = idx_list[i + 1]
                day_mean = np.mean(day_idx[start:stop].dropna())
                last_row = stop + min_row - 1
            month_mapping.setdefault(item, []).append(
                (ws.title, start + min_row, last_row, day_mean))
160 | |||
def build_metadata_rows(self, confidence, code, print_time, start_date, end_date):
    """Assemble the rows written at the top of the meta sheet.

    :param confidence: value shown next to the confidence label.
    :param code: iterable of rows placed under the code header; ``None``
        is treated as "no code rows" instead of raising ``TypeError``.
    :param print_time: value for the first cell of the date row.
    :param start_date: start of the statement period, or ``None``.
    :param end_date: end of the statement period, or ``None``.
    :return: list of row tuples; the last cell of the date row is the
        number of days between the two dates, or ``None`` when either
        date is missing.
    """
    if start_date is None or end_date is None:
        timedelta = None
    else:
        timedelta = (end_date - start_date).days

    metadata_rows = [
        ('流水识别置信度', confidence),
        self.blank_row,
        self.code_header,
    ]
    # Callers read `code` with dict.get() and no default, so it may be None.
    metadata_rows.extend(code or ())
    metadata_rows.extend([
        self.blank_row,
        self.date_header,
        (print_time, start_date, end_date, timedelta),
        self.blank_row,
        self.keyword_header,
    ])
    return metadata_rows
180 | |||
def create_meta_sheet(self, card):
    """Return the meta sheet for *card*.

    If the first worksheet is still titled ``'Sheet'`` it is renamed and
    reused; otherwise a new sheet is created.

    :param card: card number; its last six characters go into the title.
    :return: the worksheet to write the meta information to.
    """
    first_sheet = self.worksheets[0]
    reuse_first = first_sheet.title == 'Sheet'
    title = '{0}({1})'.format(self.meta_sheet_title, card[-6:])
    if not reuse_first:
        return self.create_sheet(title)
    first_sheet.title = title
    return first_sheet
188 | |||
def build_meta_sheet(self, card, confidence, code, print_time, start_date, end_date):
    """Create the meta sheet for *card* and fill in its header rows.

    The rows come from ``build_metadata_rows`` and the sheet from
    ``create_meta_sheet``; the rows are built first.

    :return: the meta sheet with the rows appended.
    """
    header_rows = self.build_metadata_rows(
        confidence=confidence,
        code=code,
        print_time=print_time,
        start_date=start_date,
        end_date=end_date,
    )
    meta_sheet = self.create_meta_sheet(card)
    for header_row in header_rows:
        meta_sheet.append(header_row)
    return meta_sheet
195 | |||
@staticmethod
def amount_format(amount_str):
    """Clean up an amount string before numeric parsing.

    Non-string values and the empty string are returned unchanged.
    Otherwise the steps are:

    1. translate characters through ``consts.TRANS``;
    2. replace a leading character found in ``consts.ERROR_CHARS`` by ``-``;
    3. drop every ``-`` after the first character;
    4. look at the third character from the end: a ``,`` there becomes
       ``.``; a ``.`` there that is directly preceded by ``,`` loses
       that comma.

    :param amount_str: raw cell value.
    :return: the cleaned string, or *amount_str* itself when it is not a
        non-empty string.
    """
    if not isinstance(amount_str, str) or not amount_str:
        return amount_str

    translated = amount_str.translate(consts.TRANS)
    head, tail = translated[0], translated[1:]
    if head in consts.ERROR_CHARS:
        head = '-'
    cleaned = head + tail.replace('-', '')

    if len(cleaned) < 4:
        return cleaned

    pivot = len(cleaned) - 3
    if cleaned[pivot] == ',':
        return cleaned[:pivot] + '.' + cleaned[pivot + 1:]
    if cleaned[pivot] == '.' and cleaned[pivot - 1] == ',':
        return cleaned[:pivot - 1] + cleaned[pivot:]
    return cleaned
216 | |||
def build_month_sheet(self, card, month_mapping, ms, is_reverse):
    """Create one sheet per month and post-process every copied row.

    For each month (in sorted label order) the row ranges recorded in
    *month_mapping* are copied under ``consts.FIXED_HEADERS``. Each row is
    then checked against the keyword sets, its balance and amount cells
    are converted to numbers, a check formula is written, the helper
    columns are deleted and offsetting same-day amounts are highlighted.

    :param card: card number; its last six characters go into sheet titles.
    :param month_mapping: month label -> list of
        ``(sheet title, first row, last row, ...)`` tuples.
    :param ms: meta sheet that receives the keyword rows.
    :param is_reverse: selects which of the two neighbouring rows plays
        which role in the check formula.
    """
    formula = '=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'
    tmp_ws = self.create_sheet('tmp_ws')

    for month in sorted(month_mapping.keys()):
        # 3.1 Copy the source rows into a fresh month sheet.
        new_ws = self.create_sheet('{0}({1})'.format(month, card[-6:]))
        new_ws.append(consts.FIXED_HEADERS)
        for part in month_mapping.get(month):
            src_ws = self.get_sheet_by_name(part[0])
            for row_value in src_ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True):
                new_ws.append(row_value)

        # 3.2 Extract keyword rows and highlight.
        amount_mapping = {}      # date -> amount -> row numbers seen so far
        amount_fill_row = set()  # rows whose amount cell gets highlighted
        for rows in new_ws.iter_rows(min_row=2):
            summary_cell = rows[consts.SUMMARY_IDX]
            date_cell = rows[consts.DATE_IDX]
            amount_cell = rows[consts.AMOUNT_IDX]
            row = summary_cell.row

            summary = summary_cell.value
            if summary in self.interest_keyword:
                # First keyword group goes straight to the meta sheet.
                ms.append((summary, date_cell.value, amount_cell.value))
            elif summary in self.salary_keyword:
                # Second keyword group is staged in the temporary sheet.
                tmp_ws.append((summary, date_cell.value, amount_cell.value))
            elif summary in self.loan_keyword:
                summary_cell.fill = self.loan_fill

            amount_error = False

            # 3.3 Balance -> number.
            over_cell = rows[consts.OVER_IDX]
            try:
                over_cell.value = locale.atof(self.amount_format(over_cell.value))
            except Exception:
                amount_error = True
            else:
                over_cell.number_format = numbers.FORMAT_NUMBER_00

            # 3.4 Amount -> number. Falls back to the income column
            # (kept positive, zero is rejected) and then to the outlay
            # column (kept non-positive).
            try:
                try:
                    amount_cell.value = locale.atof(self.amount_format(amount_cell.value))
                except Exception:
                    try:
                        amount_cell.value = locale.atof(
                            self.amount_format(rows[consts.INCOME_IDX].value))
                        if amount_cell.value == 0:
                            # Re-raise the pending error to try the outlay column.
                            raise
                        elif amount_cell.value < 0:
                            amount_cell.value = -amount_cell.value
                    except Exception:
                        amount_cell.value = locale.atof(
                            self.amount_format(rows[consts.OUTLAY_IDX].value))
                        if amount_cell.value > 0:
                            amount_cell.value = -amount_cell.value
            except Exception:
                amount_error = True
            else:
                if rows[consts.BORROW_IDX].value in consts.BORROW_OUTLAY_SET:
                    amount_cell.value = -amount_cell.value
                amount_cell.number_format = numbers.FORMAT_NUMBER_00
                # Rows of the same date with the exact opposite amount.
                opposite_rows = amount_mapping.get(date_cell.value, {}).get(-amount_cell.value)
                if opposite_rows:
                    amount_fill_row.add(row)
                    amount_fill_row.update(opposite_rows)
                amount_mapping.setdefault(date_cell.value, {}).setdefault(
                    amount_cell.value, []).append(row)

            # 3.5 Check formula (not for the first data row).
            if row > 2 and not amount_error:
                first, second = (row - 1, row) if is_reverse else (row, row - 1)
                rows[consts.RESULT_IDX].value = formula.format(first, second, *self.proof_res)

        # Delete the helper columns used for the amount fallback.
        new_ws.delete_cols(consts.BORROW_HEADER_COL, amount=new_ws.max_column)

        # 3.6 Highlight offsetting amounts of the same day.
        for row in amount_fill_row:
            new_ws[row][consts.AMOUNT_IDX].fill = self.amount_fill

    # Append the staged second keyword group to the meta sheet.
    ms.append(self.blank_row)
    ms.append(self.keyword_header)
    for staged_row in tmp_ws.iter_rows(values_only=True):
        ms.append(staged_row)
    self.remove(tmp_ws)
308 | |||
def bs_rebuild(self, bs_summary):
    """Rebuild the workbook's bank-statement sheets, one card at a time.

    For every card the source sheets are pruned and split by month, a
    meta-information sheet is built, the per-month sheets are created, and
    finally the original source sheets are removed from the workbook.

    Args:
        bs_summary: Mapping of card number -> summary dict.  The keys read
            here are ``classify``, ``confidence``, ``code`` (a list of
            ``(page, code)`` tuples), ``print_time``, ``start_date``,
            ``end_date`` and ``sheet`` (a list of sheet names).  Every key
            is optional.
    """
    for card, summary in bs_summary.items():
        # Read the sheet list once so the processing loop and the cleanup
        # loop agree; a missing (or None) entry means "no sheets" instead
        # of raising TypeError during cleanup.
        sheet_names = summary.get('sheet') or []

        # 1. Prune and arrange the source sheets, then split them by month.
        start_date = summary.get('start_date')
        end_date = summary.get('end_date')
        # Dates only need to be collected from the rows when the summary
        # does not already supply both ends of the period.
        date_statistics = start_date is None or end_date is None
        date_list = []
        month_mapping = {}
        reverse_trend_list = []
        for sheet in sheet_names:
            ws = self.get_sheet_by_name(sheet)
            # 1.1. Drop superfluous columns and arrange the remainder.
            min_row = self.sheet_prune(ws, summary.get('classify', 0))
            # 1.2. Split by month (fills month_mapping, reverse_trend_list
            # and, when requested, date_list in place).
            self.sheet_split(ws, month_mapping, reverse_trend_list, min_row,
                             date_list, date_statistics)

        # Fill whichever end of the period is missing from the collected
        # dates.  NOTE(review): the original requires more than one
        # collected date before doing so; that threshold is preserved.
        if date_statistics and len(date_list) > 1:
            start_date = min(date_list) if start_date is None else start_date
            end_date = max(date_list) if end_date is None else end_date

        # 2. Meta-information sheet.
        ms = self.build_meta_sheet(card,
                                   summary.get('confidence', 1),
                                   summary.get('code'),
                                   summary.get('print_time'),
                                   start_date,
                                   end_date)

        # 3. Create the month sheets and extract/highlight key rows.
        # A positive trend sum means the rows are handled in reverse order.
        is_reverse = sum(reverse_trend_list) > 0
        for month_list in month_mapping.values():
            month_list.sort(key=lambda x: x[-1], reverse=is_reverse)

        self.build_month_sheet(card, month_mapping, ms, is_reverse)

        # 4. Remove the original source sheets.
        for sheet in sheet_names:
            self.remove(self.get_sheet_by_name(sheet))
362 | |||
def license_rebuild(self, license_summary, document_scheme):
    """Write one sheet per license classification found in the summary.

    Walks ``consts.LICENSE_ORDER`` and, for each classification that has
    records in ``license_summary``, creates a sheet and appends one
    ``(label, value)`` row per configured field, followed by a blank
    separator row after each record.

    Args:
        license_summary: Mapping of classification -> list of record dicts.
            It is mutated: records diverted away from ``consts.IC_CLASSIFY``
            are appended to the ``consts.RP_CLASSIFY`` list.
        document_scheme: Compared against ``consts.DOC_SCHEME_LIST[1]`` to
            decide whether a scheme-dependent classification is switched
            to ``consts.MVC_CLASSIFY_SE``.
    """
    for classify, order_info in consts.LICENSE_ORDER:
        _, name, field_order, side_diff, scheme_diff = order_info
        records = license_summary.get(classify)
        if records:
            sheet = self.create_sheet(name)
            # Scheme-dependent classifications use a different field
            # layout key under the second document scheme.
            if scheme_diff and document_scheme == consts.DOC_SCHEME_LIST[1]:
                classify = consts.MVC_CLASSIFY_SE
            for record in records:
                diverted = (classify == consts.IC_CLASSIFY
                            and record.get('类别') == '1')
                if diverted:
                    # Not written here; queued under RP_CLASSIFY instead.
                    # NOTE(review): presumably RP_CLASSIFY is visited later
                    # in LICENSE_ORDER - confirm against consts.
                    license_summary.setdefault(consts.RP_CLASSIFY, []).append(record)
                    continue
                if side_diff:
                    # The field layout depends on whether the marker key
                    # is present in this record.
                    key, order_with_key, order_without_key = consts.FIELD_ORDER_MAP.get(classify)
                    if key in record:
                        field_order = order_with_key
                    else:
                        field_order = order_without_key
                for search_field, write_field in field_order:
                    value = record.get(search_field, '')
                    sheet.append((write_field, value))
                # Blank row between consecutive records.
                sheet.append((None, ))
381 | |||
def skip_img_sheet(self, skip_img):
    """Create the sheet listing skipped images, if there are any.

    Args:
        skip_img: Iterable of row tuples, one per skipped image.  When it
            is empty or None no sheet is created.
    """
    if not skip_img:
        return
    sheet = self.create_sheet(consts.SKIP_IMG_SHEET_NAME)
    # Header row first, then one row per skipped image.
    sheet.append(consts.SKIP_IMG_SHEET_HEADER)
    for entry in skip_img:
        sheet.append(entry)
388 | |||
def rebuild(self, bs_summary, license_summary, skip_img, document_scheme):
    """Run the three rebuild stages in order.

    Args:
        bs_summary: Passed to ``bs_rebuild``.
        license_summary: Passed to ``license_rebuild``.
        skip_img: Passed to ``skip_img_sheet``.
        document_scheme: Passed to ``license_rebuild``.
    """
    # Stage 1: bank-statement sheets.
    self.bs_rebuild(bs_summary)
    # Stage 2: license sheets.
    self.license_rebuild(license_summary, document_scheme)
    # Stage 3: sheet listing the skipped images.
    self.skip_img_sheet(skip_img)
-
Please register or sign in to post a comment