2a723026 by 周伟奇

modify wb header

1 parent 3c98a180
...@@ -53,15 +53,19 @@ TRANS_MAP = { ...@@ -53,15 +53,19 @@ TRANS_MAP = {
53 'L': "1", 53 'L': "1",
54 54
55 'A': "4", 55 'A': "4",
56
56 's': "5", 57 's': "5",
57 'S': "5", 58 'S': "5",
59
58 'b': "6", 60 'b': "6",
61
59 'g': "9", 62 'g': "9",
60 'E': "9", 63 'E': "9",
64
61 'B': "13", 65 'B': "13",
62 } 66 }
63 TRANS = str.maketrans(TRANS_MAP) 67 TRANS = str.maketrans(TRANS_MAP)
64 ERROR_CHARS = {'.', ':', ':', '•', '} 68 ERROR_CHARS = {'.', '。', ':', ':', '•', '·', ',', ','}
65 SKIP_IMG_SHEET_NAME = '未处理图片' 69 SKIP_IMG_SHEET_NAME = '未处理图片'
66 SKIP_IMG_SHEET_HEADER = ('页码', '序号') 70 SKIP_IMG_SHEET_HEADER = ('页码', '序号')
67 71
...@@ -70,12 +74,34 @@ UNKNOWN_CARD = '未知卡号' ...@@ -70,12 +74,34 @@ UNKNOWN_CARD = '未知卡号'
70 UNKNOWN_ROLE = '未知户名' 74 UNKNOWN_ROLE = '未知户名'
71 DATE_FORMAT = ['%Y年%m月%d日', '%Y/%m/%d', '%Y-%m-%d', '%Y%m%d'] 75 DATE_FORMAT = ['%Y年%m月%d日', '%Y/%m/%d', '%Y-%m-%d', '%Y%m%d']
72 76
73 AMOUNT_COL_TITLE_SET = {"交易金额", "金额", "收入/支出金额", "发生额"}
74 OVERAGE_COL_TITLE_SET = {"账户余额", "余额"}
75 PROOF_COL_TITLE = '核对结果' 77 PROOF_COL_TITLE = '核对结果'
76 PROOF_RES = ('对', '错') 78 PROOF_RES = ('对', '错')
77 META_SHEET_TITLE = '关键信息提取和展示' 79 META_SHEET_TITLE = '关键信息提取和展示'
78 80
# String keys naming the entries tracked while a header row is analysed.
# NOTE(review): the "*_col" values presumably identify logical statement
# columns (summary, date, amount, ...) - confirm against the consumer.
SUMMARY_KEY = 'summary_col'
DATE_KEY = 'date_col'
AMOUNT_KEY = 'amount_col'
OVER_KEY = 'over_col'
INCOME_KEY = 'income_col'
# Misspelled original name, kept as an alias so existing references keep working.
IMCOME_KEY = INCOME_KEY
OUTLAY_KEY = 'outlay_col'
BORROW_KEY = 'borrow_col'
MIN_ROW_KEY = 'min_row'
FIND_COUNT_KEY = 'find_count'
FIND_COL_KEY = 'find_col'
HEADER_KEY = 'header'

# The seven column keys, in the order the original author listed them.
KEY_LIST = [SUMMARY_KEY, DATE_KEY, OVER_KEY, BORROW_KEY, AMOUNT_KEY, IMCOME_KEY, OUTLAY_KEY]

# Column key -> integer slot.
# NOTE(review): the values coincide with the 0-based positions of
# '附言', '记账日期', '金额', '余额', '收入', '支出' and '借贷' in FIXED_HEADERS;
# confirm they are meant as indices into that tuple.
CLASSIFY_MAP = {
    SUMMARY_KEY: 5,
    DATE_KEY: 0,
    AMOUNT_KEY: 2,
    OVER_KEY: 3,
    IMCOME_KEY: 11,
    OUTLAY_KEY: 12,
    BORROW_KEY: 10,
}

79 FIXED_HEADERS = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号', 105 FIXED_HEADERS = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号',
80 '对方开户行', '核对结果', '借贷', '收入', '支出') 106 '对方开户行', '核对结果', '借贷', '收入', '支出')
81 FIXED_COL_AMOUNT = len(FIXED_HEADERS) 107 FIXED_COL_AMOUNT = len(FIXED_HEADERS)
...@@ -103,36 +129,60 @@ OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取 ...@@ -103,36 +129,60 @@ OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取
103 129
104 # ------------------普通打印-全格线-------------------------------------------------------------------------------------- 130 # ------------------普通打印-全格线--------------------------------------------------------------------------------------
105 HEADERS_MAPPING = {} 131 HEADERS_MAPPING = {}
132
133 # 借贷
134 HEADERS_MAPPING.update(
135 {
136 '借贷': BORROW_KEY,
137 '借贷状态': BORROW_KEY,
138 '收/支': BORROW_KEY,
139 }
140 )
141
142 # 收入
143 HEADERS_MAPPING.update(
144 {
145 '收入金额': IMCOME_KEY,
146 '收入': IMCOME_KEY,
147 '存入': IMCOME_KEY,
148 '存入金额(贷)': IMCOME_KEY,
149 '存入金额(贷)': IMCOME_KEY,
150 }
151 )
152
153 # 支出
154 HEADERS_MAPPING.update(
155 {
156 '支出金额': OUTLAY_KEY,
157 '支出': OUTLAY_KEY,
158 '支取金额(借)': OUTLAY_KEY,
159 '支取金额(借)': OUTLAY_KEY,
160 }
161 )
162
163
106 # 横版-表格-中国银行(不规则) 164 # 横版-表格-中国银行(不规则)
107 HEADERS_MAPPING.update( 165 HEADERS_MAPPING.update(
108 { 166 {
109 '记账日期': BASE_HEADERS_MAPPING['记账日期'], 167 '记账日期': DATE_KEY,
110 '记账时间': BASE_HEADERS_MAPPING['记账时间'], 168 '金额': AMOUNT_KEY,
111 '金额': BASE_HEADERS_MAPPING['金额'], 169 '余额': OVER_KEY,
112 '余额': BASE_HEADERS_MAPPING['余额'], 170 '附言': SUMMARY_KEY,
113 '交易名称': BASE_HEADERS_MAPPING['交易名称'],
114 '附言': BASE_HEADERS_MAPPING['附言'],
115 '对方账户名': BASE_HEADERS_MAPPING['对方账户名'],
116 '对方卡号/账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
117 '对方开户行': BASE_HEADERS_MAPPING['对方开户行'],
118 } 171 }
119 ) 172 )
120 # 横版-表格-农业银行-中国农业银行个人账户明细 173 # 横版-表格-农业银行-中国农业银行个人账户明细
121 HEADERS_MAPPING.update( 174 HEADERS_MAPPING.update(
122 { 175 {
123 '交易日期': BASE_HEADERS_MAPPING['记账日期'], 176 '交易日期': DATE_KEY,
124 # '存入': BASE_HEADERS_MAPPING['金额'], 177 # '存入': AMOUNT_KEY,
125 '对方账号': BASE_HEADERS_MAPPING['对方卡号/账号'], 178 '摘要': SUMMARY_KEY,
126 '对方名称': BASE_HEADERS_MAPPING['对方账户名'],
127 '摘要': BASE_HEADERS_MAPPING['附言'],
128 } 179 }
129 ) 180 )
130 # 横版-表格-北京银行 181 # 横版-表格-北京银行
131 HEADERS_MAPPING.update( 182 HEADERS_MAPPING.update(
132 { 183 {
133 '业务摘要': BASE_HEADERS_MAPPING['附言'], 184 '业务摘要': SUMMARY_KEY,
134 '发生额': BASE_HEADERS_MAPPING['金额'], 185 '发生额': AMOUNT_KEY,
135 '对方户名': BASE_HEADERS_MAPPING['对方账户名'],
136 } 186 }
137 ) 187 )
138 # 横版-表格-工商银行 借记卡账户历史明细清单 188 # 横版-表格-工商银行 借记卡账户历史明细清单
...@@ -142,8 +192,8 @@ HEADERS_MAPPING.update( ...@@ -142,8 +192,8 @@ HEADERS_MAPPING.update(
142 # 工商银行历史明细(申请单号:20042501303039397888) 192 # 工商银行历史明细(申请单号:20042501303039397888)
143 HEADERS_MAPPING.update( 193 HEADERS_MAPPING.update(
144 { 194 {
145 '收入/支出金额': BASE_HEADERS_MAPPING['金额'], 195 '收入/支出金额': AMOUNT_KEY,
146 '工作日期': BASE_HEADERS_MAPPING['记账日期'], 196 '工作日期': DATE_KEY,
147 } 197 }
148 ) 198 )
149 199
...@@ -153,26 +203,23 @@ HEADERS_MAPPING.update( ...@@ -153,26 +203,23 @@ HEADERS_MAPPING.update(
153 # 竖版-表格-建设银行-个人活期账户交易明细 CH-B005832604 (2) 203 # 竖版-表格-建设银行-个人活期账户交易明细 CH-B005832604 (2)
154 HEADERS_MAPPING.update( 204 HEADERS_MAPPING.update(
155 { 205 {
156 '交易金额': BASE_HEADERS_MAPPING['金额'], 206 '交易金额': AMOUNT_KEY,
157 '账户余额': BASE_HEADERS_MAPPING['余额'], 207 '账户余额': OVER_KEY,
158 '对方账号与户名': BASE_HEADERS_MAPPING['对方卡号/账号'],
159 } 208 }
160 ) 209 )
161 # 微信 210 # 微信
162 HEADERS_MAPPING.update( 211 HEADERS_MAPPING.update(
163 { 212 {
164 '交易时间': BASE_HEADERS_MAPPING['记账时间'], 213 '交易类型': SUMMARY_KEY,
165 '交易类型': BASE_HEADERS_MAPPING['附言'], 214 '金额(元)': AMOUNT_KEY,
166 '金额(元)': BASE_HEADERS_MAPPING['金额'], 215 '金额(元)': AMOUNT_KEY,
167 '金额(元)': BASE_HEADERS_MAPPING['金额'],
168 '交易对方': BASE_HEADERS_MAPPING['对方账户名'],
169 } 216 }
170 ) 217 )
171 # 支付宝 218 # 支付宝
172 HEADERS_MAPPING.update( 219 HEADERS_MAPPING.update(
173 { 220 {
174 '时间': BASE_HEADERS_MAPPING['记账日期'], 221 '时间': DATE_KEY,
175 '名称/备注': BASE_HEADERS_MAPPING['附言'], 222 '名称/备注': SUMMARY_KEY,
176 } 223 }
177 ) 224 )
178 225
...@@ -182,33 +229,27 @@ HEADERS_MAPPING.update( ...@@ -182,33 +229,27 @@ HEADERS_MAPPING.update(
182 # 竖版-无表格-农业银行CH-B008805428 229 # 竖版-无表格-农业银行CH-B008805428
183 HEADERS_MAPPING.update( 230 HEADERS_MAPPING.update(
184 { 231 {
185 '摘要/附言': BASE_HEADERS_MAPPING['附言'], 232 '摘要/附言': SUMMARY_KEY,
186 '交易地点/对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'],
187 } 233 }
188 ) 234 )
189 # 农业银行-窄页 235 # 农业银行-窄页
190 HEADERS_MAPPING.update( 236
191 {
192 '交易对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
193 }
194 )
195 # 竖版-特殊-农商行 237 # 竖版-特殊-农商行
196 HEADERS_MAPPING.update( 238 HEADERS_MAPPING.update(
197 { 239 {
198 '交易发生额': BASE_HEADERS_MAPPING['金额'], 240 '交易发生额': AMOUNT_KEY,
199 } 241 }
200 ) 242 )
201 # 横版-特殊-中信银行-账户交易明细 243 # 横版-特殊-中信银行-账户交易明细
202 HEADERS_MAPPING.update( 244 HEADERS_MAPPING.update(
203 { 245 {
204 '对方银行': BASE_HEADERS_MAPPING['对方开户行'], 246 '交易摘要': SUMMARY_KEY,
205 '交易摘要': BASE_HEADERS_MAPPING['附言'],
206 } 247 }
207 ) 248 )
208 # 平安电子账单 249 # 平安电子账单
209 HEADERS_MAPPING.update( 250 HEADERS_MAPPING.update(
210 { 251 {
211 '借贷发生额(借:-贷:+)': BASE_HEADERS_MAPPING['金额'], 252 '借贷发生额(借:-贷:+)': AMOUNT_KEY,
212 } 253 }
213 ) 254 )
214 255
...@@ -218,7 +259,7 @@ HEADERS_MAPPING.update( ...@@ -218,7 +259,7 @@ HEADERS_MAPPING.update(
218 # 竖版-无表格-招商银行账户历史交易明细表 259 # 竖版-无表格-招商银行账户历史交易明细表
219 HEADERS_MAPPING.update( 260 HEADERS_MAPPING.update(
220 { 261 {
221 '联机余额': BASE_HEADERS_MAPPING['余额'], 262 '联机余额': OVER_KEY,
222 } 263 }
223 ) 264 )
224 # 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户 265 # 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户
...@@ -226,28 +267,23 @@ HEADERS_MAPPING.update( ...@@ -226,28 +267,23 @@ HEADERS_MAPPING.update(
226 # 竖版-无表格-邮储银行-电子章 邮储银行 账户对账单 267 # 竖版-无表格-邮储银行-电子章 邮储银行 账户对账单
227 HEADERS_MAPPING.update( 268 HEADERS_MAPPING.update(
228 { 269 {
229 '交易金额(元)': BASE_HEADERS_MAPPING['金额'], 270 '交易金额(元)': AMOUNT_KEY,
230 '交易金额(元)': BASE_HEADERS_MAPPING['金额'], 271 '交易金额(元)': AMOUNT_KEY,
231 '账户余额(元)': BASE_HEADERS_MAPPING['余额'], 272 '账户余额(元)': OVER_KEY,
232 '账户余额(元)': BASE_HEADERS_MAPPING['余额'], 273 '账户余额(元)': OVER_KEY,
233 '对手方户名': BASE_HEADERS_MAPPING['对方账户名'],
234 '对手方账户': BASE_HEADERS_MAPPING['对方卡号/账号'],
235 } 274 }
236 ) 275 )
237 # 横版-无表格-广发银行-账户交易历史 --> 已废弃 276 # 横版-无表格-广发银行-账户交易历史 --> 已废弃
238 # 竖版-无表格-广发银行-账户交易历史 --> 已废弃 277 # 竖版-无表格-广发银行-账户交易历史 --> 已废弃
239 HEADERS_MAPPING.update( 278 HEADERS_MAPPING.update(
240 { 279 {
241 '会计日期': BASE_HEADERS_MAPPING['记账日期'], 280 '会计日期': DATE_KEY,
242 '对手户名': BASE_HEADERS_MAPPING['对方账户名'],
243 '对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
244 } 281 }
245 ) 282 )
246 # 招行电子账单 TODO 有英文,需测试 283 # 招行电子账单 TODO 有英文,需测试
247 HEADERS_MAPPING.update( 284 HEADERS_MAPPING.update(
248 { 285 {
249 '对手信息': BASE_HEADERS_MAPPING['对方账户名'], 286 '摘要代码': SUMMARY_KEY,
250 '摘要代码': BASE_HEADERS_MAPPING['附言'],
251 } 287 }
252 ) 288 )
253 # 横版-无表格-民生银行-中国民生银行个人账户对账单(客户卡号) 289 # 横版-无表格-民生银行-中国民生银行个人账户对账单(客户卡号)
...@@ -255,46 +291,36 @@ HEADERS_MAPPING.update( ...@@ -255,46 +291,36 @@ HEADERS_MAPPING.update(
255 # 横版-无表格-民生银行 291 # 横版-无表格-民生银行
256 HEADERS_MAPPING.update( 292 HEADERS_MAPPING.update(
257 { 293 {
258 '摘要信息': BASE_HEADERS_MAPPING['附言'], 294 '摘要信息': SUMMARY_KEY,
259 '对方行名': BASE_HEADERS_MAPPING['对方开户行'],
260 } 295 }
261 ) 296 )
262 # 竖版-无表格-农业银行整数 297 # 竖版-无表格-农业银行整数
263 # 竖版-无表格-农业银行-中国农业银行银行卡交易明细清单 298 # 竖版-无表格-农业银行-中国农业银行银行卡交易明细清单
264 HEADERS_MAPPING.update( 299
265 {
266 '对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'],
267 }
268 )
269 # 竖版-无表格-农业银行-中国农业银行银行卡活期存折交易明细清单.pdf 300 # 竖版-无表格-农业银行-中国农业银行银行卡活期存折交易明细清单.pdf
270 # 竖版-无表格-农业银行-扩张.pdf 301 # 竖版-无表格-农业银行-扩张.pdf
271 # 竖版-无表格-农业银行-缩进.pdf 302 # 竖版-无表格-农业银行-缩进.pdf
272 HEADERS_MAPPING.update( 303 HEADERS_MAPPING.update(
273 { 304 {
274 '日期': BASE_HEADERS_MAPPING['记账日期'], 305 '日期': DATE_KEY,
275 '短摘要': BASE_HEADERS_MAPPING['附言'], 306 '短摘要': SUMMARY_KEY,
276 '本次余额': BASE_HEADERS_MAPPING['余额'], 307 '本次余额': OVER_KEY,
277 } 308 }
278 ) 309 )
279 # 竖版-无表格-农业银行-无标题(对手帐号) 310 # 竖版-无表格-农业银行-无标题(对手帐号)
280 HEADERS_MAPPING.update( 311 HEADERS_MAPPING.update(
281 { 312 {
282 '交易后余额': BASE_HEADERS_MAPPING['余额'], 313 '交易后余额': OVER_KEY,
283 '对手帐号': BASE_HEADERS_MAPPING['对方卡号/账号'],
284 } 314 }
285 ) 315 )
286 # 竖版-无表格-农商行(非常规) 316 # 竖版-无表格-农商行(非常规)
287 HEADERS_MAPPING.update( 317 HEADERS_MAPPING.update(
288 { 318 {
289 '交易说明': BASE_HEADERS_MAPPING['附言'], 319 '交易说明': SUMMARY_KEY,
290 } 320 }
291 ) 321 )
292 # 竖版-无表格-工商银行 抬头三行 活期历史明细清单 322 # 竖版-无表格-工商银行 抬头三行 活期历史明细清单
293 HEADERS_MAPPING.update( 323
294 {
295 '对方账户': BASE_HEADERS_MAPPING['对方卡号/账号'],
296 }
297 )
298 324
299 # -----------针式打印-全格线-------------------------------------------------------------------------------------------- 325 # -----------针式打印-全格线--------------------------------------------------------------------------------------------
300 # 竖版-表格-建设银行-中国建设银行活期账户交易明细 326 # 竖版-表格-建设银行-中国建设银行活期账户交易明细
...@@ -302,25 +328,19 @@ HEADERS_MAPPING.update( ...@@ -302,25 +328,19 @@ HEADERS_MAPPING.update(
302 # 竖版-表格-建设银行-对私活期账户明细- (1).pdf 328 # 竖版-表格-建设银行-对私活期账户明细- (1).pdf
303 HEADERS_MAPPING.update( 329 HEADERS_MAPPING.update(
304 { 330 {
305 '帐户余额': BASE_HEADERS_MAPPING['余额'], 331 '帐户余额': OVER_KEY,
306 '对方帐户名称': BASE_HEADERS_MAPPING['对方账户名'],
307 } 332 }
308 ) 333 )
309 # 竖版-特殊-交通银行 零售客户交易清单 5000以上交易记录 334 # 竖版-特殊-交通银行 零售客户交易清单 5000以上交易记录
310 HEADERS_MAPPING.update( 335 HEADERS_MAPPING.update(
311 { 336 {
312 '交易日期 记账日期': BASE_HEADERS_MAPPING['记账日期'], 337 '交易日期 记账日期': DATE_KEY,
313 } 338 }
314 ) 339 )
315 340
316 # ----------针式打印-部分格线------------------------------------------------------------------------------------------ 341 # ----------针式打印-部分格线------------------------------------------------------------------------------------------
317 # 竖版-特殊-邮储银行-一本通绿卡通交易明细(客户) 342 # 竖版-特殊-邮储银行-一本通绿卡通交易明细(客户)
318 # 竖版-特殊-邮储银行-账户交易明细(客户) 343 # 竖版-特殊-邮储银行-账户交易明细(客户)
319 HEADERS_MAPPING.update(
320 {
321 '对方账号/卡号/汇票号': BASE_HEADERS_MAPPING['对方卡号/账号'],
322 }
323 )
324 344
325 # -------------------------------------------------------------------------------------------------------------------- 345 # --------------------------------------------------------------------------------------------------------------------
326 346
...@@ -563,6 +583,49 @@ CLASSIFY_LIST = [ ...@@ -563,6 +583,49 @@ CLASSIFY_LIST = [
563 ('其他', OTHER_TUPLE), 583 ('其他', OTHER_TUPLE),
564 ] 584 ]
565 585
# Header-label tuples, one list entry per position; positions without a known
# header layout hold OTHER_TUPLE.
# NOTE(review): the position presumably equals the statement-template
# classification id used elsewhere in this module - confirm.
CLASSIFY_HEADER_LIST = [
    # positions 0-3
    OTHER_TUPLE,
    OTHER_TUPLE,
    OTHER_TUPLE,
    OTHER_TUPLE,
    # positions 4-13
    ('记账日期', '记账时间', '币别', '金额', '余额', '交易名称', '渠道', '网点名称', '附言', '对方账户名', '对方卡号/账号', '对方开户行'),
    ('交易日期', '交易网点', '存入', '支出', '余额', '对方账号', '对方名称', '摘要', '渠道', '附言'),
    ('序号', '日期', '摘要', '交易金额', '余额', '对方账号', '对方名称', '交易地点', '渠道', '附言'),
    ('交易日期', '摘要', '交易金额', '余额', '交易渠道', '交易网点', '对方账号', '对方名称', '附言'),
    ('交易日期', '业务摘要', '收/支', '发生额', '余额', '对方户名', '对方账号', '交易渠道'),
    ('交易日期', '账号', '储种', '序号', '币种', '钞汇', '摘要', '地区', '收入/支出金额', '余额', '渠道'),
    ('交易日期', '账号', '储种', '序号', '币种', '钞汇', '摘要', '地区', '收入/支出金额', '余额', '对方户名', '对方账号', '渠道'),
    # The first column of this layout has no header text, hence None.
    (None, '摘要', '交易日期', '交易金额', '账户余额', '商户/网点号及其名称', '对方账号与户名'),
    ('交易单号', '交易时间', '交易类型', '收/支/其他', '交易方式', '金额(元)', '交易对方', '商户单号'),
    ('流水号', '时间', '名称/备注', '收入', '支出', '账户余额', '资金渠道'),

    # positions 14-21
    ('交易日期', '记账日期', '交易地点', '交易类型', '借贷状态', '交易金额', '余额'),
    ('交易日期', '交易类型', '交易金额(元)', '账户余额(元)', '操作柜员'),
    ('交易日期', '交易类型', '交易币种', '交易金额(元)', '账户余额(元)', '对手方户名', '对手方账户', '收支类型'),
    ('日期', '时间', '日志号', '短摘要', '交易金额', '本次余额', '交易网点', '渠道', '附言'),
    ('交易日期', '摘要/附言', '交易金额', '对方账号和户名'),
    ('记账日期', '货币', '交易金额', '联机余额', '冲补账', '交易摘要'),
    ('记账日期', '货币', '交易金额', '联机余额', '交易摘要', '对手信息'),
    ('凭证类型', '凭证号码', '交易时间', '摘要', '交易金额', '账户余额', '现转标志', '交易渠道', '交易机构', '对方户名', '对方行名'),

    # positions 22-27
    ('交易日期', '交易摘要', '收入金额', '支出金额', '账户余额', '对方户名', '对方账号', '对方银行', '交易流水号'),
    ('交易日期', '摘要/附言', '交易金额', '余额', '交易地点/对方账号和户名'),
    ('日期', '地点', '摘要', '存入', '支出', '余额', '对方账号', '对方户名'),
    ('日期', '摘要', '交易金额', '余额', '地点', '交易对手账号', '对方户名'),
    ('序号', '交易日期', '交易网点', '摘要', '借贷发生额(借:-贷:+)', '账户余额'),
    ('序号', '摘要', '币别', '钞汇', '交易日期', '交易金额', '账户余额', '交易地点附言', '对方账号与户名'),
    # positions 28-34
    OTHER_TUPLE,
    OTHER_TUPLE,
    OTHER_TUPLE,
    OTHER_TUPLE,
    OTHER_TUPLE,
    OTHER_TUPLE,
    OTHER_TUPLE,
    # positions 35-36 (identical layouts)
    ('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'),
    ('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'),
    # position 37
    OTHER_TUPLE,
]

566 # ----------license相关------------------------------------------------------------------------------------------------ 629 # ----------license相关------------------------------------------------------------------------------------------------
567 630
568 # "0":"AVT Invioce", 631 # "0":"AVT Invioce",
......
# Default page number and page size.
PAGE_DEFAULT = 1
PAGE_SIZE_DEFAULT = 10

FIXED_APPLICATION_ID_PREFIX = 'CH-S'

DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACT MANAGEMENT']
DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT']

HIL_PREFIX = 'HIL'
AFC_PREFIX = 'AFC'
SPLIT_STR = '_'
BUSINESS_TYPE_LIST = [HIL_PREFIX, AFC_PREFIX]
# Every upper/lower-case spelling of "hil" plus 'CO00002', the same code that
# BUSINESS_TYPE_DICT maps HIL_PREFIX to. Built with a comprehension instead of
# eight hand-typed variants so no spelling can be missed.
HIL_SET = {
    first + second + third
    for first in 'Hh'
    for second in 'Ii'
    for third in 'Ll'
} | {'CO00002'}

# ------- EDMS related ---------------------------------------------------------------------------------------------

SESSION_PREFIX = 'FHLSID'
CUSTOM_CLIENT = 'CustomClient'
FIXED_TOKEN = '00000000-0000-0000-0000-000000000000'
FIXED_FILE_SIZE = 0
DOWNLOAD_ACTION_TYPE = 'Downloaded'

# Keyed by the same names as DOC_SCHEME_LIST; each value is an (int, str) pair.
# NOTE(review): presumably (schema id, fill/filing mode) - confirm against the EDMS client.
DOC_SCHEMA_ID_FILL = {
    'ACCEPTANCE': (1, 'DFE-AutoFilingScript'),
    'SETTLEMENT': (20, 'DFE-AutoFilingScript'),
    'CONTRACT MANAGEMENT': (86, 'Schema-Based')
}
# Business-type prefix -> code string.
BUSINESS_TYPE_DICT = {
    HIL_PREFIX: 'CO00002',
    AFC_PREFIX: 'CO00001'
}
DOC_SCHEMA_TYPE = 'ElectronicRecord'
APPLICATION_ID_META_FIELD_id = 1
DEALER_CODE_META_FIELD_id = 13
BUSINESS_TYPE_META_FIELD_id = 93
DEALER_CODE = 'ocr_situ_group'

RETRY_TIMES = 3

# --------- Bank statement template related ----------------------------------------------------------------------------

# Character -> digit-string substitutions, compiled into TRANS below.
# NOTE(review): presumably corrects letters/brackets misread for digits - confirm.
# A replacement may be longer than one character ('B' becomes "13").
TRANS_MAP = {
    'C': "0",
    'c': "0",
    '(': "0",
    'o': "0",
    'O': "0",
    'D': "0",

    '[': "1",
    ']': "1",
    'l': "1",
    'L': "1",

    'A': "4",

    's': "5",
    'S': "5",

    'b': "6",

    'g': "9",
    'E': "9",

    'B': "13",
}
# Translation table for str.translate(), built from TRANS_MAP.
TRANS = str.maketrans(TRANS_MAP)
# The literal previously listed ':' and ',' twice, which adds nothing to a set.
# NOTE(review): the second copies were evidently the full-width forms (the set
# already carries the full-width stop '。'); they are written as escapes so
# they cannot be silently folded back to ASCII - confirm.
ERROR_CHARS = {
    '.', '。',
    ':', '\uff1a',   # colon, full-width colon
    '•', '·',
    ',', '\uff0c',   # comma, full-width comma
}
SKIP_IMG_SHEET_NAME = '未处理图片'
SKIP_IMG_SHEET_HEADER = ('页码', '序号')

CARD_RATIO = 0.9
UNKNOWN_CARD = '未知卡号'
UNKNOWN_ROLE = '未知户名'
# strptime-style date patterns.
DATE_FORMAT = ['%Y年%m月%d日', '%Y/%m/%d', '%Y-%m-%d', '%Y%m%d']

PROOF_COL_TITLE = '核对结果'
PROOF_RES = ('对', '错')
META_SHEET_TITLE = '关键信息提取和展示'

# Header labels of the fixed output layout, in column order.
FIXED_HEADERS = ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号',
                 '对方开户行', '核对结果', '借贷', '收入', '支出')
FIXED_COL_AMOUNT = len(FIXED_HEADERS)
# Header label -> 1-based column number.
BASE_HEADERS_MAPPING = {label: col for col, label in enumerate(FIXED_HEADERS, start=1)}

# 1-based column numbers of selected headers.
BORROW_HEADER_COL = BASE_HEADERS_MAPPING['借贷']
INCOME_HEADER_COL = BASE_HEADERS_MAPPING['收入']
OUTLAY_HEADER_COL = BASE_HEADERS_MAPPING['支出']
RESULT_HEADER_COL = BASE_HEADERS_MAPPING['核对结果']

# 0-based positions of the same headers inside FIXED_HEADERS.
BORROW_IDX = BORROW_HEADER_COL - 1
INCOME_IDX = INCOME_HEADER_COL - 1
OUTLAY_IDX = OUTLAY_HEADER_COL - 1
SUMMARY_IDX = FIXED_HEADERS.index('附言')
DATE_IDX = FIXED_HEADERS.index('记账日期')
AMOUNT_IDX = FIXED_HEADERS.index('金额')
OVER_IDX = FIXED_HEADERS.index('余额')
RESULT_IDX = FIXED_HEADERS.index('核对结果')

# Notes kept from the original author (header: cell values, then the template):
# '借贷': ('贷', '借'),  # portrait - no table - Guangfa Bank
# '借贷状态': ('贷', '借'),  # portrait - special - Bank of Communications
# '收/支': ('收入', '支出'),  # landscape - table - Bank of Beijing
BORROW_HEADERS_SET = {'借贷', '借贷状态', '收/支'}
BORROW_INCOME_SET = {'贷', '收入'}
BORROW_OUTLAY_SET = {'借', '支出'}
# The two sets below previously repeated one ASCII-parenthesis label, which
# adds nothing to a set.
# NOTE(review): the repeated entry was evidently the full-width-parenthesis
# spelling; it is written with escapes (\uff08 / \uff09) so it cannot be
# silently folded back to ASCII - confirm.
INCOME_HEADERS_SET = {'收入金额', '收入', '存入', '存入金额(贷)', '存入金额\uff08贷\uff09'}
OUTLAY_HEADERS_SET = {'支出金额', '支出', '支取金额(借)', '支取金额\uff08借\uff09'}

# Source-statement header label -> the value BASE_HEADERS_MAPPING assigns to
# the fixed header it corresponds to. Written as one literal, grouped by the
# statement template each label was taken from, instead of a long series of
# in-place update() calls; the keys, values and insertion order are unchanged.
#
# Three labels containing parentheses used to be listed twice with identical
# ASCII spelling, where the second entry silently overwrote the first.
# NOTE(review): the repeated entries were evidently the full-width-parenthesis
# spellings; they are written with escapes (\uff08 / \uff09) so they cannot be
# folded back to ASCII - confirm.
HEADERS_MAPPING = {
    # ------------------ Normal print - full grid lines ------------------

    # Landscape - table - Bank of China (irregular)
    '记账日期': BASE_HEADERS_MAPPING['记账日期'],
    '记账时间': BASE_HEADERS_MAPPING['记账时间'],
    '金额': BASE_HEADERS_MAPPING['金额'],
    '余额': BASE_HEADERS_MAPPING['余额'],
    '交易名称': BASE_HEADERS_MAPPING['交易名称'],
    '附言': BASE_HEADERS_MAPPING['附言'],
    '对方账户名': BASE_HEADERS_MAPPING['对方账户名'],
    '对方卡号/账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
    '对方开户行': BASE_HEADERS_MAPPING['对方开户行'],

    # Landscape - table - Agricultural Bank of China - personal account details
    '交易日期': BASE_HEADERS_MAPPING['记账日期'],
    # '存入': BASE_HEADERS_MAPPING['金额'],
    '对方账号': BASE_HEADERS_MAPPING['对方卡号/账号'],
    '对方名称': BASE_HEADERS_MAPPING['对方账户名'],
    '摘要': BASE_HEADERS_MAPPING['附言'],

    # Landscape - table - Bank of Beijing
    '业务摘要': BASE_HEADERS_MAPPING['附言'],
    '发生额': BASE_HEADERS_MAPPING['金额'],
    '对方户名': BASE_HEADERS_MAPPING['对方账户名'],

    # Landscape - table - ICBC, debit card account history detail list
    # Landscape - table - ICBC, machine-printed verification code, debit card account history detail list
    # Landscape - table - ICBC CH-B008802400
    # Landscape - table - ICBC, salary detail list
    # ICBC history details (application no.: 20042501303039397888)
    '收入/支出金额': BASE_HEADERS_MAPPING['金额'],
    '工作日期': BASE_HEADERS_MAPPING['记账日期'],

    # Landscape - table - CCB - personal current account transaction details
    # Portrait - table - CCB - personal current account transaction details CH-B005832604
    # Portrait - table - CCB - payroll statement CH-B008786812
    # Portrait - table - CCB - personal current account transaction details CH-B005832604 (2)
    '交易金额': BASE_HEADERS_MAPPING['金额'],
    '账户余额': BASE_HEADERS_MAPPING['余额'],
    '对方账号与户名': BASE_HEADERS_MAPPING['对方卡号/账号'],

    # WeChat
    '交易时间': BASE_HEADERS_MAPPING['记账时间'],
    '交易类型': BASE_HEADERS_MAPPING['附言'],
    '金额(元)': BASE_HEADERS_MAPPING['金额'],
    '金额\uff08元\uff09': BASE_HEADERS_MAPPING['金额'],
    '交易对方': BASE_HEADERS_MAPPING['对方账户名'],

    # Alipay
    '时间': BASE_HEADERS_MAPPING['记账日期'],
    '名称/备注': BASE_HEADERS_MAPPING['附言'],

    # ------------ Normal print - partial grid lines ------------

    # Portrait - no table - Agricultural Bank of China
    # Portrait - no table - Agricultural Bank of China CH-B008805428
    '摘要/附言': BASE_HEADERS_MAPPING['附言'],
    '交易地点/对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'],

    # Agricultural Bank of China - narrow page
    '交易对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'],

    # Portrait - special - Rural Commercial Bank
    '交易发生额': BASE_HEADERS_MAPPING['金额'],

    # Landscape - special - CITIC Bank - account transaction details
    '对方银行': BASE_HEADERS_MAPPING['对方开户行'],
    '交易摘要': BASE_HEADERS_MAPPING['附言'],

    # Ping An e-statement
    '借贷发生额(借:-贷:+)': BASE_HEADERS_MAPPING['金额'],

    # ------------ Normal print - no grid lines ------------

    # Portrait - no table - China Merchants Bank (slightly skewed)
    # Portrait - no table - China Merchants Bank account historical transaction detail table
    '联机余额': BASE_HEADERS_MAPPING['余额'],

    # Portrait - no table - Postal Savings Bank - account statement with counterparty name / account
    # Portrait - no table - Postal Savings Bank account statement
    # Portrait - no table - Postal Savings Bank - electronic seal, account statement
    '交易金额(元)': BASE_HEADERS_MAPPING['金额'],
    '交易金额\uff08元\uff09': BASE_HEADERS_MAPPING['金额'],
    '账户余额(元)': BASE_HEADERS_MAPPING['余额'],
    '账户余额\uff08元\uff09': BASE_HEADERS_MAPPING['余额'],
    '对手方户名': BASE_HEADERS_MAPPING['对方账户名'],
    '对手方账户': BASE_HEADERS_MAPPING['对方卡号/账号'],

    # Landscape - no table - Guangfa Bank - account transaction history --> deprecated
    # Portrait - no table - Guangfa Bank - account transaction history --> deprecated
    '会计日期': BASE_HEADERS_MAPPING['记账日期'],
    '对手户名': BASE_HEADERS_MAPPING['对方账户名'],
    '对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'],

    # China Merchants Bank e-statement  TODO contains English text, needs testing
    '对手信息': BASE_HEADERS_MAPPING['对方账户名'],
    '摘要代码': BASE_HEADERS_MAPPING['附言'],

    # Landscape - no table - Minsheng Bank - personal account statement (customer card no.)
    # Landscape - no table - Minsheng Bank - untitled (customer account)
    # Landscape - no table - Minsheng Bank
    '摘要信息': BASE_HEADERS_MAPPING['附言'],
    '对方行名': BASE_HEADERS_MAPPING['对方开户行'],

    # Portrait - no table - Agricultural Bank of China, integer
    # Portrait - no table - Agricultural Bank of China - bank card transaction detail list
    '对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'],

    # Portrait - no table - Agricultural Bank of China - bank card / current passbook transaction detail list.pdf
    # Portrait - no table - Agricultural Bank of China - expanded.pdf
    # Portrait - no table - Agricultural Bank of China - indented.pdf
    '日期': BASE_HEADERS_MAPPING['记账日期'],
    '短摘要': BASE_HEADERS_MAPPING['附言'],
    '本次余额': BASE_HEADERS_MAPPING['余额'],

    # Portrait - no table - Agricultural Bank of China - untitled (counterparty account)
    '交易后余额': BASE_HEADERS_MAPPING['余额'],
    '对手帐号': BASE_HEADERS_MAPPING['对方卡号/账号'],

    # Portrait - no table - Rural Commercial Bank (non-standard)
    '交易说明': BASE_HEADERS_MAPPING['附言'],

    # Portrait - no table - ICBC, three-line header, current account history detail list
    '对方账户': BASE_HEADERS_MAPPING['对方卡号/账号'],

    # ----------- Dot-matrix print - full grid lines -----------

    # Portrait - table - CCB - current account transaction details
    # Portrait - table - CCB - current account detail list
    # Portrait - table - CCB - personal current account details - (1).pdf
    '帐户余额': BASE_HEADERS_MAPPING['余额'],
    '对方帐户名称': BASE_HEADERS_MAPPING['对方账户名'],

    # Portrait - special - Bank of Communications, retail customer transaction list, records above 5000
    '交易日期 记账日期': BASE_HEADERS_MAPPING['记账日期'],

    # ---------- Dot-matrix print - partial grid lines ----------

    # Portrait - special - Postal Savings Bank - passbook / green card transaction details (customer)
    # Portrait - special - Postal Savings Bank - account transaction details (customer)
    '对方账号/卡号/汇票号': BASE_HEADERS_MAPPING['对方卡号/账号'],
}

327 # --------------------------------------------------------------------------------------------------------------------
328
329 # ('记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名', '对方卡号/账号', '对方开户行', '核对结果', '借贷', '收入', '支出')
330 # CLASSIFY_LIST = [
331 # # --------------普通打印:全格线---------------------------------
332 # # 中国银行:记账日期 记账时间 币别 金额 余额 交易名称 渠道 网点名称 附言 对方账户名 对方卡号/账号 对方开户行
333 # ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)), # 横版-表格-中国银行(不规则)
334 #
335 # # 农业银行:交易日期 交易网点 存入 支出 余额 对方账号 对方名称 摘要 渠道 附言
336 # ('农业银行-10', (1, None, None, 5, None, 8, 7, 6, None, None, None, 3, 4)), # 横版-表格-农业银行-中国农业银行个人账户明细
337 #
338 # # 农业银行:序号 日期 摘要 交易金额 余额 对方账号 对方名称 交易地点 渠道 附言
339 # ('农业银行-10-1', (2, None, 4, 5, None, 3, 7, 6, None, None, None, None, None)),
340 #
341 # # 农业银行:交易日期 摘要 交易金额 余额 交易渠道 交易网点 对方账号 对方名称 附言
342 # ('农业银行-9', (1, None, 3, 4, None, 2, 8, 7, None, None, None, None, None)),
343 #
344 # # 北京银行:交易日期 业务摘要 收/支 发生额 余额 对方户名 对方账号 交易渠道
345 # ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)), # 横版-表格-北京银行
346 #
347 # # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 渠道
348 # ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
349 #
350 # # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 对方户名 对方账号 渠道
351 # ('工商银行-电子账单', (1, None, 9, 10, None, 7, 11, 12, None, None, None, None, None)),
352 #
353 # # 建设银行:空 摘要 交易日期 交易金额 账户余额 商户/网点号及其名称 对方账号与户名 --> 竖版-表格-建设银行
354 # # 序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名 --> 横版-表格-建设银行
355 # ('建设银行-竖版', (3, None, 4, 5, None, 2, None, 7, None, None, None, None, None)),
356 # ('建设银行-横版', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)),
357 #
358 # # 微信:交易单号 交易时间 交易类型 收/支/其他 交易方式 金额(元) 交易对方 商户单号
359 # ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
360 #
361 # # 支付宝:流水号 时间 名称/备注 收入 支出 账户余额 资金渠道
362 # ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),
363 #
364 # # -----------------普通打印:部分格线--------------------------------
365 #
366 # # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名
367 # ('农业银行-5', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
368 #
369 # # 农业银行:日期 地点 摘要 存入 支出 余额 对方账号 对方户名
370 # ('农业银行-8', (1, None, None, 6, None, 3, 8, 7, None, None, None, 4, 5)),
371
372 # # 农业银行:日期 摘要 交易金额 余额 地点 交易对手账号 对方户名
373 # ('农业银行-窄页', (1, None, 3, 4, None, 2, 7, 6, None, None, None, None, None)),
374 #
375 # # 农商行:交易日期 交易发生额 账户余额 对方账号 对方户名 摘要 备注
376 # ('农商行', (1, None, 2, 3, None, 6, 5, 4, None, None, None, None, None)),
377 #
378 # # 中信银行:交易日期 交易摘要 收入金额 支出金额 账户余额 对方户名 对方账号 对方银行 交易流水号
379 # ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
380 #
381 # # 平安电子账单:序号 交易日期 交易网点 摘要 借贷发生额(借:-贷:+) 账户余额
382 # ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
383
384 # # 建设银行:序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名
385 # ('建设银行-电子账单', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)),
386 #
387 # # -----------------普通打印:无格线-------------------------------------
388 #
389 # # 招商银行:记账日期 货币 交易金额 联机余额 冲补账 交易摘要
390 # ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),
391 #
392 # # 邮储银行:交易日期、交易类型 交易币种 交易金额(元) 账户余额(元) [对手方户名 对手方账户 收支类型] --> 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户
393 # # 交易日期、交易类型 交易金额(元) 账户余额(元) 操作柜员 --> 竖版-无表格-邮储银行 账户对账单
394 # ('邮储银行-8', (1, None, 4, 5, None, 2, 6, 7, None, None, None, None, None)),
395 # ('邮储银行-5', (1, None, 3, 4, None, 2, None, None, None, None, None, None, None)),
396 #
397 # # 工商银行电子版:交易日期 账号 储种 序号 币种 妙汇 摘要 地区 收入/支出金额 余额 [对方户名 对方账号] 渠道
398 # ('工商银行电子版', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
399 #
400 # # 招商银行电子版:记账日期 货币 交易金额 联机余额 交易摘要 对手信息
401 # ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),
402 #
403 # # 民生银行:凭证类型 凭证号码 摘要信息 交易时间 交易金额 账户余额 现转标志 交易渠道 交易机构 对方户名 对方行名 --> 横版-无表格-民生银行-中国民生银行个人账户对账单(客户卡号)
404 # # 凭证类型 凭证号码 交易时间 摘要 交易金额 账户余额 现转标志 交易渠道 交易机构 对方户名 对方行名 --> 横版-无表格-民生银行
405 # ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
406 #
407 # # 农业银行:交易日期 摘要/附言 交易金额 对方账号和户名
408 # ('农业银行-整数', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
409 #
410 # # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名
411 # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
412 #
413 # # 农业银行:日期、时间、短摘要、交易金额、本次余额、交易网点、渠道、附言
414 # # 农业银行:日期、时间、日志号、短摘要、交易金额、本次余额、交易网点、渠道、附言
415 # ('农业银行', (1, 2, 4, 5, None, 3, None, None, None, None, None, None, None)),
416 # ('农业银行-扩张缩进', (1, 2, 5, 6, None, 4, None, None, None, None, None, None, None)),
417 #
418 # # 交通银行:交易日期 记账日期、交易地点、交易类型、借贷状态、交易金额、余额
419 # ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
420 #
421 #
422 # # ================针式打印:有格线===================
423 #
424 # # 建设银行: 摘要、交易日期、交易金额、账户余额、商户/网点号及其名称、对方账号、对方户名 --> 竖版-表格-建设银行-中国建设银行活期账户明细清单
425 # # 交易日期、摘要、 币种、 钞汇、 交易金额、 帐户余额、对方账号、 对方帐户名称 --> 竖版-表格-建设银行-对私活期账户明细- (1)
426 # ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)),
427 #
428 #
429 # # ================针式打印:无格线===================
430 #
431 # # 邮储银行:序号、交易日期、交易渠道、摘要、交易金额、账户余额、对方账号/卡号/汇票号、原子账号、交易机构名称
432 # ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
433 # ]
434
# Placeholder used where no header/column layout is known: 13 None slots,
# written as a repetition so the length is explicit instead of hand-counted.
# NOTE(review): 13 equals len(FIXED_HEADERS); presumably one slot per fixed
# column - confirm before changing either.
OTHER_TUPLE = (None,) * 13

437 # {
438 # "0":"其他",
439 # "1":"普通打印-全表格-中国农业银行",
440 # "2":"普通打印-全表格-中国银行",
441 # "3":"普通打印-全表格-北京银行",
442 # "4":"普通打印-全表格-工商银行",
443 # "5":"普通打印-全表格-建设银行",
444 # "6":"普通打印-全表格-微信账单",
445 # "7":"普通打印-全表格-支付宝账单",
446 # "8":"普通打印-无格线-中国邮政储蓄银行",
447
448 # "9":"普通打印-无格线-交通银行",
449 # "10":"普通打印-无格线-农业银行整数",
450 # "11":"普通打印-无格线-农业银行银行活期扩张缩进",
451 # "12":"普通打印-无格线-招商银行",
452 # "13":"普通打印-无格线-招行电子账单",
453 # "14":"普通打印-无格线-民生银行",
454
455 # "15":"普通打印-部分格线-横版-中信银行",
456 # "16":"普通打印-部分格线-竖版-中国农业银行分账户窄页",
457 # "17":"普通打印-部分格线-竖版-农业银行",
458 # "18":"普通打印-部分格线-竖版-农业银行银行卡交易明细",
459 # "19":"普通打印-部分格线-竖版-平安电子账单",
460
461 # "20":"针式打印-全格线-建设银行",
462 # "21":"针式打印-部分格线-竖版-邮储银行账户交易",
463 # "22":"针式打印-部分格线-邮储银行一本通绿卡"
464 # }
465
466 # CLASSIFY_LIST = [
467 # ('其他', OTHER_TUPLE),
468 # ('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)),
469 # ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)),
470 # ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)),
471 # ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
472 # ('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)),
473 # ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
474 # ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),
475 #
476 # ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
477 # ('农业银行', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
478 # ('农业银行', (1, 2, None, None, None, None, None, None, None, None, None, None, None)),
479 # ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),
480 # ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),
481 # ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
482 #
483 # ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
484 # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
485 # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
486 # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
487 # ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
488 #
489 # ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)),
490 # ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
491 # ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
492 # ]
493
494 # "4":"普通打印-全表格-中国银行",
495 # "5":"普通打印-全表格-农业银行-10列",
496 # "6":"普通打印-全表格-农业银行-10列-1",
497 # "7":"普通打印-全表格-农业银行-9列",
498 # "8":"普通打印-全表格-北京银行",
499 # "9":"普通打印-全表格-工商银行",
500 # "10":"普通打印-全表格-工商银行-电子账单",
501 # "11":"普通打印-全表格-建设银行",
502 # "12":"普通打印-全表格-微信账单",
503 # "13":"普通打印-全表格-支付宝账单",
504
505 # "14":"普通打印-无格线-交通银行",
506 # "15":"普通打印-无格线-储蓄银行-5列",
507 # "16":"普通打印-无格线-储蓄银行-8列",
508 # "17":"普通打印-无格线-农业银行-扩张缩进",
509 # "18":"普通打印-无格线-农业银行-整数",
510 # "19":"普通打印-无格线-招商银行",
511 # "20":"普通打印-无格线-招商银行-电子账单",
512 # "21":"普通打印-无格线-民生银行",
513
514 # "22":"普通打印-部分格线-横版-中信银行",
515 # "23":"普通打印-部分格线-竖版-农业银行-5列",
516 # "24":"普通打印-部分格线-竖版-农业银行-8列",
517 # "25":"普通打印-部分格线-竖版-农业银行-窄页",
518 # "26":"普通打印-部分格线-竖版-平安电子账单",
519 # "27":"普通打印-部分格线-竖版-建设银行-电子账单",
520
521 # "34":"针式打印-全格线-建设银行",
522 # "35":"针式打印-部分格线-竖版-邮储银行",
523 # "36":"针式打印-部分格线-竖版-邮储银行-绿卡",
524
# Column-layout templates. The list index is the class id from the id/name
# listing in the comments above (4-27 and 34-36 are bank-statement layouts).
# Each entry is (template name, column tuple). The column tuple has 13 slots;
# a slot holds a source column number, or None when the layout has no such column.
# NOTE(review): slots appear to follow the order of FIXED_HEADERS and to be
# 1-based (callers add a column offset or subtract 1) - confirm against callers.
525 CLASSIFY_LIST = [
# ids 0-3 are non-statement classes in the listing, so they carry the placeholder tuple
526 ('其他', OTHER_TUPLE),
527 ('其他', OTHER_TUPLE),
528 ('其他', OTHER_TUPLE),
529 ('其他', OTHER_TUPLE),
# ids 4-13
530 ('普通打印-全表格-中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)),
531 ('普通打印-全表格-农业银行-10列', (1, None, None, 5, None, 8, 7, 6, None, None, None, 3, 4)),
532 ('普通打印-全表格-农业银行-10列-1', (2, None, 4, 5, None, 3, 7, 6, None, None, None, None, None)),
533 ('普通打印-全表格-农业银行-9列', (1, None, 3, 4, None, 2, 8, 7, None, None, None, None, None)),
534 ('普通打印-全表格-北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)),
535 ('普通打印-全表格-工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
536 ('普通打印-全表格-工商银行-电子账单', (1, None, 9, 10, None, 7, 11, 12, None, None, None, None, None)),
537 ('普通打印-全表格-建设银行', (3, None, 4, 5, None, 2, None, 7, None, None, None, None, None)),
538 ('普通打印-全表格-微信账单', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)),
539 ('普通打印-全表格-支付宝账单', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)),
540
# ids 14-21
541 ('普通打印-无格线-交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)),
542 ('普通打印-无格线-储蓄银行-5列', (1, None, 3, 4, None, 2, None, None, None, None, None, None, None)),
543 ('普通打印-无格线-储蓄银行-8列', (1, None, 4, 5, None, 2, 6, 7, None, None, None, None, None)),
544 ('普通打印-无格线-农业银行-扩张缩进', (1, 2, 5, 6, None, 4, None, None, None, None, None, None, None)),
545 ('普通打印-无格线-农业银行-整数', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)),
546 ('普通打印-无格线-招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)),
547 ('普通打印-无格线-招商银行-电子账单', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)),
548 ('普通打印-无格线-民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)),
549
# ids 22-27
550 ('普通打印-部分格线-横版-中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
551 ('普通打印-部分格线-竖版-农业银行-5列', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
552 ('普通打印-部分格线-竖版-农业银行-8列', (1, None, None, 6, None, 3, 8, 7, None, None, None, 4, 5)),
553 ('普通打印-部分格线-竖版-农业银行-窄页', (1, None, 3, 4, None, 2, 7, 6, None, None, None, None, None)),
554 ('普通打印-部分格线-竖版-平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
555 ('普通打印-部分格线-竖版-建设银行-电子账单', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)),
# ids 28-33 are license/certificate classes in the listing, so they carry the placeholder tuple
556 ('其他', OTHER_TUPLE),
557 ('其他', OTHER_TUPLE),
558 ('其他', OTHER_TUPLE),
559 ('其他', OTHER_TUPLE),
560 ('其他', OTHER_TUPLE),
561 ('其他', OTHER_TUPLE),
# ids 34-36; id 34 is named but has no column template of its own
562 ('针式打印-全格线-建设银行', OTHER_TUPLE),
563 ('针式打印-部分格线-竖版-邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
564 ('针式打印-部分格线-竖版-邮储银行-绿卡', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
# id 37 (bank card in the listing)
565 ('其他', OTHER_TUPLE),
566 ]
567
568 # ----------license相关------------------------------------------------------------------------------------------------
569
570 # "0":"AVT Invioce",
571 # "1":"二手车发票",
572 # "2":"其他",
573 # "3":"护照",
574 # "28":"机动车登记证",
575 # "29":"机动车销售统一发票",
576 # "30":"港澳通行证",
577 # "31":"营业执照",
578 # "32":"行驶证",
579 # "33":"身份证",
580 # "37":"银行卡"
581
582 # Other: class id 2 is labelled "其他" in the id/name listing above
583 OTHER_CLASSIFY = 2
584
# Each *_FIELD_ORDER tuple below is an ordered sequence of (key, label) string pairs.
# NOTE(review): presumably (recognition-result field name, output label) - confirm against the consumer.
585 # ID card
586 IC_CN_NAME = '身份证'
587 IC_CLASSIFY = 33
588 IC_FIELD_ORDER_0 = (('姓名', '姓名'),
589 ('公民身份号码', '公民身份号码'),
590 ('出生年月', '出生年月'),
591 ('住址', '住址'),
592 ('性别', '性别'),
593 ('民族', '民族'),)
594 IC_FIELD_ORDER_1 = (('有效期限', '有效期限'), ('签发机关', '签发机关'),)
595 # Residence permit (class id 10087 is outside the id/name listing above)
596 RP_CN_NAME = '居住证'
597 RP_CLASSIFY = 10087
598 RP_FIELD_ORDER_0 = (('姓名', '姓名'),
599 ('公民身份号码', '公民身份号码'),
600 ('出生年月', '出生年月'),
601 ('住址', '住址'),
602 ('性别', '性别'),)
# The residence permit reuses the ID card's second field group unchanged.
603 RP_FIELD_ORDER_1 = IC_FIELD_ORDER_1
604 # VAT invoice
605 VAT_CN_NAME = 'VAT普票'
606 VAT_CLASSIFY = 0
# Ordered (key, label) string pairs; in several pairs the label differs from the key.
# NOTE(review): presumably (recognition-result field name, output label) - confirm against the consumer.
607 VAT_FIELD_ORDER = (('发票代码', '发票代码'),
608 ('发票代码(开具)', '发票代码(开具)'),
609 ('发票号码', '发票号码'),
610 ('发票号码(开具)', '发票号码(开具)'),
611 ('开票日期', '开票日期'),
612 ('校验码', '校验码'),
613 ('货物或应税劳务、服务名称', '货物或应税劳务、服务名称'),
614 ('金额合计', '开具金额合计(不含税)'),
615 ('税率', '税率'),
616 ('税额合计', '税额合计'),
617 ('价税合计小写', '价税合计(小写)'),
618 ('价税合计大写', '价税合计(大写)'),
619 ('购方名称', '购买方名称'),
620 ('购方纳税人识别号', '购买方纳税人识别号'),
621 ('购方地址、电话', '购买方地址、电话'),
622 ('购方开户行及账号', '购买方开户行及账号'),
623 ('销方名称', '销售方名称'),
624 ('销方纳税人识别号', '销售方纳税人识别号'),
625 ('销方地址、电话', '销售方地址、电话'),
626 ('销方开户行及账号', '销售方开户行及账号'),
627 ('销售方:(章)', '销售方:(章)'),
628 ('备注', '备注'),)
629 # Motor vehicle registration certificate
630 MVC_CN_NAME = '机动车登记证书'
631 MVC_CLASSIFY = 28
# Second class id for the same certificate; it selects the longer MVC_SE_* field lists below.
632 MVC_CLASSIFY_SE = 10086
# Ordered (key, label) pairs. A pair whose label is None ('空行占位' = "blank row placeholder")
# carries no field. NOTE(review): presumably rendered as an empty row - confirm against the consumer.
633 MVC_FIELD_ORDER_1_2 = (('1.机动车所有人/身份证名称/号码', '机动车所有人/身份证明名称/号码'),
634 ('3.登记日期', '登记日期'),
635 ('9.车辆识别代号/车架号', '车辆识别代号/车架号'),
636 ('32.车辆出厂日期', '车辆出厂日期'),
637 ('34.发证日期', '发证日期'),
638 ('30.使用性质', '使用性质'),
639 ('31.车辆获得方式', '车辆获得方式'),
640 ('4.机动车登记编号', '机动车登记编号'),
641 ('空行占位', None),
642 ('5.车辆类型', '车辆类型'),
643 ('6.车辆品牌', '车辆品牌'),
644 ('7.车辆型号', '车辆型号'),
645 ('8.车身颜色', '车身颜色'),
646 ('10.国产/进口', '国产/进口'),
647 ('11.发动机号', '发动机号'),
648 ('12.发动机型号', '发动机型号'),
649 ('15.制造厂名称', '制造厂名称'),
650 ('2.登记机关', '登记机关'),
651 ('编号', '机动车登记证书编号'),)
652 MVC_FIELD_ORDER_3_4 = (
653 ('姓名/名称', '姓名/名称'),
654 ('身份证明名称/号码', '身份证明名称/号码'),
655 ('转移登记日期', '转移登记日期'),
656 )
# Field list for MVC_CLASSIFY_SE: same kind of pairs, covering items 1-34 more fully.
657 MVC_SE_FIELD_ORDER_1_2 = (('9.车辆识别代号/车架号', '车辆识别代号/车架号'),
658 ('1.机动车所有人/身份证名称/号码', '机动车所有人/身份证明名称/号码'),
659 ('空行占位', None),
660 ('3.登记日期', '登记日期'),
661 ('32.车辆出厂日期', '车辆出厂日期'),
662 ('34.发证日期', '发证日期'),
663 ('30.使用性质', '使用性质'),
664 ('31.车辆获得方式', '车辆获得方式'),
665 ('5.车辆类型', '车辆类型'),
666 ('6.车辆品牌', '车辆品牌'),
667 ('7.车辆型号', '车辆型号'),
668 ('8.车身颜色', '车身颜色'),
669 ('10.国产/进口', '国产/进口'),
670 ('11.发动机号', '发动机号'),
671 ('12.发动机型号', '发动机型号'),
672 ('13.燃料种类', '燃料种类'),
673 ('14.排量/功率', '排量/功率'),
674 ('15.制造厂名称', '制造厂名称'),
675 ('16.转向形式', '转向形式'),
676 ('17.轮距', '轮距'),
677 ('18.轮胎数', '轮胎数'),
678 ('19.轮胎规格', '轮胎规格'),
679 ('20.钢板弹簧片数', '钢板弹簧片数'),
680 ('21.轴距', '轴距'),
681 ('22.轴数', '轴数'),
682 ('23.外廓尺寸', '外廓尺寸'),
683 ('24.货厢内部尺寸', '货厢内部尺寸'),
684 ('25.总质量', '总质量'),
685 ('26.核定载质量', '核定载质量'),
686 ('27.核定载客', '核定载客'),
687 ('28.准牵引总质量', '准牵引总质量'),
688 ('29.驾驶室载客', '驾驶室载客'),
689 ('2.登记机关', '登记机关'),
690 ('4.机动车登记编号', '机动车登记编号'),
691 ('编号', '机动车登记证书编号'),)
# Same three pairs as MVC_FIELD_ORDER_3_4.
692 MVC_SE_FIELD_ORDER_3_4 = (
693 ('姓名/名称', '姓名/名称'),
694 ('身份证明名称/号码', '身份证明名称/号码'),
695 ('转移登记日期', '转移登记日期'),
696 )
697 # Unified motor vehicle sales invoice
698 MVI_CN_NAME = '机动车销售统一发票'
699 MVI_CLASSIFY = 29
# Ordered (key, label) string pairs; the ('', None) pair carries no field.
# NOTE(review): the meaning of the trailing "# nodo" markers is not visible here - confirm with the author.
700 MVI_FIELD_ORDER = (('发票代码', '发票代码'),
701 ('发票号码', '发票号码'),
702 ('开票日期', '开票日期'),
703 ('不含税价', '不含税价'),
704 ('发票类型', '发票联'),
705 ('购方名称', '购买方名称'),
706 ('购买方身份证号或组织机构代码', '购买方证件号码'),
707 ('纳税人识别号', '纳税人识别号'), # nodo
708 ('车辆识别代码', '车架号'),
709 ('价税合计小写', '价税合计小写'),
710 ('销方名称', '销货单位名称'),
711 ('增值税税额', '增值税税额'),
712 ('增值税税率', '增值税税率'), # nodo
713 ('发票章有无', '发票章有无'), # nodo: national unified invoice supervision seal / seller's seal
714 ('价税合计大写', '价税合计大写'), # nodo
715 ('', None),
716 ('发动机号码', '发动机号'),
717 ('车辆类型', '车辆类型'), # nodo
718 ('厂牌型号', '厂牌型号'), # nodo
719 ('产地', '产地'), # nodo
720 ('合格证号', '合格证号'), # nodo
721 ('进口证明书号', '进口证明书号'), # nodo
722 ('商检单号', '商检单号'), # nodo
723 ('电话', '电话'), # nodo
724 ('销方纳税人识别号', '销货方纳税人识别号'),
725 ('账号', '账号'), # nodo
726 ('地址', '地址'), # nodo
727 ('开户银行', '开户银行'), # nodo
728 ('主管税务机关及代码', '主管税务机关及代码'), # nodo
729 ('吨位', '吨位'), # nodo
730 ('限乘人数', '限乘人数'),) # nodo
# These four document types have no pid; the types defined further down each set one.
731 IC_PID = VAT_PID = MVC_PID = MVI_PID = None
732
733 # Business license
734 BL_CN_NAME = '营业执照'
735 BL_CLASSIFY = 31
736 BL_PID = 41
# Ordered (key, label) string pairs.
# NOTE(review): presumably (recognition-result field name, output label) - confirm against the consumer.
737 BL_FIELD_ORDER = (('注册号', '统一社会信用代码'),
738 ('企业名称', '名称'),
739 ('企业类型', '类型'),
740 ('经营者姓名', '法定代表人'),
741 ('成立日期', '成立日期'),
742 ('营业期限', '营业期限'),
743 ('注册资本', '注册资本'),
744 ('地址', '住所'),
745 ('经营范围', '经营范围'),)
746 # Used-car invoice
747 UCI_CN_NAME = '二手车发票'
748 UCI_CLASSIFY = 1
749 UCI_PID = 60
# The ('空行占位', None) pair ("blank row placeholder") carries no field.
750 UCI_FIELD_ORDER = (('发票代码', '发票代码'),
751 ('发票号码', '发票号码'),
752 ('开票日期', '开票日期'),
753 ('车价合计', '车价合计小写'),
754 ('发票联', '发票联'),
755 ('购方单位', '买方单位/个人'),
756 ('购方号码', '买方单位代码/身份证号码'),
757 ('车架号码', '车架号'),
758 ('车价合计大写', '车价合计大写'),
759 ('二手车市场', '二手车市场'),
760 ('发票章有无', '发票章有无'),
761 ('空行占位', None),
762 ('车牌照号', '车牌照号'),
763 ('登记证号', '登记证号'),
764 ('购方地址', '买方单位/住址'),
765 ('车辆类型', '车辆类型'),
766 ('厂牌型号', '厂牌型号'),
767 ('车管所名称', '转入地车辆管理所名称'),
768 ('销方名称', '卖方单位/个人'),
769 ('销方号码', '卖方单位代码/身份证号码'),
770 ('销方地址', '卖方单位/个人住址'),)
771 # Exit-entry permit for Hong Kong, Macau and Taiwan
772 EEP_CN_NAME = '港澳台通行证'
773 EEP_CLASSIFY = 30
774 EEP_PID = 1018
775 EEP_FIELD_ORDER = (('中文名', '姓名'), # English name
776 ('证件号码', '证件号码'),
777 ('签发次数', '换证次数(签发次数)'),
778 ('有效期限', '有效期限'),
779 ('出生日期', '出生日期'),
780 ('性别', '性别'),
781 ('签发机关', '签发机关'),
782 ('签发地点', '签发地点'),)
783 # Vehicle license
784 DL_CN_NAME = '行驶证'
785 DL_CLASSIFY = 32
786 DL_PID = 5
# Two ordered groups of (key, label) string pairs; both start with the plate-number pair.
# NOTE(review): presumably one group per page of the document - confirm against the consumer.
787 DL_FIELD_ORDER_0 = (('号牌号码', '1 号牌号码'),
788 ('所有人', '3 所有人'),
789 ('使用性质', '5 使用性质'),
790 ('车辆识别代码', '7 车辆识别代号'),
791 ('注册日期', '9 注册日期'),
792 ('发证日期', '10 发证日期'),
793 ('车辆类型', '2 车辆类型'),
794 ('地址', '4 住址'),
795 ('品牌型号', '6 品牌型号'),
796 ('发动机号', '8 发动机号码'),)
797 DL_FIELD_ORDER_1 = (('号牌号码', '1 号牌号码'),
798 ('档案编号', '11 档案编号'),
799 ('核定载人数', '12 核定载人数'),
800 ('总质量', '13 总质量'),
801 ('整备质量', '14 整备质量'),
# NOTE(review): the label below reads '核对载质量' while its key reads '核定载质量' - possible typo, confirm before changing.
802 ('核定载质量', '15 核对载质量'),
803 ('外廓尺寸', '16 外廓尺寸'),
804 ('准牵引总质量', '17 准牵引总质量'),)
805 # Passport
806 PP_CN_NAME = '护照'
807 PP_CLASSIFY = 3
808 PP_PID = 8
809 PP_FIELD_ORDER = (('类型', '类型/Type'),
810 ('英文姓名', '姓名/Name'),
811 ('护照号码', '护照号码/Passport No'),
812 ('有效期至', '有效期至/Date of expiry'),
813 ('签发日期', '签发日期/Date of issue'),
814 ('国家码', '国家码/Country Code'),
815 ('性别', '性别/Sex'),
816 ('国籍', '国籍/Nationality'),
817 ('出生日期', '出生日期/Date of birth'),
818 ('出生地点', '出生地点/Place of birth'),
819 ('签发地点', '签发地点/Place of issue'),)
820 # Bank card
821 BC_CN_NAME = '银行卡'
822 BC_CLASSIFY = 37
823 BC_PID = 4
# Earlier, longer field list kept for reference; only the three pairs in BC_FIELD_ORDER are active.
824 # BC_FIELD = (('CardNum', '银行卡号'),
825 # ('BankName', '发卡行名称'),
826 # ('CardName', '银行卡名称'),
827 # ('BankCode', '发卡行代号'),
828 # ('CardType', '银行卡类型'),
829 # ('Date', '日期'))
830 BC_FIELD_ORDER = (('BankName', '发卡行名称'),
831 ('CardNum', '银行卡号'),
832 ('CardType', '银行卡类型'),)
833
# Success is accepted both as the string '0' and the integer 0.
834 SUCCESS_CODE_SET = {'0', 0}
835
# class id -> (field name, field group A, field group B).
# In every entry the field name is one of the keys in group A.
# NOTE(review): presumably the presence of that field selects group A over group B - confirm against the consumer.
836 FIELD_ORDER_MAP = {
837 IC_CLASSIFY: ('有效期限', IC_FIELD_ORDER_1, IC_FIELD_ORDER_0),
838 RP_CLASSIFY: ('有效期限', RP_FIELD_ORDER_1, RP_FIELD_ORDER_0),
839 DL_CLASSIFY: ('档案编号', DL_FIELD_ORDER_1, DL_FIELD_ORDER_0),
840 MVC_CLASSIFY: ('转移登记日期', MVC_FIELD_ORDER_3_4, MVC_FIELD_ORDER_1_2),
841 MVC_CLASSIFY_SE: ('转移登记日期', MVC_SE_FIELD_ORDER_3_4, MVC_SE_FIELD_ORDER_1_2)
842 }
843
# Ordered pairs of (class id, (pid, Chinese name, field order or None, flag, flag)).
# The field order is None exactly for the classes that have a FIELD_ORDER_MAP entry, and
# the first flag is True for those same classes; the second flag is True only for MVC.
# NOTE(review): the meaning of the two flags is not visible here - confirm against the consumer.
844 LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, False, False)),
845 (IC_CLASSIFY, (IC_PID, IC_CN_NAME, None, True, False)),
846 (RP_CLASSIFY, (None, RP_CN_NAME, None, True, False)),
847 (BC_CLASSIFY, (BC_PID, BC_CN_NAME, BC_FIELD_ORDER, False, False)),
848 (BL_CLASSIFY, (BL_PID, BL_CN_NAME, BL_FIELD_ORDER, False, False)),
849 (UCI_CLASSIFY, (UCI_PID, UCI_CN_NAME, UCI_FIELD_ORDER, False, False)),
850 (EEP_CLASSIFY, (EEP_PID, EEP_CN_NAME, EEP_FIELD_ORDER, False, False)),
851 (DL_CLASSIFY, (DL_PID, DL_CN_NAME, None, True, False)),
852 (PP_CLASSIFY, (PP_PID, PP_CN_NAME, PP_FIELD_ORDER, False, False)),
853 (MVC_CLASSIFY, (MVC_PID, MVC_CN_NAME, None, True, True)),
854 (VAT_CLASSIFY, (VAT_PID, VAT_CN_NAME, VAT_FIELD_ORDER, False, False)))
855
# Same data as LICENSE_ORDER, keyed by class id for direct lookup.
856 LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER)
857
# Groupings of class ids. SET_1 holds the four classes whose pid is None and SET_2 the
# six classes with a numeric pid; RP_CLASSIFY and MVC_CLASSIFY_SE are in neither set.
858 OTHER_CLASSIFY_SET = {OTHER_CLASSIFY}
859 LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY}
860 LICENSE_CLASSIFY_SET_2 = {BL_CLASSIFY, UCI_CLASSIFY, EEP_CLASSIFY, DL_CLASSIFY, PP_CLASSIFY, BC_CLASSIFY}
...@@ -30,51 +30,114 @@ class BSWorkbook(Workbook): ...@@ -30,51 +30,114 @@ class BSWorkbook(Workbook):
30 self.MAX_MEAN = 31 30 self.MAX_MEAN = 31
31 31
@staticmethod
def header_collect(ws, sheet_header_info, header_info):
    """Record which known headers appear in the first row of ``ws``.

    Both dictionaries are updated in place and nothing is returned.

    ``sheet_header_info[ws.title]`` receives the following entries, each written
    with ``setdefault`` so a value already present is kept:

    * ``consts.HEADER_KEY``: the first row as a tuple of cell values;
    * one entry per recognised header, keyed by the value that
      ``consts.HEADERS_MAPPING`` gives for the header text and holding the
      0-based position of the first cell that produced that key;
    * ``consts.FIND_COL_KEY``: the set of 0-based positions of all recognised cells;
    * ``consts.FIND_COUNT_KEY``: how many cells were recognised;
    * ``consts.MIN_ROW_KEY``: 1 when nothing was recognised, otherwise 2.

    ``header_info[key][position]`` counts, across every sheet passed in with the
    same ``header_info``, how often ``key`` was seen at ``position``.
    """
    sheet_info = sheet_header_info.setdefault(ws.title, {})
    find_count = 0

    # Only the first row is inspected for header keywords.
    for first_row in ws.iter_rows(min_row=1, max_row=1, values_only=True):
        sheet_info.setdefault(consts.HEADER_KEY, first_row)
        for idx, header_value in enumerate(first_row):
            header_col = consts.HEADERS_MAPPING.get(header_value)
            if header_col is None:
                continue
            find_count += 1
            sheet_info.setdefault(header_col, idx)
            sheet_info.setdefault(consts.FIND_COL_KEY, set()).add(idx)
            position_counter = header_info.setdefault(header_col, {})
            position_counter[idx] = position_counter.get(idx, 0) + 1

    sheet_info.setdefault(consts.FIND_COUNT_KEY, find_count)
    sheet_info.setdefault(consts.MIN_ROW_KEY, 2 if find_count else 1)
52 letter = get_column_letter(col) 80
53 ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=consts.INCOME_HEADER_COL - col) 81 @staticmethod
54 moved_col_set.add(col) 82 def header_statistics(sheet_header_info, header_info, classify):
55 header_col_set.add(consts.INCOME_HEADER_COL) 83 # statistics_header_info = {
56 elif header_value in consts.OUTLAY_HEADERS_SET: 84 # SUMMARY_KEY: 2,
57 letter = get_column_letter(col) 85 # DATE_KEY: 3,
58 ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=consts.OUTLAY_HEADER_COL - col) 86 # AMOUNT_KEY: 4,
59 moved_col_set.add(col) 87 # OVER_KEY: 5,
60 header_col_set.add(consts.OUTLAY_HEADER_COL) 88 # IMCOME_KEY: 6,
61 89 # OUTLAY_KEY: 7,
62 # 缺失表头再次查找 90 # BORROW_KEY: 8,
63 for header_col in range(1, consts.FIXED_COL_AMOUNT + 1): 91 # 'header': ('日期', '金额')
64 if header_col in header_col_set or header_col == consts.RESULT_HEADER_COL: 92 # }
65 continue 93 statistics_header_info = {}
66 fix_col = consts.CLASSIFY_LIST[classify][1][header_col - 1] 94 sheet_order_list = sorted(sheet_header_info, reverse=True,
67 if fix_col is None: 95 key=lambda x: sheet_header_info[x][consts.FIND_COUNT_KEY])
96 best_sheet_info = sheet_header_info.get(sheet_order_list[0])
97 if best_sheet_info.get(consts.FIND_COUNT_KEY, 0) == 0:
98 for key, value in consts.CLASSIFY_MAP.items():
99 col = consts.CLASSIFY_LIST[classify][1][value] - 1
100 statistics_header_info[key] = col
101 statistics_header_info[consts.HEADER_KEY] = consts.CLASSIFY_HEADER_LIST[classify]
102 else:
103 find_col_set = best_sheet_info.get(consts.FIND_COL_KEY, set())
104 # SUMMARY_KEY DATE_KEY OVER_KEY BORROW_KEY
105 for key in consts.KEY_LIST:
106 col = best_sheet_info.get(key)
107 if col is None:
108 col_dict = header_info.get(key, {})
109 for idx in sorted(col_dict, key=lambda x: col_dict[x], reverse=True):
110 if idx in find_col_set:
68 continue 111 continue
69 fix_col = fix_col + consts.FIXED_COL_AMOUNT 112 col = idx
70 if fix_col in moved_col_set: 113 find_col_set.add(col)
71 break 114 break
72 letter = get_column_letter(fix_col) 115 else:
73 ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=header_col - fix_col) 116 fixed_col = consts.CLASSIFY_LIST[classify][1][consts.CLASSIFY_MAP[key]] - 1
117 if fixed_col not in find_col_set:
118 col = fixed_col
119 find_col_set.add(col)
120 statistics_header_info[key] = col
121 statistics_header_info[consts.HEADER_KEY] = best_sheet_info.get(consts.HEADER_KEY)
122 return statistics_header_info
74 123
@staticmethod
def get_data_col_min_row(sheet, sheet_header_info, header_info, classify):
    """Return ``(date_col, min_row)`` for one sheet.

    ``date_col`` is a 0-based column index (the caller adds 1 before handing it
    to openpyxl) or ``None`` when no date column can be determined. It is
    resolved in this order:

    1. the position recorded for ``consts.DATE_KEY`` on this sheet;
    2. the most frequent date position seen on any sheet that is not already
       taken by another recognised header of this sheet;
    3. the date column of the ``classify`` template, converted from the
       template's 1-based numbering to a 0-based index, unless it is missing
       or already taken.

    ``min_row`` is the sheet's recorded first data row, defaulting to 2.

    :param sheet: sheet title used as key in ``sheet_header_info``.
    :param sheet_header_info: per-sheet data as filled by ``header_collect``.
    :param header_info: per-key position counts as filled by ``header_collect``.
    :param classify: index into ``consts.CLASSIFY_LIST``.
    """
    sheet_info = sheet_header_info.get(sheet, {})
    date_col = sheet_info.get(consts.DATE_KEY)
    if date_col is None:
        date_col_dict = header_info.get(consts.DATE_KEY, {})
        find_col_set = sheet_info.get(consts.FIND_COL_KEY, set())
        for idx in sorted(date_col_dict, key=lambda x: date_col_dict[x], reverse=True):
            if idx in find_col_set:
                continue
            date_col = idx
            break
        else:
            fixed_col = consts.CLASSIFY_LIST[classify][1][consts.CLASSIFY_MAP[consts.DATE_KEY]]
            if fixed_col is not None:
                # Template columns are 1-based; everything else here is 0-based.
                fixed_idx = fixed_col - 1
                if fixed_idx not in find_col_set:
                    date_col = fixed_idx
    min_row = sheet_info.get(consts.MIN_ROW_KEY, 2)
    return date_col, min_row
78 141
79 @staticmethod 142 @staticmethod
80 def month_split(dti, date_list, date_statistics): 143 def month_split(dti, date_list, date_statistics):
...@@ -122,8 +185,14 @@ class BSWorkbook(Workbook): ...@@ -122,8 +185,14 @@ class BSWorkbook(Workbook):
122 reverse_trend = -1 185 reverse_trend = -1
123 return reverse_trend 186 return reverse_trend
124 187
125 def sheet_split(self, ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics): 188 def sheet_split(self, ws, date_col, min_row, month_mapping, reverse_trend_list, date_list, date_statistics):
126 for date_tuple_src in ws.iter_cols(min_col=1, max_col=1, min_row=min_row, values_only=True): 189 if date_col is None:
190 # month_info process
191 month_info = month_mapping.setdefault('xxxx-xx', [])
192 month_info.append((ws.title, min_row, ws.max_row, 0))
193 return
194 date_col = date_col + 1
195 for date_tuple_src in ws.iter_cols(min_col=date_col, max_col=date_col, min_row=min_row, values_only=True):
127 date_tuple = [date[:10] if isinstance(date, str) else date for date in date_tuple_src] 196 date_tuple = [date[:10] if isinstance(date, str) else date for date in date_tuple_src]
128 dt_array, tz_parsed = tslib.array_to_datetime( 197 dt_array, tz_parsed = tslib.array_to_datetime(
129 np.array(date_tuple, copy=False, dtype=np.object_), 198 np.array(date_tuple, copy=False, dtype=np.object_),
...@@ -199,11 +268,12 @@ class BSWorkbook(Workbook): ...@@ -199,11 +268,12 @@ class BSWorkbook(Workbook):
199 return amount_str 268 return amount_str
200 # 1.替换 269 # 1.替换
201 res_str = amount_str.translate(consts.TRANS) 270 res_str = amount_str.translate(consts.TRANS)
202 # 2.删除多余的- 271 # 2.首字符处理
203 res_str = res_str[0] + res_str[1:].replace('-', '') 272 first_char = res_str[0]
204 # 3.首字符处理 273 if first_char in consts.ERROR_CHARS:
205 if res_str[0] in consts.ERROR_CHARS: 274 first_char = '-'
206 res_str = '-{0}'.format(res_str[1:]) 275 # 3.删除多余的-
276 res_str = first_char + res_str[1:].replace('-', '')
207 # 4.逗号与句号处理 277 # 4.逗号与句号处理
208 if len(res_str) >= 4: 278 if len(res_str) >= 4:
209 period_idx = len(res_str) - 3 279 period_idx = len(res_str) - 3
...@@ -213,13 +283,29 @@ class BSWorkbook(Workbook): ...@@ -213,13 +283,29 @@ class BSWorkbook(Workbook):
213 res_str = '{0}.{1}'.format(res_str[:period_idx], res_str[period_idx + 1:]) 283 res_str = '{0}.{1}'.format(res_str[:period_idx], res_str[period_idx + 1:])
214 return res_str 284 return res_str
215 285
216 def build_month_sheet(self, card, month_mapping, ms, is_reverse): 286 def build_month_sheet(self, ms, card, month_mapping, is_reverse, statistics_header_info):
287 summary_cell_idx = statistics_header_info.get(consts.SUMMARY_KEY)
288 date_cell_idx = statistics_header_info.get(consts.DATE_KEY)
289 amount_cell_idx = statistics_header_info.get(consts.AMOUNT_KEY) # None or src or append
290 over_cell_idx = statistics_header_info.get(consts.OVER_KEY)
291 income_cell_idx = statistics_header_info.get(consts.IMCOME_KEY)
292 outlay_cell_idx = statistics_header_info.get(consts.OUTLAY_KEY)
293 borrow_cell_idx = statistics_header_info.get(consts.BORROW_KEY)
294 header = list(statistics_header_info.get(consts.HEADER_KEY))
295
296 add_col = ['核对结果']
297 if amount_cell_idx is None:
298 if income_cell_idx is not None or outlay_cell_idx is not None:
299 add_col = ['金额', '核对结果']
300 amount_cell_idx = len(header)
301 header.extend(add_col)
302
217 tmp_ws = self.create_sheet('tmp_ws') 303 tmp_ws = self.create_sheet('tmp_ws')
218 for month in sorted(month_mapping.keys()): 304 for month in sorted(month_mapping.keys()):
219 # 3.1.拷贝数据 305 # 3.1.拷贝数据
220 parts = month_mapping.get(month) 306 parts = month_mapping.get(month)
221 new_ws = self.create_sheet('{0}({1})'.format(month, card[-6:])) 307 new_ws = self.create_sheet('{0}({1})'.format(month, card[-6:]))
222 new_ws.append(consts.FIXED_HEADERS) 308 new_ws.append(header)
223 for part in parts: 309 for part in parts:
224 ws = self.get_sheet_by_name(part[0]) 310 ws = self.get_sheet_by_name(part[0])
225 for row_value in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True): 311 for row_value in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True):
...@@ -227,76 +313,95 @@ class BSWorkbook(Workbook): ...@@ -227,76 +313,95 @@ class BSWorkbook(Workbook):
227 # 3.2.提取信息、高亮 313 # 3.2.提取信息、高亮
228 amount_mapping = {} 314 amount_mapping = {}
229 amount_fill_row = set() 315 amount_fill_row = set()
316
230 for rows in new_ws.iter_rows(min_row=2): 317 for rows in new_ws.iter_rows(min_row=2):
231 summary_cell = rows[consts.SUMMARY_IDX] 318 # TODO 删除空行
232 date_cell = rows[consts.DATE_IDX] 319 summary_cell = None if summary_cell_idx is None else rows[summary_cell_idx]
233 amount_cell = rows[consts.AMOUNT_IDX] 320 date_cell = None if summary_cell_idx is None else rows[date_cell_idx]
234 row = summary_cell.row 321 amount_cell = None if summary_cell_idx is None else rows[amount_cell_idx]
322 over_cell = None if summary_cell_idx is None else rows[over_cell_idx]
323 income_cell = None if summary_cell_idx is None else rows[income_cell_idx]
324 outlay_cell = None if summary_cell_idx is None else rows[outlay_cell_idx]
325 borrow_cell = None if summary_cell_idx is None else rows[borrow_cell_idx]
326
327 summary_cell_value = None if summary_cell is None else summary_cell.value
328 date_cell_value = None if summary_cell is None else date_cell.value
329 amount_cell_value = None if summary_cell is None else amount_cell.value
330 over_cell_value = None if summary_cell is None else over_cell.value
331 income_cell_value = None if summary_cell is None else income_cell.value
332 outlay_cell_value = None if summary_cell is None else outlay_cell.value
333 borrow_cell_value = None if summary_cell is None else borrow_cell.value
334
335 # row = summary_cell.row
336 if summary_cell is not None:
235 # 关键词1提取 337 # 关键词1提取
236 if summary_cell.value in self.interest_keyword: 338 if summary_cell_value in self.interest_keyword:
237 ms.append((summary_cell.value, date_cell.value, amount_cell.value)) 339 ms.append((summary_cell_value, date_cell_value, amount_cell_value))
238 # 关键词2提取至临时表 340 # 关键词2提取至临时表
239 elif summary_cell.value in self.salary_keyword: 341 elif summary_cell_value in self.salary_keyword:
240 tmp_ws.append((summary_cell.value, date_cell.value, amount_cell.value)) 342 tmp_ws.append((summary_cell_value, date_cell_value, amount_cell_value))
241 # 贷款关键词高亮 343 # 贷款关键词高亮
242 elif summary_cell.value in self.loan_keyword: 344 elif summary_cell_value in self.loan_keyword:
243 summary_cell.fill = self.loan_fill 345 summary_cell.fill = self.loan_fill
244 346
245 amount_error = False
246 # 3.3.余额转数值 347 # 3.3.余额转数值
247 over_cell = rows[consts.OVER_IDX] 348 over_success = False
349 if over_cell is not None:
248 try: 350 try:
249 over_cell.value = locale.atof(self.amount_format(over_cell.value)) 351 over_cell.value = locale.atof(self.amount_format(over_cell_value))
250 except Exception as e: 352 except Exception as e:
251 amount_error = True 353 pass
252 else: 354 else:
355 over_success = True
253 over_cell.number_format = numbers.FORMAT_NUMBER_00 356 over_cell.number_format = numbers.FORMAT_NUMBER_00
254 357
255 # 3.4.金额转数值 358 # 3.4.金额转数值
359 amount_success = False
360 if amount_cell is not None:
256 try: 361 try:
257 try: 362 try:
258 amount_cell.value = locale.atof(self.amount_format(amount_cell.value)) 363 amount_cell.value = locale.atof(self.amount_format(amount_cell_value))
259 except Exception as e: 364 except Exception as e:
260 try: 365 try:
261 amount_cell.value = locale.atof(self.amount_format(rows[consts.INCOME_IDX].value)) 366 amount_cell.value = locale.atof(self.amount_format(income_cell_value))
262 if amount_cell.value == 0: 367 if amount_cell.value == 0:
263 raise 368 raise
264 elif amount_cell.value < 0: 369 elif amount_cell.value < 0:
265 amount_cell.value = -amount_cell.value 370 amount_cell.value = -amount_cell.value
266 except Exception as e: 371 except Exception as e:
267 amount_cell.value = locale.atof(self.amount_format(rows[consts.OUTLAY_IDX].value)) 372 amount_cell.value = locale.atof(self.amount_format(outlay_cell_value))
268 if amount_cell.value > 0: 373 if amount_cell.value > 0:
269 amount_cell.value = -amount_cell.value 374 amount_cell.value = -amount_cell.value
270 except Exception as e: 375 except Exception as e:
271 amount_error = True 376 pass
272 else: 377 else:
273 if rows[consts.BORROW_IDX].value in consts.BORROW_OUTLAY_SET: 378 amount_success = True
379 if borrow_cell_value in consts.BORROW_OUTLAY_SET:
274 amount_cell.value = -amount_cell.value 380 amount_cell.value = -amount_cell.value
275 amount_cell.number_format = numbers.FORMAT_NUMBER_00 381 amount_cell.number_format = numbers.FORMAT_NUMBER_00
276 same_amount_mapping = amount_mapping.get(date_cell.value, {}) 382 same_amount_mapping = amount_mapping.get(date_cell.value, {})
277 fill_rows = same_amount_mapping.get(-amount_cell.value) 383 fill_rows = same_amount_mapping.get(-amount_cell.value)
278 if fill_rows: 384 if fill_rows:
279 amount_fill_row.add(row) 385 amount_fill_row.add(amount_cell.row)
280 amount_fill_row.update(fill_rows) 386 amount_fill_row.update(fill_rows)
281 amount_mapping.setdefault(date_cell.value, {}).setdefault( 387 amount_mapping.setdefault(date_cell.value, {}).setdefault(
282 amount_cell.value, []).append(row) 388 amount_cell.value, []).append(amount_cell.row)
283 389
284 # 3.5.核对结果 390 # 3.5.核对结果
285 if row > 2 and not amount_error: 391 if amount_success and over_success and amount_cell.row > 2:
392 amount_col_letter = get_column_letter(amount_cell_idx + 1)
393 over_col_letter = get_column_letter(over_cell_idx + 1)
286 if is_reverse: 394 if is_reverse:
287 rows[consts.RESULT_IDX].value = '=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'.format( 395 rows[consts.RESULT_IDX].value = '=IF({2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5}")'.format(
288 row - 1, row, *self.proof_res) 396 amount_cell.row - 1, amount_cell.row, over_col_letter, amount_col_letter, *self.proof_res)
289 else: 397 else:
290 rows[consts.RESULT_IDX].value = '=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'.format( 398 rows[consts.RESULT_IDX].value = '=IF({2}{0}=ROUND(SUM({2}{1},{3}{0}),4), "{4}", "{5}")'.format(
291 row, row - 1, *self.proof_res) 399 amount_cell.row, amount_cell.row - 1, over_col_letter, amount_col_letter, *self.proof_res)
292
293 # 删除金额辅助列
294 new_ws.delete_cols(consts.BORROW_HEADER_COL, amount=new_ws.max_column)
295 400
296 # 3.6.同一天相同进出账高亮 401 # 3.6.同一天相同进出账高亮
297 del amount_mapping 402 del amount_mapping
298 for row in amount_fill_row: 403 for row in amount_fill_row:
299 new_ws[row][consts.AMOUNT_IDX].fill = self.amount_fill 404 new_ws[row][amount_cell_idx].fill = self.amount_fill
300 405
301 # 关键词2信息提取 406 # 关键词2信息提取
302 ms.append(self.blank_row) 407 ms.append(self.blank_row)
...@@ -319,21 +424,27 @@ class BSWorkbook(Workbook): ...@@ -319,21 +424,27 @@ class BSWorkbook(Workbook):
319 # } 424 # }
320 # } 425 # }
321 for card, summary in bs_summary.items(): 426 for card, summary in bs_summary.items():
322 # 1.原表修剪、排列、按照月份分割 427 # 1.原表表头收集、按照月份分割
428 # 1.1 总结首行信息
429 classify = summary.get('classify', 0)
430 sheet_header_info = {}
431 header_info = {}
432 for sheet in summary.get('sheet', []):
433 ws = self.get_sheet_by_name(sheet)
434 self.header_collect(ws, sheet_header_info, header_info)
435 statistics_header_info = self.header_statistics(sheet_header_info, header_info, classify)
436
437 # 1.2.按月份分割 min_row 正文第一行 date_col 日期行
323 start_date = summary.get('start_date') 438 start_date = summary.get('start_date')
324 end_date = summary.get('end_date') 439 end_date = summary.get('end_date')
325 date_statistics = False 440 date_statistics = True if start_date is None or end_date is None else False # 用于判断是否需要收集各表中日期
326 if start_date is None or end_date is None: 441 date_list = [] # 用于收集各表中日期
327 date_statistics = True 442 month_mapping = {} # 用于创建月份表
328 date_list = [] 443 reverse_trend_list = [] # 用于判断倒序与正序
329 month_mapping = {}
330 reverse_trend_list = []
331 for sheet in summary.get('sheet', []): 444 for sheet in summary.get('sheet', []):
332 ws = self.get_sheet_by_name(sheet) 445 ws = self.get_sheet_by_name(sheet)
333 # 1.1.删除多余列、排列 446 date_col, min_row = self.get_data_col_min_row(sheet, sheet_header_info, header_info, classify)
334 min_row = self.sheet_prune(ws, summary.get('classify', 0)) 447 self.sheet_split(ws, date_col, min_row, month_mapping, reverse_trend_list, date_list, date_statistics)
335 # 1.2.按月份分割
336 self.sheet_split(ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics)
337 448
338 if date_statistics is True and len(date_list) > 1: 449 if date_statistics is True and len(date_list) > 1:
339 start_date = min(date_list) if start_date is None else start_date 450 start_date = min(date_list) if start_date is None else start_date
...@@ -353,7 +464,7 @@ class BSWorkbook(Workbook): ...@@ -353,7 +464,7 @@ class BSWorkbook(Workbook):
353 for month_list in month_mapping.values(): 464 for month_list in month_mapping.values():
354 month_list.sort(key=lambda x: x[-1], reverse=is_reverse) 465 month_list.sort(key=lambda x: x[-1], reverse=is_reverse)
355 466
356 self.build_month_sheet(card, month_mapping, ms, is_reverse) 467 self.build_month_sheet(ms, card, month_mapping, is_reverse, statistics_header_info)
357 468
358 # 4.删除原表 469 # 4.删除原表
359 for sheet in summary.get('sheet'): 470 for sheet in summary.get('sheet'):
......
1 import locale
2 import numpy as np
3 from pandas._libs import tslib
4 from pandas._libs.tslibs.nattype import NaTType
5 from pandas.core.indexes.datetimes import DatetimeIndex
6 from openpyxl import Workbook
7 from openpyxl.styles import Border, Side, PatternFill, numbers
8 from openpyxl.utils import get_column_letter
9 from apps.doc import consts
10
11
12 class BSWorkbook(Workbook):
13
# Create the workbook and store the keyword collections, header rows and cell
# fills used when processing the sheets. Extra positional and keyword arguments
# are passed through to openpyxl's Workbook.
14 def __init__(self, interest_keyword, salary_keyword, loan_keyword, *args, **kwargs):
15 super().__init__(*args, **kwargs)
# Amount strings are parsed with locale.atof elsewhere in this class, so the
# numeric locale is fixed here. setlocale affects the whole process and raises
# locale.Error when 'en_US.UTF-8' is not installed on the host.
16 locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8')
# Same text as consts.META_SHEET_TITLE.
17 self.meta_sheet_title = '关键信息提取和展示'
# One-cell row appended as a visual separator.
18 self.blank_row = (None,)
19 self.code_header = ('页数', '电子回单验证码')
20 self.date_header = ('打印时间', '起始日期', '终止日期', '流水区间结果')
21 self.keyword_header = ('关键词', '记账日期', '金额')
# Keyword collections; cell values are tested against them with `in`.
22 self.interest_keyword = interest_keyword
23 self.salary_keyword = salary_keyword
24 self.loan_keyword = loan_keyword
# (match text, mismatch text) substituted into the balance-check formula.
25 self.proof_res = ('对', '错')
# Fill for rows matching a loan keyword.
26 self.loan_fill = PatternFill("solid", fgColor="00FFCC00")
# Fill for same-day amounts that cancel each other out.
27 self.amount_fill = PatternFill("solid", fgColor="00FFFF00")
28 # self.bd = Side(style='thin', color="000000")
29 # self.border = Border(left=self.bd, top=self.bd, right=self.bd, bottom=self.bd)
30 self.MAX_MEAN = 31
31
@staticmethod
def sheet_prune(ws, classify):
    """Rearrange ``ws`` in place into the fixed column layout.

    ``consts.FIXED_COL_AMOUNT`` empty columns are inserted on the left, source
    columns are moved into them, and everything to the right of the fixed
    block is deleted.

    Columns are placed in two passes:

    1. by the text in row 1, through ``consts.HEADERS_MAPPING`` (first match per
       target wins) or the borrow / income / outlay header sets;
    2. for targets still empty, by the ``classify`` template in
       ``consts.CLASSIFY_LIST``. This pass stops at the first template column
       that pass 1 already moved.

    :param ws: worksheet to modify.
    :param classify: index into ``consts.CLASSIFY_LIST``.
    :return: 2 when at least one header was recognised in row 1, else 1.
    """
    fixed_amount = consts.FIXED_COL_AMOUNT
    ws.insert_cols(1, amount=fixed_amount)
    moved_col_set = set()
    header_col_set = set()

    def shift_column(src_col, dst_col):
        # Move the whole column src_col (rows 1..max_row) so it lands on dst_col.
        letter = get_column_letter(src_col)
        ws.move_range("{0}1:{0}{1}".format(letter, ws.max_row), cols=dst_col - src_col)

    # Pass 1: place columns according to the keyword found in the first row.
    for col in range(fixed_amount + 1, ws.max_column + 1):
        header_value = ws.cell(1, col).value
        target_col = consts.HEADERS_MAPPING.get(header_value)
        if target_col is None or target_col in header_col_set:
            if header_value in consts.BORROW_HEADERS_SET:
                target_col = consts.BORROW_HEADER_COL
            elif header_value in consts.INCOME_HEADERS_SET:
                target_col = consts.INCOME_HEADER_COL
            elif header_value in consts.OUTLAY_HEADERS_SET:
                target_col = consts.OUTLAY_HEADER_COL
            else:
                continue
        shift_column(col, target_col)
        moved_col_set.add(col)
        header_col_set.add(target_col)

    # Pass 2: fill the targets that are still missing from the classify template.
    for header_col in range(1, fixed_amount + 1):
        if header_col in header_col_set or header_col == consts.RESULT_HEADER_COL:
            continue
        fix_col = consts.CLASSIFY_LIST[classify][1][header_col - 1]
        if fix_col is None:
            continue
        fix_col += fixed_amount
        if fix_col in moved_col_set:
            break
        shift_column(fix_col, header_col)

    ws.delete_cols(fixed_amount + 1, amount=ws.max_column)
    return 2 if moved_col_set else 1
78
79 @staticmethod
80 def month_split(dti, date_list, date_statistics):
81 month_list = []
82 idx_list = []
83 month_pre = None
84 for idx, month_str in enumerate(dti.strftime('%Y-%m')):
85 if isinstance(month_str, float):
86 continue
87 if month_str != month_pre:
88 month_list.append(month_str)
89 if month_pre is None:
90 if date_statistics:
91 date_list.append(dti[idx].date())
92 idx = 0
93 idx_list.append(idx)
94 month_pre = month_str
95 if date_statistics:
96 for idx in range(len(dti) - 1, -1, -1):
97 if isinstance(dti[idx], NaTType):
98 continue
99 date_list.append(dti[idx].date())
100 break
101 return month_list, idx_list
102
103 @staticmethod
104 def get_reverse_trend(day_idx, idx_list):
105 reverse_trend = 0
106 pre_day = None
107 for idx, day in enumerate(day_idx):
108 if np.isnan(day):
109 continue
110 if idx in idx_list or pre_day is None:
111 pre_day = day
112 continue
113 if day < pre_day:
114 reverse_trend += 1
115 pre_day = day
116 elif day > pre_day:
117 reverse_trend -= 1
118 pre_day = day
119 if reverse_trend > 0:
120 reverse_trend = 1
121 elif reverse_trend < 0:
122 reverse_trend = -1
123 return reverse_trend
124
125 def sheet_split(self, ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics):
126 for date_tuple_src in ws.iter_cols(min_col=1, max_col=1, min_row=min_row, values_only=True):
127 date_tuple = [date[:10] if isinstance(date, str) else date for date in date_tuple_src]
128 dt_array, tz_parsed = tslib.array_to_datetime(
129 np.array(date_tuple, copy=False, dtype=np.object_),
130 errors="coerce",
131 utc=False,
132 dayfirst=False,
133 yearfirst=False,
134 require_iso8601=True,
135 )
136 dti = DatetimeIndex(dt_array, tz=None, name=None)
137
138 month_list, idx_list = self.month_split(dti, date_list, date_statistics)
139
140 if len(month_list) == 0:
141 # month_info process
142 month_info = month_mapping.setdefault('xxxx-xx', [])
143 month_info.append((ws.title, min_row, ws.max_row, 0))
144 else:
145 # reverse_trend_list process
146 reverse_trend = self.get_reverse_trend(dti.day, idx_list)
147 reverse_trend_list.append(reverse_trend)
148 # month_info process
149 day_idx = dti.day
150 idx_list_max_idx = len(idx_list) - 1
151 for i, item in enumerate(month_list):
152 if i == idx_list_max_idx:
153 day_mean = np.mean(day_idx[idx_list[i]:].dropna())
154 month_mapping.setdefault(item, []).append(
155 (ws.title, idx_list[i] + min_row, ws.max_row, day_mean))
156 else:
157 day_mean = np.mean(day_idx[idx_list[i]: idx_list[i + 1]].dropna())
158 month_mapping.setdefault(item, []).append(
159 (ws.title, idx_list[i] + min_row, idx_list[i + 1] + min_row - 1, day_mean))
160
161 def build_metadata_rows(self, confidence, code, print_time, start_date, end_date):
162 if start_date is None or end_date is None:
163 timedelta = None
164 else:
165 timedelta = (end_date - start_date).days
166 metadata_rows = [
167 ('流水识别置信度', confidence),
168 self.blank_row,
169 self.code_header,
170 ]
171 metadata_rows.extend(code)
172 metadata_rows.extend(
173 [self.blank_row,
174 self.date_header,
175 (print_time, start_date, end_date, timedelta),
176 self.blank_row,
177 self.keyword_header]
178 )
179 return metadata_rows
180
181 def create_meta_sheet(self, card):
182 if self.worksheets[0].title == 'Sheet':
183 ms = self.worksheets[0]
184 ms.title = '{0}({1})'.format(self.meta_sheet_title, card[-6:])
185 else:
186 ms = self.create_sheet('{0}({1})'.format(self.meta_sheet_title, card[-6:]))
187 return ms
188
189 def build_meta_sheet(self, card, confidence, code, print_time, start_date, end_date):
190 metadata_rows = self.build_metadata_rows(confidence, code, print_time, start_date, end_date)
191 ms = self.create_meta_sheet(card)
192 for row in metadata_rows:
193 ms.append(row)
194 return ms
195
196 @staticmethod
197 def amount_format(amount_str):
198 if not isinstance(amount_str, str) or amount_str == '':
199 return amount_str
200 # 1.替换
201 res_str = amount_str.translate(consts.TRANS)
202 # 2.首字符处理
203 first_char = res_str[0]
204 if first_char in consts.ERROR_CHARS:
205 first_char = '-'
206 # 3.删除多余的-
207 res_str = first_char + res_str[1:].replace('-', '')
208 # 4.逗号与句号处理
209 if len(res_str) >= 4:
210 period_idx = len(res_str) - 3
211 if res_str[period_idx] == '.' and res_str[period_idx - 1] == ',':
212 res_str = '{0}{1}'.format(res_str[:period_idx - 1], res_str[period_idx:])
213 elif res_str[period_idx] == ',':
214 res_str = '{0}.{1}'.format(res_str[:period_idx], res_str[period_idx + 1:])
215 return res_str
216
217 def build_month_sheet(self, card, month_mapping, ms, is_reverse):
218 tmp_ws = self.create_sheet('tmp_ws')
219 for month in sorted(month_mapping.keys()):
220 # 3.1.拷贝数据
221 parts = month_mapping.get(month)
222 new_ws = self.create_sheet('{0}({1})'.format(month, card[-6:]))
223 new_ws.append(consts.FIXED_HEADERS)
224 for part in parts:
225 ws = self.get_sheet_by_name(part[0])
226 for row_value in ws.iter_rows(min_row=part[1], max_row=part[2], values_only=True):
227 new_ws.append(row_value)
228 # 3.2.提取信息、高亮
229 amount_mapping = {}
230 amount_fill_row = set()
231 for rows in new_ws.iter_rows(min_row=2):
232 summary_cell = rows[consts.SUMMARY_IDX]
233 date_cell = rows[consts.DATE_IDX]
234 amount_cell = rows[consts.AMOUNT_IDX]
235 row = summary_cell.row
236 # 关键词1提取
237 if summary_cell.value in self.interest_keyword:
238 ms.append((summary_cell.value, date_cell.value, amount_cell.value))
239 # 关键词2提取至临时表
240 elif summary_cell.value in self.salary_keyword:
241 tmp_ws.append((summary_cell.value, date_cell.value, amount_cell.value))
242 # 贷款关键词高亮
243 elif summary_cell.value in self.loan_keyword:
244 summary_cell.fill = self.loan_fill
245
246 amount_error = False
247 # 3.3.余额转数值
248 over_cell = rows[consts.OVER_IDX]
249 try:
250 over_cell.value = locale.atof(self.amount_format(over_cell.value))
251 except Exception as e:
252 amount_error = True
253 else:
254 over_cell.number_format = numbers.FORMAT_NUMBER_00
255
256 # 3.4.金额转数值
257 try:
258 try:
259 amount_cell.value = locale.atof(self.amount_format(amount_cell.value))
260 except Exception as e:
261 try:
262 amount_cell.value = locale.atof(self.amount_format(rows[consts.INCOME_IDX].value))
263 if amount_cell.value == 0:
264 raise
265 elif amount_cell.value < 0:
266 amount_cell.value = -amount_cell.value
267 except Exception as e:
268 amount_cell.value = locale.atof(self.amount_format(rows[consts.OUTLAY_IDX].value))
269 if amount_cell.value > 0:
270 amount_cell.value = -amount_cell.value
271 except Exception as e:
272 amount_error = True
273 else:
274 if rows[consts.BORROW_IDX].value in consts.BORROW_OUTLAY_SET:
275 amount_cell.value = -amount_cell.value
276 amount_cell.number_format = numbers.FORMAT_NUMBER_00
277 same_amount_mapping = amount_mapping.get(date_cell.value, {})
278 fill_rows = same_amount_mapping.get(-amount_cell.value)
279 if fill_rows:
280 amount_fill_row.add(row)
281 amount_fill_row.update(fill_rows)
282 amount_mapping.setdefault(date_cell.value, {}).setdefault(
283 amount_cell.value, []).append(row)
284
285 # 3.5.核对结果
286 if row > 2 and not amount_error:
287 if is_reverse:
288 rows[consts.RESULT_IDX].value = '=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'.format(
289 row - 1, row, *self.proof_res)
290 else:
291 rows[consts.RESULT_IDX].value = '=IF(D{0}=ROUND(SUM(D{1},C{0}),2), "{2}", "{3}")'.format(
292 row, row - 1, *self.proof_res)
293
294 # 删除金额辅助列
295 new_ws.delete_cols(consts.BORROW_HEADER_COL, amount=new_ws.max_column)
296
297 # 3.6.同一天相同进出账高亮
298 del amount_mapping
299 for row in amount_fill_row:
300 new_ws[row][consts.AMOUNT_IDX].fill = self.amount_fill
301
302 # 关键词2信息提取
303 ms.append(self.blank_row)
304 ms.append(self.keyword_header)
305 for row in tmp_ws.iter_rows(values_only=True):
306 ms.append(row)
307 self.remove(tmp_ws)
308
309 def bs_rebuild(self, bs_summary):
310 # bs_summary = {
311 # '卡号': {
312 # 'classify': 0,
313 # 'confidence': 0.9,
314 # 'role': '柳雪',
315 # 'code': [('page', 'code')],
316 # 'print_time': 'datetime',
317 # 'start_date': 'datetime',
318 # 'end_date': 'datetime',
319 # 'sheet': ['sheet_name']
320 # }
321 # }
322 for card, summary in bs_summary.items():
323 # 1.原表修剪、排列、按照月份分割
324 start_date = summary.get('start_date')
325 end_date = summary.get('end_date')
326 date_statistics = False
327 if start_date is None or end_date is None:
328 date_statistics = True
329 date_list = []
330 month_mapping = {}
331 reverse_trend_list = []
332 for sheet in summary.get('sheet', []):
333 ws = self.get_sheet_by_name(sheet)
334 # 1.1.删除多余列、排列
335 min_row = self.sheet_prune(ws, summary.get('classify', 0))
336 # 1.2.按月份分割
337 self.sheet_split(ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics)
338
339 if date_statistics is True and len(date_list) > 1:
340 start_date = min(date_list) if start_date is None else start_date
341 end_date = max(date_list) if end_date is None else end_date
342
343 # 2.元信息提取表
344 ms = self.build_meta_sheet(card,
345 summary.get('confidence', 1),
346 summary.get('code'),
347 summary.get('print_time'),
348 start_date,
349 end_date)
350
351 # 3.创建月份表、提取/高亮关键行
352 # 倒序处理
353 is_reverse = True if sum(reverse_trend_list) > 0 else False
354 for month_list in month_mapping.values():
355 month_list.sort(key=lambda x: x[-1], reverse=is_reverse)
356
357 self.build_month_sheet(card, month_mapping, ms, is_reverse)
358
359 # 4.删除原表
360 for sheet in summary.get('sheet'):
361 self.remove(self.get_sheet_by_name(sheet))
362
363 def license_rebuild(self, license_summary, document_scheme):
364 for classify, (_, name, field_order, side_diff, scheme_diff) in consts.LICENSE_ORDER:
365 license_list = license_summary.get(classify)
366 if not license_list:
367 continue
368 ws = self.create_sheet(name)
369 if scheme_diff and document_scheme == consts.DOC_SCHEME_LIST[1]:
370 classify = consts.MVC_CLASSIFY_SE
371 for license_dict in license_list:
372 if classify == consts.IC_CLASSIFY and license_dict.get('类别') == '1':
373 license_summary.setdefault(consts.RP_CLASSIFY, []).append(license_dict)
374 continue
375 if side_diff:
376 key, field_order_yes, field_order_no = consts.FIELD_ORDER_MAP.get(classify)
377 field_order = field_order_yes if key in license_dict else field_order_no
378 for search_field, write_field in field_order:
379 ws.append((write_field, license_dict.get(search_field, '')))
380 ws.append((None, ))
381
382 def skip_img_sheet(self, skip_img):
383 if skip_img:
384 ws = self.create_sheet(consts.SKIP_IMG_SHEET_NAME)
385 ws.append(consts.SKIP_IMG_SHEET_HEADER)
386 for img_tuple in skip_img:
387 ws.append(img_tuple)
388
389 def rebuild(self, bs_summary, license_summary, skip_img, document_scheme):
390 self.bs_rebuild(bs_summary)
391 self.license_rebuild(license_summary, document_scheme)
392 self.skip_img_sheet(skip_img)
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!