add license
Showing
4 changed files
with
319 additions
and
179 deletions
| ... | @@ -95,7 +95,7 @@ HEADERS_MAPPING.update( | ... | @@ -95,7 +95,7 @@ HEADERS_MAPPING.update( |
| 95 | HEADERS_MAPPING.update( | 95 | HEADERS_MAPPING.update( |
| 96 | { | 96 | { |
| 97 | '交易日期': BASE_HEADERS_MAPPING['记账日期'], | 97 | '交易日期': BASE_HEADERS_MAPPING['记账日期'], |
| 98 | '存入': BASE_HEADERS_MAPPING['金额'], | 98 | # '存入': BASE_HEADERS_MAPPING['金额'], |
| 99 | '对方账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | 99 | '对方账号': BASE_HEADERS_MAPPING['对方卡号/账号'], |
| 100 | '对方名称': BASE_HEADERS_MAPPING['对方账户名'], | 100 | '对方名称': BASE_HEADERS_MAPPING['对方账户名'], |
| 101 | '摘要': BASE_HEADERS_MAPPING['附言'], | 101 | '摘要': BASE_HEADERS_MAPPING['附言'], |
| ... | @@ -160,6 +160,12 @@ HEADERS_MAPPING.update( | ... | @@ -160,6 +160,12 @@ HEADERS_MAPPING.update( |
| 160 | '交易地点/对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'], | 160 | '交易地点/对方账号和户名': BASE_HEADERS_MAPPING['对方卡号/账号'], |
| 161 | } | 161 | } |
| 162 | ) | 162 | ) |
| 163 | # 农业银行-窄页 | ||
| 164 | HEADERS_MAPPING.update( | ||
| 165 | { | ||
| 166 | '交易对手账号': BASE_HEADERS_MAPPING['对方卡号/账号'], | ||
| 167 | } | ||
| 168 | ) | ||
| 163 | # 竖版-特殊-农商行 | 169 | # 竖版-特殊-农商行 |
| 164 | HEADERS_MAPPING.update( | 170 | HEADERS_MAPPING.update( |
| 165 | { | 171 | { |
| ... | @@ -299,17 +305,27 @@ HEADERS_MAPPING.update( | ... | @@ -299,17 +305,27 @@ HEADERS_MAPPING.update( |
| 299 | # ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)), # 横版-表格-中国银行(不规则) | 305 | # ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)), # 横版-表格-中国银行(不规则) |
| 300 | # | 306 | # |
| 301 | # # 农业银行:交易日期 交易网点 存入 支出 余额 对方账号 对方名称 摘要 渠道 附言 | 307 | # # 农业银行:交易日期 交易网点 存入 支出 余额 对方账号 对方名称 摘要 渠道 附言 |
| 302 | # ('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)), # 横版-表格-农业银行-中国农业银行个人账户明细 | 308 | # ('农业银行-10', (1, None, None, 5, None, 8, 7, 6, None, None, None, 3, 4)), # 横版-表格-农业银行-中国农业银行个人账户明细 |
| 309 | # | ||
| 310 | # # 农业银行:序号 日期 摘要 交易金额 余额 对方账号 对方名称 交易地点 渠道 附言 | ||
| 311 | # ('农业银行-10-1', (2, None, 4, 5, None, 3, 7, 6, None, None, None, None, None)), | ||
| 312 | # | ||
| 313 | # # 农业银行:交易日期 摘要 交易金额 余额 交易渠道 交易网点 对方账号 对方名称 附言 | ||
| 314 | # ('农业银行-9', (1, None, 3, 4, None, 2, 8, 7, None, None, None, None, None)), | ||
| 303 | # | 315 | # |
| 304 | # # 北京银行:交易日期 业务摘要 收/支 发生额 余额 对方户名 对方账号 交易渠道 | 316 | # # 北京银行:交易日期 业务摘要 收/支 发生额 余额 对方户名 对方账号 交易渠道 |
| 305 | # ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)), # 横版-表格-北京银行 | 317 | # ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)), # 横版-表格-北京银行 |
| 306 | # | 318 | # |
| 307 | # # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 [对方户名 对方账号] 渠道 | 319 | # # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 渠道 |
| 308 | # ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)), | 320 | # ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)), |
| 309 | # | 321 | # |
| 322 | # # 工商银行:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 对方户名 对方账号 渠道 | ||
| 323 | # ('工商银行-电子账单', (1, None, 9, 10, None, 7, 11, 12, None, None, None, None, None)), | ||
| 324 | # | ||
| 310 | # # 建设银行:空 摘要 交易日期 交易金额 账户余额 商户/网点号及其名称 对方账号与户名 --> 竖版-表格-建设银行 | 325 | # # 建设银行:空 摘要 交易日期 交易金额 账户余额 商户/网点号及其名称 对方账号与户名 --> 竖版-表格-建设银行 |
| 311 | # # 序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名 --> 横版-表格-建设银行 | 326 | # # 序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名 --> 横版-表格-建设银行 |
| 312 | # ('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)), | 327 | # ('建设银行-竖版', (3, None, 4, 5, None, 2, None, 7, None, None, None, None, None)), |
| 328 | # ('建设银行-横版', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)), | ||
| 313 | # | 329 | # |
| 314 | # # 微信:交易单号 交易时间 交易类型 收/支/其他 交易方式 金额(元) 交易对方 商户单号 | 330 | # # 微信:交易单号 交易时间 交易类型 收/支/其他 交易方式 金额(元) 交易对方 商户单号 |
| 315 | # ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)), | 331 | # ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)), |
| ... | @@ -320,7 +336,13 @@ HEADERS_MAPPING.update( | ... | @@ -320,7 +336,13 @@ HEADERS_MAPPING.update( |
| 320 | # # -----------------普通打印:部分格线-------------------------------- | 336 | # # -----------------普通打印:部分格线-------------------------------- |
| 321 | # | 337 | # |
| 322 | # # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名 | 338 | # # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名 |
| 323 | # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | 339 | # ('农业银行-5', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), |
| 340 | # | ||
| 341 | # # 农业银行:日期 地点 摘要 存入 支出 余额 对方账号 对方户名 | ||
| 342 | # ('农业银行-8', (1, None, None, 6, None, 3, 8, 7, None, None, None, 4, 5)), | ||
| 343 | |||
| 344 | # # 农业银行:日期 摘要 交易金额 余额 地点 交易对手账号 对方户名 | ||
| 345 | # ('农业银行-窄页', (1, None, 3, 4, None, 2, 7, 6, None, None, None, None, None)), | ||
| 324 | # | 346 | # |
| 325 | # # 农商行:交易日期 交易发生额 账户余额 对方账号 对方户名 摘要 备注 | 347 | # # 农商行:交易日期 交易发生额 账户余额 对方账号 对方户名 摘要 备注 |
| 326 | # ('农商行', (1, None, 2, 3, None, 6, 5, 4, None, None, None, None, None)), | 348 | # ('农商行', (1, None, 2, 3, None, 6, 5, 4, None, None, None, None, None)), |
| ... | @@ -330,6 +352,9 @@ HEADERS_MAPPING.update( | ... | @@ -330,6 +352,9 @@ HEADERS_MAPPING.update( |
| 330 | # | 352 | # |
| 331 | # # 平安电子账单:序号 交易日期 交易网点 摘要 借贷发生额(借:-贷:+) 账户余额 | 353 | # # 平安电子账单:序号 交易日期 交易网点 摘要 借贷发生额(借:-贷:+) 账户余额 |
| 332 | # ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)), | 354 | # ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)), |
| 355 | |||
| 356 | # # 建设银行:序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名 | ||
| 357 | # ('建设银行-电子账单', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)), | ||
| 333 | # | 358 | # |
| 334 | # # -----------------普通打印:无格线------------------------------------- | 359 | # # -----------------普通打印:无格线------------------------------------- |
| 335 | # | 360 | # |
| ... | @@ -338,7 +363,8 @@ HEADERS_MAPPING.update( | ... | @@ -338,7 +363,8 @@ HEADERS_MAPPING.update( |
| 338 | # | 363 | # |
| 339 | # # 邮储银行:交易日期、交易类型 交易币种 交易金额(元) 账户余额(元) [对手方户名 对手方账户 收支类型] --> 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户 | 364 | # # 邮储银行:交易日期、交易类型 交易币种 交易金额(元) 账户余额(元) [对手方户名 对手方账户 收支类型] --> 竖版-无表格-邮储银行-账户对账单 含有对手方户名 对手方账户 |
| 340 | # # 交易日期、交易类型 交易金额(元) 账户余额(元) 操作柜员 --> 竖版-无表格-邮储银行 账户对账单 | 365 | # # 交易日期、交易类型 交易金额(元) 账户余额(元) 操作柜员 --> 竖版-无表格-邮储银行 账户对账单 |
| 341 | # ('邮储银行', (1, None, None, None, None, 2, None, None, None, None, None, None, None)), | 366 | # ('邮储银行-8', (1, None, 4, 5, None, 2, 6, 7, None, None, None, None, None)), |
| 367 | # ('邮储银行-5', (1, None, 3, 4, None, 2, None, None, None, None, None, None, None)), | ||
| 342 | # | 368 | # |
| 343 | # # 工商银行电子版:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 [对方户名 对方账号] 渠道 | 369 | # # 工商银行电子版:交易日期 账号 储种 序号 币种 钞汇 摘要 地区 收入/支出金额 余额 [对方户名 对方账号] 渠道 |
| 344 | # ('工商银行电子版', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)), | 370 | # ('工商银行电子版', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)), |
| ... | @@ -351,13 +377,15 @@ HEADERS_MAPPING.update( | ... | @@ -351,13 +377,15 @@ HEADERS_MAPPING.update( |
| 351 | # ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)), | 377 | # ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)), |
| 352 | # | 378 | # |
| 353 | # # 农业银行:交易日期 摘要/附言 交易金额 对方账号和户名 | 379 | # # 农业银行:交易日期 摘要/附言 交易金额 对方账号和户名 |
| 354 | # ('农业银行', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)), | 380 | # ('农业银行-整数', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)), |
| 355 | # | 381 | # |
| 356 | # # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名 | 382 | # # 农业银行:交易日期 摘要/附言 交易金额 余额 交易地点/对方账号和户名 |
| 357 | # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | 383 | # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), |
| 358 | # | 384 | # |
| 359 | # # 农业银行:日期、时间、[日志号]、短摘要、交易金额、本次余额、交易网点、渠道、附言 | 385 | # # 农业银行:日期、时间、短摘要、交易金额、本次余额、交易网点、渠道、附言 |
| 360 | # ('农业银行', (1, 2, None, None, None, None, None, None, None, None, None, None, None)), | 386 | # # 农业银行:日期、时间、日志号、短摘要、交易金额、本次余额、交易网点、渠道、附言 |
| 387 | # ('农业银行', (1, 2, 4, 5, None, 3, None, None, None, None, None, None, None)), | ||
| 388 | # ('农业银行-扩张缩进', (1, 2, 5, 6, None, 4, None, None, None, None, None, None, None)), | ||
| 361 | # | 389 | # |
| 362 | # # 交通银行:交易日期 记账日期、交易地点、交易类型、借贷状态、交易金额、余额 | 390 | # # 交通银行:交易日期 记账日期、交易地点、交易类型、借贷状态、交易金额、余额 |
| 363 | # ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)), | 391 | # ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)), |
| ... | @@ -374,11 +402,10 @@ HEADERS_MAPPING.update( | ... | @@ -374,11 +402,10 @@ HEADERS_MAPPING.update( |
| 374 | # | 402 | # |
| 375 | # # 邮储银行:序号、交易日期、交易渠道、摘要、交易金额、账户余额、对方账号/卡号/汇票号、原子账号、交易机构名称 | 403 | # # 邮储银行:序号、交易日期、交易渠道、摘要、交易金额、账户余额、对方账号/卡号/汇票号、原子账号、交易机构名称 |
| 376 | # ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | 404 | # ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), |
| 377 | # | ||
| 378 | # # 建设银行:序号 摘要 币别 钞汇 交易日期 交易金额 账户余额 交易地点附言 对方账号与户名 | ||
| 379 | # ('建设银行', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)), | ||
| 380 | # ] | 405 | # ] |
| 381 | 406 | ||
| 407 | OTHER_TUPLE = (None, None, None, None, None, None, None, None, None, None, None, None, None) | ||
| 408 | |||
| 382 | # { | 409 | # { |
| 383 | # "0":"其他", | 410 | # "0":"其他", |
| 384 | # "1":"普通打印-全表格-中国农业银行", | 411 | # "1":"普通打印-全表格-中国农业银行", |
| ... | @@ -408,67 +435,163 @@ HEADERS_MAPPING.update( | ... | @@ -408,67 +435,163 @@ HEADERS_MAPPING.update( |
| 408 | # "22":"针式打印-部分格线-邮储银行一本通绿卡" | 435 | # "22":"针式打印-部分格线-邮储银行一本通绿卡" |
| 409 | # } | 436 | # } |
| 410 | 437 | ||
| 438 | # CLASSIFY_LIST = [ | ||
| 439 | # ('其他', OTHER_TUPLE), | ||
| 440 | # ('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)), | ||
| 441 | # ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)), | ||
| 442 | # ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)), | ||
| 443 | # ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)), | ||
| 444 | # ('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)), | ||
| 445 | # ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)), | ||
| 446 | # ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)), | ||
| 447 | # | ||
| 448 | # ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)), | ||
| 449 | # ('农业银行', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)), | ||
| 450 | # ('农业银行', (1, 2, None, None, None, None, None, None, None, None, None, None, None)), | ||
| 451 | # ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)), | ||
| 452 | # ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)), | ||
| 453 | # ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)), | ||
| 454 | # | ||
| 455 | # ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)), | ||
| 456 | # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | ||
| 457 | # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | ||
| 458 | # ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | ||
| 459 | # ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)), | ||
| 460 | # | ||
| 461 | # ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)), | ||
| 462 | # ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | ||
| 463 | # ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | ||
| 464 | # ] | ||
| 465 | |||
| 466 | # "4":"普通打印-全表格-中国银行", | ||
| 467 | # "5":"普通打印-全表格-农业银行-10列", | ||
| 468 | # "6":"普通打印-全表格-农业银行-10列-1", | ||
| 469 | # "7":"普通打印-全表格-农业银行-9列", | ||
| 470 | # "8":"普通打印-全表格-北京银行", | ||
| 471 | # "9":"普通打印-全表格-工商银行", | ||
| 472 | # "10":"普通打印-全表格-工商银行-电子账单", | ||
| 473 | # "11":"普通打印-全表格-建设银行", | ||
| 474 | # "12":"普通打印-全表格-微信账单", | ||
| 475 | # "13":"普通打印-全表格-支付宝账单", | ||
| 476 | |||
| 477 | # "14":"普通打印-无格线-交通银行", | ||
| 478 | # "15":"普通打印-无格线-储蓄银行-5列", | ||
| 479 | # "16":"普通打印-无格线-储蓄银行-8列", | ||
| 480 | # "17":"普通打印-无格线-农业银行-扩张缩进", | ||
| 481 | # "18":"普通打印-无格线-农业银行-整数", | ||
| 482 | # "19":"普通打印-无格线-招商银行", | ||
| 483 | # "20":"普通打印-无格线-招商银行-电子账单", | ||
| 484 | # "21":"普通打印-无格线-民生银行", | ||
| 485 | |||
| 486 | # "22":"普通打印-部分格线-横版-中信银行", | ||
| 487 | # "23":"普通打印-部分格线-竖版-农业银行-5列", | ||
| 488 | # "24":"普通打印-部分格线-竖版-农业银行-8列", | ||
| 489 | # "25":"普通打印-部分格线-竖版-农业银行-窄页", | ||
| 490 | # "26":"普通打印-部分格线-竖版-平安电子账单", | ||
| 491 | # "27":"普通打印-部分格线-竖版-建设银行-电子账单", | ||
| 492 | |||
| 493 | # "34":"针式打印-全格线-建设银行", | ||
| 494 | # "35":"针式打印-部分格线-竖版-邮储银行", | ||
| 495 | # "36":"针式打印-部分格线-竖版-邮储银行-绿卡", | ||
| 496 | |||
| 411 | CLASSIFY_LIST = [ | 497 | CLASSIFY_LIST = [ |
| 412 | ('其他', (None, None, None, None, None, None, None, None, None, None, None, None, None)), | 498 | ('其他', OTHER_TUPLE), |
| 413 | ('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)), | 499 | ('其他', OTHER_TUPLE), |
| 414 | ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)), | 500 | ('其他', OTHER_TUPLE), |
| 415 | ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)), | 501 | ('其他', OTHER_TUPLE), |
| 416 | ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)), | 502 | ('普通打印-全表格-中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)), |
| 417 | ('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)), | 503 | ('普通打印-全表格-农业银行-10列', (1, None, None, 5, None, 8, 7, 6, None, None, None, 3, 4)), |
| 418 | ('微信', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)), | 504 | ('普通打印-全表格-农业银行-10列-1', (2, None, 4, 5, None, 3, 7, 6, None, None, None, None, None)), |
| 419 | ('支付宝', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)), | 505 | ('普通打印-全表格-农业银行-9列', (1, None, 3, 4, None, 2, 8, 7, None, None, None, None, None)), |
| 420 | 506 | ('普通打印-全表格-北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)), | |
| 421 | ('交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)), | 507 | ('普通打印-全表格-工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)), |
| 422 | ('农业银行', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)), | 508 | ('普通打印-全表格-工商银行-电子账单', (1, None, 9, 10, None, 7, 11, 12, None, None, None, None, None)), |
| 423 | ('农业银行', (1, 2, None, None, None, None, None, None, None, None, None, None, None)), | 509 | ('普通打印-全表格-建设银行', (3, None, 4, 5, None, 2, None, 7, None, None, None, None, None)), |
| 424 | ('招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)), | 510 | ('普通打印-全表格-微信账单', (2, None, 6, None, None, 3, 7, None, None, None, None, None, None)), |
| 425 | ('招商银行电子版', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)), | 511 | ('普通打印-全表格-支付宝账单', (2, None, None, 6, None, 3, None, None, None, None, None, 4, 5)), |
| 426 | ('民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)), | 512 | |
| 427 | 513 | ('普通打印-无格线-交通银行', (1, None, 5, 6, None, 3, None, None, None, None, 4, None, None)), | |
| 428 | ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)), | 514 | ('普通打印-无格线-储蓄银行-5列', (1, None, 3, 4, None, 2, None, None, None, None, None, None, None)), |
| 429 | ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | 515 | ('普通打印-无格线-储蓄银行-8列', (1, None, 4, 5, None, 2, 6, 7, None, None, None, None, None)), |
| 430 | ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | 516 | ('普通打印-无格线-农业银行-扩张缩进', (1, 2, 5, 6, None, 4, None, None, None, None, None, None, None)), |
| 431 | ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | 517 | ('普通打印-无格线-农业银行-整数', (1, None, 3, None, None, 2, None, 4, None, None, None, None, None)), |
| 432 | ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)), | 518 | ('普通打印-无格线-招商银行', (1, None, 3, 4, None, 6, None, None, None, None, None, None, None)), |
| 433 | 519 | ('普通打印-无格线-招商银行-电子账单', (1, None, 3, 4, None, 5, 6, None, None, None, None, None, None)), | |
| 434 | ('建设银行', (None, None, None, None, None, None, None, None, None, None, None, None, None)), | 520 | ('普通打印-无格线-民生银行', (None, None, 5, 6, None, None, 7, None, 8, None, None, None, None)), |
| 435 | ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | 521 | |
| 436 | ('邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | 522 | ('普通打印-部分格线-横版-中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)), |
| 523 | ('普通打印-部分格线-竖版-农业银行-5列', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), | ||
| 524 | ('普通打印-部分格线-竖版-农业银行-8列', (1, None, None, 6, None, 3, 8, 7, None, None, None, 4, 5)), | ||
| 525 | ('普通打印-部分格线-竖版-农业银行-窄页', (1, None, 3, 4, None, 2, 7, 6, None, None, None, None, None)), | ||
| 526 | ('普通打印-部分格线-竖版-平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)), | ||
| 527 | ('普通打印-部分格线-竖版-建设银行-电子账单', (5, None, 6, 7, None, 2, None, 9, None, None, None, None, None)), | ||
| 528 | ('其他', OTHER_TUPLE), | ||
| 529 | ('其他', OTHER_TUPLE), | ||
| 530 | ('其他', OTHER_TUPLE), | ||
| 531 | ('其他', OTHER_TUPLE), | ||
| 532 | ('其他', OTHER_TUPLE), | ||
| 533 | ('其他', OTHER_TUPLE), | ||
| 534 | ('针式打印-全格线-建设银行', OTHER_TUPLE), | ||
| 535 | ('针式打印-部分格线-竖版-邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | ||
| 536 | ('针式打印-部分格线-竖版-邮储银行-绿卡', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), | ||
| 537 | ('其他', OTHER_TUPLE), | ||
| 437 | ] | 538 | ] |
| 438 | 539 | ||
| 439 | # ----------license相关------------------------------------------------------------------------------------------------ | 540 | # ----------license相关------------------------------------------------------------------------------------------------ |
| 541 | |||
| 542 | # "0":"AVT Invioce", | ||
| 543 | # "1":"二手车发票", | ||
| 544 | # "2":"其他", | ||
| 545 | # "3":"护照", | ||
| 546 | # "28":"机动车登记证", | ||
| 547 | # "29":"机动车销售统一发票", | ||
| 548 | # "30":"港澳通行证", | ||
| 549 | # "31":"营业执照", | ||
| 550 | # "32":"行驶证", | ||
| 551 | # "33":"身份证", | ||
| 552 | # "37":"银行卡" | ||
| 553 | |||
| 554 | # 其他 | ||
| 555 | OTHER_CLASSIFY = 2 | ||
| 556 | |||
| 557 | # 身份证 | ||
| 558 | IC_CN_NAME = '身份证' | ||
| 559 | IC_CLASSIFY = 33 | ||
| 560 | # 增值税发票 | ||
| 561 | VAT_CN_NAME = '增值税发票' | ||
| 562 | VAT_CLASSIFY = 0 | ||
| 563 | # 机动车登记证书 | ||
| 564 | MVC_CN_NAME = '机动车登记证书' | ||
| 565 | MVC_CLASSIFY = 28 | ||
| 566 | # 机动车销售统一发票 | ||
| 567 | MVI_CN_NAME = '机动车销售统一发票' | ||
| 568 | MVI_CLASSIFY = 29 | ||
| 569 | IC_PID = VAT_PID = MVC_PID = MVI_PID = None | ||
| 570 | |||
| 440 | # 营业执照 | 571 | # 营业执照 |
| 441 | BL_KEY = 'bl' | 572 | BL_CN_NAME = '营业执照' |
| 573 | BL_CLASSIFY = 31 | ||
| 574 | BL_PID = 41 | ||
| 442 | # 二手车发票 | 575 | # 二手车发票 |
| 443 | UCI_KEY = 'uci' | 576 | UCI_CN_NAME = '二手车发票' |
| 577 | UCI_CLASSIFY = 1 | ||
| 578 | UCI_PID = 60 | ||
| 444 | # 港澳台通行证 | 579 | # 港澳台通行证 |
| 445 | EEP_KEY = 'eep' | 580 | EEP_CN_NAME = '港澳台通行证' |
| 581 | EEP_CLASSIFY = 30 | ||
| 582 | EEP_PID = 1018 | ||
| 446 | # 行驶证 | 583 | # 行驶证 |
| 447 | DL_KEY = 'dl' | 584 | DL_CN_NAME = '行驶证' |
| 585 | DL_CLASSIFY = 32 | ||
| 586 | DL_PID = 5 | ||
| 448 | # 护照 | 587 | # 护照 |
| 449 | PP_KEY = 'pp' | 588 | PP_CN_NAME = '护照' |
| 589 | PP_CLASSIFY = 3 | ||
| 590 | PP_PID = 8 | ||
| 450 | # 银行卡 | 591 | # 银行卡 |
| 451 | BC_KEY = 'bc' | 592 | BC_CN_NAME = '银行卡' |
| 452 | # 身份证 | 593 | BC_CLASSIFY = 37 |
| 453 | IC_KEY = 'ic' | 594 | BC_PID = 4 |
| 454 | # 机动车登记证书 | ||
| 455 | MVC_KEY = 'mvc' | ||
| 456 | # 机动车销售统一发票 | ||
| 457 | MVI_KEY = 'mvi' | ||
| 458 | # 增值税发票 | ||
| 459 | VAT_KEY = 'vat' | ||
| 460 | |||
| 461 | LICENSE_ORDER = ((MVI_KEY, '机动车销售统一发票'), | ||
| 462 | (IC_KEY, '身份证'), | ||
| 463 | (BC_KEY, '银行卡'), | ||
| 464 | (BL_KEY, '营业执照'), | ||
| 465 | (UCI_KEY, '二手车发票'), | ||
| 466 | (EEP_KEY, '港澳台通行证'), | ||
| 467 | (DL_KEY, '行驶证'), | ||
| 468 | (PP_KEY, '护照'), | ||
| 469 | (MVC_KEY, '机动车登记证书'), | ||
| 470 | (VAT_KEY, '增值税发票')) | ||
| 471 | |||
| 472 | BC_FIELD = (('CardNum', '银行卡号'), | 595 | BC_FIELD = (('CardNum', '银行卡号'), |
| 473 | ('BankName', '发卡行名称'), | 596 | ('BankName', '发卡行名称'), |
| 474 | ('CardName', '银行卡名称'), | 597 | ('CardName', '银行卡名称'), |
| ... | @@ -478,14 +601,19 @@ BC_FIELD = (('CardNum', '银行卡号'), | ... | @@ -478,14 +601,19 @@ BC_FIELD = (('CardNum', '银行卡号'), |
| 478 | 601 | ||
| 479 | SUCCESS_CODE_SET = {'0', 0} | 602 | SUCCESS_CODE_SET = {'0', 0} |
| 480 | 603 | ||
| 481 | BC_PID = 4 | 604 | LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME)), |
| 482 | 605 | (IC_CLASSIFY, (IC_PID, IC_CN_NAME)), | |
| 483 | OTHER_SET = {0, 1, 2} | 606 | (BC_CLASSIFY, (BC_PID, BC_CN_NAME)), |
| 484 | BS_SET = {10, 11, 12} | 607 | (BL_CLASSIFY, (BL_PID, BL_CN_NAME)), |
| 485 | LICENSE_SET_1 = {110, 111, 112} | 608 | (UCI_CLASSIFY, (UCI_PID, UCI_CN_NAME)), |
| 486 | LICENSE_SET_2 = {1110, 1111, 1112} | 609 | (EEP_CLASSIFY, (EEP_PID, EEP_CN_NAME)), |
| 487 | 610 | (DL_CLASSIFY, (DL_PID, DL_CN_NAME)), | |
| 488 | CLASSIFY_PID_DICT = { | 611 | (PP_CLASSIFY, (PP_PID, PP_CN_NAME)), |
| 489 | 0: (4, BC_KEY) # 银行卡 | 612 | (MVC_CLASSIFY, (MVC_PID, MVC_CN_NAME)), |
| 490 | } | 613 | (VAT_CLASSIFY, (VAT_PID, VAT_CN_NAME))) |
| 491 | 614 | ||
| 615 | LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER) | ||
| 616 | |||
| 617 | OTHER_CLASSIFY_SET = {OTHER_CLASSIFY} | ||
| 618 | LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY} | ||
| 619 | LICENSE_CLASSIFY_SET_2 = {BL_CLASSIFY, UCI_CLASSIFY, EEP_CLASSIFY, DL_CLASSIFY, PP_CLASSIFY, BC_CLASSIFY} | ... | ... |
| ... | @@ -81,9 +81,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -81,9 +81,14 @@ class Command(BaseCommand, LoggerMixin): |
| 81 | return doc_data_path, excel_path, src_excel_path, pdf_path | 81 | return doc_data_path, excel_path, src_excel_path, pdf_path |
| 82 | 82 | ||
| 83 | @staticmethod | 83 | @staticmethod |
| 84 | def bs_process(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence): | 84 | def bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify): |
| 85 | sheets = ocr_data.get('data', []) | ||
| 86 | if not sheets: | ||
| 87 | return | ||
| 88 | confidence = ocr_data.get('confidence', 1) | ||
| 89 | img_name, _ = os.path.splitext(os.path.basename(img_path)) | ||
| 85 | for i, sheet in enumerate(sheets): | 90 | for i, sheet in enumerate(sheets): |
| 86 | sheet_name = 'page_{0}_img_{1}_{2}'.format(pno, img_idx, i) | 91 | sheet_name = '{0}_{1}'.format(img_name, i) |
| 87 | # ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'] | 92 | # ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'] |
| 88 | summary = sheet.get('summary') | 93 | summary = sheet.get('summary') |
| 89 | card = summary[1] | 94 | card = summary[1] |
| ... | @@ -139,38 +144,52 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -139,38 +144,52 @@ class Command(BaseCommand, LoggerMixin): |
| 139 | words = cell.get('words') | 144 | words = cell.get('words') |
| 140 | ws.cell(row=r1+1, column=c1+1, value=words) | 145 | ws.cell(row=r1+1, column=c1+1, value=words) |
| 141 | 146 | ||
| 142 | def license2_process(self, img_path, license_summary, pid, license_key): | 147 | @staticmethod |
| 143 | with open(img_path, 'rb') as f: | 148 | def license1_process(ocr_data, license_summary, classify): |
| 144 | base64_data = base64.b64encode(f.read()) | 149 | license_data = ocr_data.get('data', []) |
| 145 | # 获取解码后的base64值 | 150 | if not license_data: |
| 146 | filedata = base64_data.decode() | 151 | return |
| 147 | # pid 产品的pid, key, secret 登录之后能够查看到 | 152 | _, license_key = consts.CLASSIFY_PID_DICT.get(classify) |
| 148 | datas = { | 153 | for license_dict in license_data: |
| 149 | "pid": str(pid), | 154 | res_list = [] |
| 150 | "key": conf.OCR_KEY, | 155 | for field, value in license_dict.items(): |
| 151 | "secret": conf.OCR_SECRET, | 156 | res_list.append((field, value)) |
| 152 | "file": filedata | 157 | license_summary.setdefault(license_key, []).append(res_list) |
| 153 | } | 158 | |
| 154 | r = requests.post(self.ocr_url_2, data=datas) | 159 | @staticmethod |
| 155 | if r.status_code == 200: | 160 | def license2_process(ocr_res_2, license_summary, pid, classify): |
| 156 | # 识别结果 | 161 | if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET: |
| 157 | response = r.json() | 162 | if pid == consts.BC_PID: |
| 158 | if response.get('ErrorCode') in consts.SUCCESS_CODE_SET: | 163 | # 银行卡 |
| 159 | if pid == consts.BC_PID: | 164 | res_list = [] |
| 160 | # 银行卡 | 165 | for en_key, chn_key in consts.BC_FIELD: |
| 161 | res_list = [] | 166 | res_list.append((chn_key, ocr_res_2.get(en_key, ''))) |
| 162 | for en_key, chn_key in consts.BC_FIELD: | 167 | license_summary.setdefault(classify, []).append(res_list) |
| 163 | res_list.append((chn_key, response.get(en_key, ''))) | 168 | else: |
| 164 | license_summary.setdefault(license_key, []).append(res_list) | 169 | # 营业执照、行驶证等 |
| 165 | else: | 170 | for result_dict in ocr_res_2.get('ResultList', []): |
| 166 | # 营业执照、行驶证等 | 171 | res_list = [] |
| 167 | for result_dict in response.get('ResultList', []): | 172 | for field_dict in result_dict.get('FieldList', []): |
| 168 | res_list = [] | 173 | res_list.append( |
| 169 | for field_dict in result_dict.get('FieldList', []): | 174 | (field_dict.get('chn_key', ''), field_dict.get('value', ''))) |
| 170 | res_list.append((field_dict.get('chn_key', ''), field_dict.get('value', ''))) | 175 | license_summary.setdefault(classify, []).append(res_list) |
| 171 | license_summary.setdefault(license_key, []).append(res_list) | 176 | |
| 172 | 177 | # async def fetch_ocr_result(self, img_path): | |
| 173 | def ocr_2_wb(self, res, wb, pno, img_idx, bs_summary, unknown_summary, license_summary): | 178 | # async with aiohttp.ClientSession( |
| 179 | # headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False) | ||
| 180 | # ) as session: | ||
| 181 | # json_data = self.get_ocr_json(img_path) | ||
| 182 | # async with session.post(self.ocr_url, json=json_data) as response: | ||
| 183 | # return await response.json() | ||
| 184 | # | ||
| 185 | # async def img_2_ocr_2_wb(self, wb, img_path, summary): | ||
| 186 | # res = await self.fetch_ocr_result(img_path) | ||
| 187 | # self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) | ||
| 188 | # sheets_list = res.get('result').get('res') | ||
| 189 | # img_name = os.path.basename(img_path) | ||
| 190 | # self.append_sheet(wb, sheets_list, img_name, summary) | ||
| 191 | |||
| 192 | def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary): | ||
| 174 | # # 流水 | 193 | # # 流水 |
| 175 | # res = { | 194 | # res = { |
| 176 | # 'code': 1, | 195 | # 'code': 1, |
| ... | @@ -178,7 +197,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -178,7 +197,7 @@ class Command(BaseCommand, LoggerMixin): |
| 178 | # 'data': { | 197 | # 'data': { |
| 179 | # 'classify': 0, | 198 | # 'classify': 0, |
| 180 | # 'confidence': 0.999, | 199 | # 'confidence': 0.999, |
| 181 | # 'sheets': [ | 200 | # 'data': [ |
| 182 | # { | 201 | # { |
| 183 | # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], | 202 | # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], |
| 184 | # 'cells': [] | 203 | # 'cells': [] |
| ... | @@ -220,55 +239,52 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -220,55 +239,52 @@ class Command(BaseCommand, LoggerMixin): |
| 220 | # 'confidence': 0.999, | 239 | # 'confidence': 0.999, |
| 221 | # } | 240 | # } |
| 222 | # } | 241 | # } |
| 242 | with open(img_path, 'rb') as f: | ||
| 243 | base64_data = base64.b64encode(f.read()) | ||
| 244 | # 获取解码后的base64值 | ||
| 245 | file_data = base64_data.decode() | ||
| 246 | json_data_1 = { | ||
| 247 | "file": file_data | ||
| 248 | } | ||
| 249 | response_1 = requests.post(self.ocr_url_1, data=json_data_1) | ||
| 250 | if response_1.status_code == 200: | ||
| 251 | ocr_res_1 = response_1.json() | ||
| 252 | self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format( | ||
| 253 | self.log_base, img_path, ocr_res_1)) | ||
| 254 | |||
| 255 | if ocr_res_1.get('code') == 1: | ||
| 256 | ocr_data = ocr_res_1.get('data', {}) | ||
| 257 | classify = ocr_data.get('classify') | ||
| 258 | if classify is None: | ||
| 259 | return | ||
| 260 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 | ||
| 261 | return | ||
| 262 | elif classify in consts.LICENSE_CLASSIFY_SET_1: # 证件1 | ||
| 263 | self.license1_process(ocr_data, license_summary, classify) | ||
| 264 | elif classify in consts.LICENSE_CLASSIFY_SET_2: # 证件2 | ||
| 265 | pid, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify) | ||
| 266 | json_data_2 = { | ||
| 267 | "pid": str(pid), | ||
| 268 | "key": conf.OCR_KEY, | ||
| 269 | "secret": conf.OCR_SECRET, | ||
| 270 | "file": file_data | ||
| 271 | } | ||
| 272 | response_2 = requests.post(self.ocr_url_2, data=json_data_2) | ||
| 273 | if response_2.status_code == 200: | ||
| 274 | # 识别结果 | ||
| 275 | ocr_res_2 = response_2.json() | ||
| 276 | self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format( | ||
| 277 | self.log_base, img_path, ocr_res_2)) | ||
| 278 | self.license2_process(ocr_res_2, license_summary, pid, classify) | ||
| 279 | else: | ||
| 280 | raise Exception('ocr 2 error, img_path={0}'.format(img_path)) | ||
| 281 | else: # 流水处理 | ||
| 282 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify) | ||
| 283 | else: | ||
| 284 | pass | ||
| 285 | else: | ||
| 286 | raise Exception('ocr 1 error, img_path={0}'.format(img_path)) | ||
| 223 | 287 | ||
| 224 | data = res.get('data', {}) | ||
| 225 | classify = data.get('classify') | ||
| 226 | if classify is None: | ||
| 227 | return | ||
| 228 | elif classify in consts.OTHER_SET: # 其他类 | ||
| 229 | return | ||
| 230 | elif classify in consts.BS_SET: # 流水处理 | ||
| 231 | sheets = data.get('sheets', []) | ||
| 232 | if not sheets: | ||
| 233 | return | ||
| 234 | confidence = data.get('confidence', 1) | ||
| 235 | self.bs_process(wb, sheets, bs_summary, unknown_summary, pno, img_idx, classify, confidence) | ||
| 236 | elif classify in consts.LICENSE_SET_1: # 证件1 | ||
| 237 | # self.license1_process() # TODO license1 | ||
| 238 | pass | ||
| 239 | elif classify in consts.LICENSE_SET_2: # 证件2 | ||
| 240 | pid, license_key = consts.CLASSIFY_PID_DICT.get(classify) | ||
| 241 | self.license2_process(license_summary, pid, license_key) # TODO reuse img data? | ||
| 242 | |||
| 243 | # async def fetch_ocr_result(self, img_path): | ||
| 244 | # async with aiohttp.ClientSession( | ||
| 245 | # headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False) | ||
| 246 | # ) as session: | ||
| 247 | # json_data = self.get_ocr_json(img_path) | ||
| 248 | # async with session.post(self.ocr_url, json=json_data) as response: | ||
| 249 | # return await response.json() | ||
| 250 | # | ||
| 251 | # async def img_2_ocr_2_wb(self, wb, img_path, summary): | ||
| 252 | # res = await self.fetch_ocr_result(img_path) | ||
| 253 | # self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) | ||
| 254 | # sheets_list = res.get('result').get('res') | ||
| 255 | # img_name = os.path.basename(img_path) | ||
| 256 | # self.append_sheet(wb, sheets_list, img_name, summary) | ||
| 257 | |||
| 258 | def fetch_ocr_result(self, img_path): | ||
| 259 | files = [ | ||
| 260 | ('img', open(img_path, 'rb')) | ||
| 261 | ] | ||
| 262 | response = requests.request("POST", self.ocr_url_1, files=files) | ||
| 263 | if response.status_code == 200: | ||
| 264 | return response.json() | ||
| 265 | |||
| 266 | def img_2_ocr_2_wb(self, wb, img_info, bs_summary, unknown_summary, license_summary): | ||
| 267 | res = self.fetch_ocr_result(img_info[0]) | ||
| 268 | self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format( | ||
| 269 | self.log_base, img_info[0], res)) | ||
| 270 | if res.get('code') == 1: | ||
| 271 | self.ocr_2_wb(res, wb, img_info[1], img_info[2], bs_summary, unknown_summary, license_summary) | ||
| 272 | 288 | ||
| 273 | @staticmethod | 289 | @staticmethod |
| 274 | def get_most(value_list): | 290 | def get_most(value_list): |
| ... | @@ -414,7 +430,6 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -414,7 +430,6 @@ class Command(BaseCommand, LoggerMixin): |
| 414 | # EDMS异常:下载异常-->回队列-->邮件;上传异常-->重新上传队列-->邮件 | 430 | # EDMS异常:下载异常-->回队列-->邮件;上传异常-->重新上传队列-->邮件 |
| 415 | # 算法异常:第一道异常-->识别失败-->邮件;第二道异常-->识别失败-->邮件 | 431 | # 算法异常:第一道异常-->识别失败-->邮件;第二道异常-->识别失败-->邮件 |
| 416 | # TODO 数据库断联问题 | 432 | # TODO 数据库断联问题 |
| 417 | # TODO 非流水证件处理 | ||
| 418 | # TODO EDMS API GATEWAY | 433 | # TODO EDMS API GATEWAY |
| 419 | def handle(self, *args, **kwargs): | 434 | def handle(self, *args, **kwargs): |
| 420 | sleep_second = int(conf.SLEEP_SECOND) | 435 | sleep_second = int(conf.SLEEP_SECOND) |
| ... | @@ -466,8 +481,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -466,8 +481,8 @@ class Command(BaseCommand, LoggerMixin): |
| 466 | # loop.run_until_complete(asyncio.wait(tasks)) | 481 | # loop.run_until_complete(asyncio.wait(tasks)) |
| 467 | # loop.close() | 482 | # loop.close() |
| 468 | 483 | ||
| 469 | for img_info in pdf_handler.img_info_list: | 484 | for img_path in pdf_handler.img_path_list: |
| 470 | self.img_2_ocr_2_wb(wb, img_info, bs_summary, unknown_summary, license_summary) | 485 | self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary) |
| 471 | 486 | ||
| 472 | self.cronjob_log.info('{0} [bs_summary={1}] [unknown_summary={2}] [license_summary={3}]'.format( | 487 | self.cronjob_log.info('{0} [bs_summary={1}] [unknown_summary={2}] [license_summary={3}]'.format( |
| 473 | self.log_base, bs_summary, unknown_summary, license_summary)) | 488 | self.log_base, bs_summary, unknown_summary, license_summary)) | ... | ... |
| ... | @@ -168,7 +168,7 @@ class BSWorkbook(Workbook): | ... | @@ -168,7 +168,7 @@ class BSWorkbook(Workbook): |
| 168 | month_mapping.setdefault(month_list[-1], []).insert( | 168 | month_mapping.setdefault(month_list[-1], []).insert( |
| 169 | 0, (ws.title, idx_list[-1] + min_row, ws.max_row, 0)) | 169 | 0, (ws.title, idx_list[-1] + min_row, ws.max_row, 0)) |
| 170 | 170 | ||
| 171 | def build_metadata_rows(self, classify, confidence, role, code, print_time, start_date, end_date): | 171 | def build_metadata_rows(self, confidence, code, print_time, start_date, end_date): |
| 172 | if start_date is None or end_date is None: | 172 | if start_date is None or end_date is None: |
| 173 | timedelta = None | 173 | timedelta = None |
| 174 | else: | 174 | else: |
| ... | @@ -176,10 +176,6 @@ class BSWorkbook(Workbook): | ... | @@ -176,10 +176,6 @@ class BSWorkbook(Workbook): |
| 176 | metadata_rows = [ | 176 | metadata_rows = [ |
| 177 | ('流水识别置信度', confidence), | 177 | ('流水识别置信度', confidence), |
| 178 | self.blank_row, | 178 | self.blank_row, |
| 179 | ('分类结果', classify), | ||
| 180 | self.blank_row, | ||
| 181 | ('户名', role), | ||
| 182 | self.blank_row, | ||
| 183 | self.code_header, | 179 | self.code_header, |
| 184 | ] | 180 | ] |
| 185 | metadata_rows.extend(code) | 181 | metadata_rows.extend(code) |
| ... | @@ -200,19 +196,19 @@ class BSWorkbook(Workbook): | ... | @@ -200,19 +196,19 @@ class BSWorkbook(Workbook): |
| 200 | ms = self.create_sheet('{0}({1})'.format(self.meta_sheet_title, card)) | 196 | ms = self.create_sheet('{0}({1})'.format(self.meta_sheet_title, card)) |
| 201 | return ms | 197 | return ms |
| 202 | 198 | ||
| 203 | def build_meta_sheet(self, card, classify, confidence, role, code, print_time, start_date, end_date): | 199 | def build_meta_sheet(self, card, confidence, code, print_time, start_date, end_date): |
| 204 | metadata_rows = self.build_metadata_rows(classify, confidence, role, code, print_time, start_date, end_date) | 200 | metadata_rows = self.build_metadata_rows(confidence, code, print_time, start_date, end_date) |
| 205 | ms = self.create_meta_sheet(card) | 201 | ms = self.create_meta_sheet(card) |
| 206 | for row in metadata_rows: | 202 | for row in metadata_rows: |
| 207 | ms.append(row) | 203 | ms.append(row) |
| 208 | return ms | 204 | return ms |
| 209 | 205 | ||
| 210 | def build_month_sheet(self, role, month_mapping, ms, is_reverse): | 206 | def build_month_sheet(self, card, month_mapping, ms, is_reverse): |
| 211 | tmp_ws = self.create_sheet('tmp_ws') | 207 | tmp_ws = self.create_sheet('tmp_ws') |
| 212 | for month in sorted(month_mapping.keys()): | 208 | for month in sorted(month_mapping.keys()): |
| 213 | # 3.1.拷贝数据 | 209 | # 3.1.拷贝数据 |
| 214 | parts = month_mapping.get(month) | 210 | parts = month_mapping.get(month) |
| 215 | new_ws = self.create_sheet('{0}({1})'.format(month, role)) | 211 | new_ws = self.create_sheet('{0}({1})'.format(month, card[-6:])) |
| 216 | new_ws.append(consts.FIXED_HEADERS) | 212 | new_ws.append(consts.FIXED_HEADERS) |
| 217 | for part in parts: | 213 | for part in parts: |
| 218 | ws = self.get_sheet_by_name(part[0]) | 214 | ws = self.get_sheet_by_name(part[0]) |
| ... | @@ -338,9 +334,7 @@ class BSWorkbook(Workbook): | ... | @@ -338,9 +334,7 @@ class BSWorkbook(Workbook): |
| 338 | 334 | ||
| 339 | # 2.元信息提取表 | 335 | # 2.元信息提取表 |
| 340 | ms = self.build_meta_sheet(card, | 336 | ms = self.build_meta_sheet(card, |
| 341 | summary['classify'], | ||
| 342 | summary['confidence'], | 337 | summary['confidence'], |
| 343 | summary['role'], | ||
| 344 | summary['code'], | 338 | summary['code'], |
| 345 | summary['print_time'], | 339 | summary['print_time'], |
| 346 | start_date, | 340 | start_date, |
| ... | @@ -359,13 +353,16 @@ class BSWorkbook(Workbook): | ... | @@ -359,13 +353,16 @@ class BSWorkbook(Workbook): |
| 359 | self.remove(self.get_sheet_by_name(sheet)) | 353 | self.remove(self.get_sheet_by_name(sheet)) |
| 360 | 354 | ||
| 361 | def license_rebuild(self, license_summary): | 355 | def license_rebuild(self, license_summary): |
| 362 | for en_key, cn_key in consts.LICENSE_ORDER: | 356 | for classify, (_, name) in consts.LICENSE_ORDER: |
| 363 | ws = self.create_sheet(cn_key) | 357 | res = license_summary.get(classify) |
| 364 | for bl in license_summary.get(en_key, []): | 358 | if res is None: |
| 359 | continue | ||
| 360 | ws = self.create_sheet(name) | ||
| 361 | for bl in res: | ||
| 365 | for bl_field in bl: | 362 | for bl_field in bl: |
| 366 | ws.append(bl_field) | 363 | ws.append(bl_field) |
| 367 | ws.append((None, )) | 364 | ws.append((None, )) |
| 368 | 365 | ||
| 369 | def rebuild(self, bs_summary, license_summary): | 366 | def rebuild(self, bs_summary, license_summary): |
| 370 | self.bs_rebuild(bs_summary) | 367 | self.bs_rebuild(bs_summary) |
| 371 | # self.license_rebuild(license_summary) | 368 | self.license_rebuild(license_summary) | ... | ... |
| ... | @@ -25,7 +25,7 @@ class PDFHandler: | ... | @@ -25,7 +25,7 @@ class PDFHandler: |
| 25 | def __init__(self, path, img_dir_path): | 25 | def __init__(self, path, img_dir_path): |
| 26 | self.path = path | 26 | self.path = path |
| 27 | self.img_dir_path = img_dir_path | 27 | self.img_dir_path = img_dir_path |
| 28 | self.img_info_list = [] | 28 | self.img_path_list = [] |
| 29 | self.xref_set = set() | 29 | self.xref_set = set() |
| 30 | 30 | ||
| 31 | def get_img_save_path(self, pno, img_index=0, ext='png'): | 31 | def get_img_save_path(self, pno, img_index=0, ext='png'): |
| ... | @@ -38,7 +38,7 @@ class PDFHandler: | ... | @@ -38,7 +38,7 @@ class PDFHandler: |
| 38 | pm = page.getPixmap(matrix=trans_2, alpha=False) | 38 | pm = page.getPixmap(matrix=trans_2, alpha=False) |
| 39 | img_save_path = self.get_img_save_path(page.number) | 39 | img_save_path = self.get_img_save_path(page.number) |
| 40 | pm.writePNG(img_save_path) | 40 | pm.writePNG(img_save_path) |
| 41 | self.img_info_list.append((img_save_path, page.number, 0)) | 41 | self.img_path_list.append(img_save_path) |
| 42 | 42 | ||
| 43 | @staticmethod | 43 | @staticmethod |
| 44 | def getimage(pix): | 44 | def getimage(pix): |
| ... | @@ -88,7 +88,7 @@ class PDFHandler: | ... | @@ -88,7 +88,7 @@ class PDFHandler: |
| 88 | with open(img_save_path, "wb") as f: | 88 | with open(img_save_path, "wb") as f: |
| 89 | f.write(img_data) | 89 | f.write(img_data) |
| 90 | self.xref_set.add(xref) | 90 | self.xref_set.add(xref) |
| 91 | self.img_info_list.append((img_save_path, pno, img_index)) | 91 | self.img_path_list.append(img_save_path) |
| 92 | 92 | ||
| 93 | @staticmethod | 93 | @staticmethod |
| 94 | def split_il(il): | 94 | def split_il(il): |
| ... | @@ -179,7 +179,7 @@ class PDFHandler: | ... | @@ -179,7 +179,7 @@ class PDFHandler: |
| 179 | img_save_path = self.get_img_save_path(pno, img_index, im_list[0][2]) | 179 | img_save_path = self.get_img_save_path(pno, img_index, im_list[0][2]) |
| 180 | new_img.save(img_save_path) | 180 | new_img.save(img_save_path) |
| 181 | page_to_png = False | 181 | page_to_png = False |
| 182 | self.img_info_list.append((img_save_path, pno, img_index)) | 182 | self.img_path_list.append(img_save_path) |
| 183 | 183 | ||
| 184 | # 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片 | 184 | # 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片 |
| 185 | if page_to_png: | 185 | if page_to_png: | ... | ... |
-
Please register or sign in to post a comment