Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
96b67222
authored
2020-09-23 15:39:31 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix bug & add license
1 parent
a7e918d7
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
72 additions
and
39 deletions
.gitignore
src/apps/doc/consts.py
src/apps/doc/management/commands/doc_ocr_process.py
src/apps/doc/ocr/wb.py
.gitignore
View file @
96b6722
...
...
@@ -34,4 +34,5 @@ data/*
src/*.sh
test.py
ocr_test.py
\ No newline at end of file
ocr_test.py
ocr_test_2.py
\ No newline at end of file
...
...
src/apps/doc/consts.py
View file @
96b6722
# Pagination defaults.
PAGE_DEFAULT = 1
PAGE_SIZE_DEFAULT = 10

# Translation table for str.translate: each character of the first string is
# replaced by the digit at the same position in the second string.
# NOTE(review): presumably normalises look-alike characters in recognised
# numbers -- confirm against the callers.
TRANS = str.maketrans('Cc((oODlLmAsSbg', '000000011345569')
# NOTE(review): threshold meaning is not visible here -- confirm usage.
CARD_RATIO = 0.9
# Label text: "unknown card number".
UNKNOWN_CARD = '未知卡号'
# Label text: "unknown account name".
UNKNOWN_ROLE = '未知户名'
# strftime/strptime patterns for year-month-day dates in four notations.
DATE_FORMAT = ['%Y年%m月%d日', '%Y/%m/%d', '%Y-%m-%d', '%Y%m%d']

FIXED_APPLICATION_ID_PREFIX = 'CH-S'
DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACT MANAGEMENT']
...
...
@@ -19,6 +12,8 @@ SPLIT_STR = '_'
# Business types, built from the two prefix constants defined earlier in
# this module.
BUSINESS_TYPE_LIST = [HIL_PREFIX, AFC_PREFIX]
# Every upper/lower-case spelling of "HIL", plus the code 'CO00002'.
HIL_SET = {
    'HIL', 'HIl', 'HiL', 'Hil',
    'hIL', 'hIl', 'hiL', 'hil',
    'CO00002',
}

# ------- EDMS related -------------------------------------------------------------------------------------------------
SESSION_PREFIX = 'FHLSID'
CUSTOM_CLIENT = 'CustomClient'
# All-zero, UUID-shaped token.
FIXED_TOKEN = '00000000-0000-0000-0000-000000000000'
...
...
@@ -40,13 +35,23 @@ DEALER_CODE_META_FIELD_id = 13
# NOTE(review): numeric id of the business-type meta field -- presumably an
# identifier in the external document system; confirm.
BUSINESS_TYPE_META_FIELD_id = 93
DEALER_CODE = 'ocr_situ_group'
# --------- Bank statement template related ----------------------------------------------------------------------------
# Translation table for str.translate: each character of the first string is
# replaced by the digit at the same position in the second string.
# NOTE(review): presumably normalises look-alike characters in recognised
# numbers -- confirm against the callers.
TRANS = str.maketrans('Cc((oODlLmAsSbg', '000000011345569')
# NOTE(review): threshold meaning is not visible here -- confirm usage.
CARD_RATIO = 0.9
# Label text: "unknown card number".
UNKNOWN_CARD = '未知卡号'
# Label text: "unknown account name".
UNKNOWN_ROLE = '未知户名'
# strftime/strptime patterns for year-month-day dates in four notations.
DATE_FORMAT = ['%Y年%m月%d日', '%Y/%m/%d', '%Y-%m-%d', '%Y%m%d']
# Column titles for an amount column
# ("transaction amount", "amount", "income/expense amount", "amount incurred").
AMOUNT_COL_TITLE_SET = {"交易金额", "金额", "收入/支出金额", "发生额"}
# Column titles for a balance column ("account balance", "balance").
OVERAGE_COL_TITLE_SET = {"账户余额", "余额"}
# Title text: "check result".
PROOF_COL_TITLE = '核对结果'
# Result labels: ("right", "wrong").
PROOF_RES = ('对', '错')
# Sheet title text: "key information extraction and display".
META_SHEET_TITLE = '关键信息提取和展示'
# Fixed header labels, in column order.  (The source carried this assignment
# twice with identical content; one copy is kept.)
FIXED_HEADERS = (
    '记账日期', '记账时间', '金额', '余额', '交易名称', '附言', '对方账户名',
    '对方卡号/账号', '对方开户行', '核对结果', '借贷', '收入', '支出',
)
# Number of fixed columns.
FIXED_COL_AMOUNT = len(FIXED_HEADERS)
# Header label -> 1-based column index.
BASE_HEADERS_MAPPING = {
    label: idx for idx, label in enumerate(FIXED_HEADERS, start=1)
}
# 1-based column index of the '借贷' header.
BORROW_HEADER_COL = BASE_HEADERS_MAPPING['借贷']
...
...
@@ -375,32 +380,6 @@ HEADERS_MAPPING.update(
# ]
# {
# "0": "全表格-中国农业银行个人账户明细",
# "1": "全表格-中国银行",
# "2": "全表格-北京银行",
# "3": "全表格-工商银行",
# "4": "全表格-建设银行",
# "5": "部分格线-横版-中信银行账户交易明细",
# "6": "部分格线-横版-中信银行账户交易明细特殊",
# "7": "部分格线-竖版-中国农业银行",
# "8": "部分格线-竖版-中国农业银行分账户(窄页)",
# "9": "部分格线-竖版-平安电子账单"
# }
# CLASSIFY_LIST = [
# ('农业银行', (1, None, 3, 5, None, 8, 7, 6, None, None, None, None, None)),
# ('中国银行', (1, 2, 4, 5, 6, 9, 10, 11, 12, None, None, None, None)),
# ('北京银行', (1, None, 4, 5, None, 2, 6, 7, None, None, 3, None, None)),
# ('工商银行', (1, None, 9, 10, None, 7, None, None, None, None, None, None, None)),
# ('建设银行', (None, None, None, None, None, 2, None, None, None, None, None, None, None)),
# ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
# ('中信银行', (1, None, None, 5, None, 2, 6, 7, 8, None, None, 3, 4)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('农业银行', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
# ('平安电子账单', (2, None, 5, 6, None, 4, None, None, None, None, None, None, None)),
# ]
# {
# "0":"其他",
# "1":"普通打印-全表格-中国农业银行",
# "2":"普通打印-全表格-中国银行",
...
...
@@ -456,3 +435,46 @@ CLASSIFY_LIST = [
(
'邮储银行'
,
(
2
,
None
,
5
,
6
,
None
,
4
,
None
,
7
,
None
,
None
,
None
,
None
,
None
)),
(
'邮储银行'
,
(
2
,
None
,
5
,
6
,
None
,
4
,
None
,
7
,
None
,
None
,
None
,
None
,
None
)),
]
# ---------- License related -------------------------------------------------------------------------------------------
# Business license
BL_KEY = 'bl'
# Used-car invoice
UCI_KEY = 'uci'
# Hong Kong / Macau / Taiwan travel permit
EEP_KEY = 'eep'
# Vehicle license
DL_KEY = 'dl'
# Passport
PP_KEY = 'pp'
# Bank card
BC_KEY = 'bc'
# ID card
IC_KEY = 'ic'
# Motor vehicle registration certificate
MVC_KEY = 'mvc'
# Motor vehicle sales unified invoice
MVI_KEY = 'mvi'
# VAT invoice
VAT_KEY = 'vat'

# (license key, Chinese display name) pairs, in a fixed order.
LICENSE_ORDER = (
    (MVI_KEY, '机动车销售统一发票'),
    (IC_KEY, '身份证'),
    (BC_KEY, '银行卡'),
    (BL_KEY, '营业执照'),
    (UCI_KEY, '二手车发票'),
    (EEP_KEY, '港澳台通行证'),
    (DL_KEY, '行驶证'),
    (PP_KEY, '护照'),
    (MVC_KEY, '机动车登记证书'),
    (VAT_KEY, '增值税发票'),
)
# (field name, Chinese display name) pairs for the bank-card fields.
BC_FIELD = (
    ('CardNum', '银行卡号'),
    ('BankName', '发卡行名称'),
    ('CardName', '银行卡名称'),
    ('BankCode', '发卡行代号'),
    ('CardType', '银行卡类型'),
    ('Date', '日期'),
)
# Success code in both its string and its integer form.
SUCCESS_CODE_SET = {'0', 0}
...
...
src/apps/doc/management/commands/doc_ocr_process.py
View file @
96b6722
...
...
@@ -236,7 +236,7 @@ class Command(BaseCommand, LoggerMixin):
merged_bs_summary
[
main_card
][
'sheet'
]
.
extend
(
bs_summary
[
card
][
'sheet'
])
merged_bs_summary
[
main_card
][
'role'
]
.
extend
(
bs_summary
[
card
][
'role'
])
merged_bs_summary
[
main_card
][
'role_set'
]
.
update
(
bs_summary
[
card
][
'role_set'
])
merged_bs_summary
[
main_card
][
'code'
]
.
extend
(
bs_summary
[
card
][
'
sheet
'
])
merged_bs_summary
[
main_card
][
'code'
]
.
extend
(
bs_summary
[
card
][
'
code
'
])
merged_bs_summary
[
main_card
][
'print_time'
]
.
extend
(
bs_summary
[
card
][
'print_time'
])
merged_bs_summary
[
main_card
][
'start_date'
]
.
extend
(
bs_summary
[
card
][
'start_date'
])
merged_bs_summary
[
main_card
][
'end_date'
]
.
extend
(
bs_summary
[
card
][
'end_date'
])
...
...
@@ -310,7 +310,7 @@ class Command(BaseCommand, LoggerMixin):
if
role
in
card_summary
[
'role_set'
]:
merge_role
.
append
(
role
)
card_summary
[
'sheet'
]
.
extend
(
summary
[
'sheet'
])
card_summary
[
'code'
]
.
extend
(
summary
[
'
sheet
'
])
card_summary
[
'code'
]
.
extend
(
summary
[
'
code
'
])
card_summary
[
'print_time'
]
.
extend
(
summary
[
'print_time'
])
card_summary
[
'start_date'
]
.
extend
(
summary
[
'start_date'
])
card_summary
[
'end_date'
]
.
extend
(
summary
[
'end_date'
])
...
...
@@ -340,7 +340,8 @@ class Command(BaseCommand, LoggerMixin):
# TODO 协程异步发送OCR请求
# TODO 异常邮件通知
# TODO 数据库断联问题
# TODO 非流水证件处理,Excel模板
# TODO 非流水证件处理
# TODO EDMS API GATEWAY
def
handle
(
self
,
*
args
,
**
kwargs
):
sleep_second
=
int
(
conf
.
SLEEP_SECOND
)
max_sleep_second
=
int
(
conf
.
MAX_SLEEP_SECOND
)
...
...
src/apps/doc/ocr/wb.py
View file @
96b6722
...
...
@@ -358,5 +358,14 @@ class BSWorkbook(Workbook):
for
sheet
in
summary
[
'sheet'
]:
self
.
remove
(
self
.
get_sheet_by_name
(
sheet
))
def license_rebuild(self, license_summary):
    """Create one worksheet per license type and fill it from the summary.

    For every ``(en_key, cn_key)`` pair in ``consts.LICENSE_ORDER`` a sheet
    titled ``cn_key`` is created, and each row of each record stored under
    ``en_key`` in ``license_summary`` is appended to that sheet.

    Args:
        license_summary: mapping that supports ``.get(key, default)``; each
            value is an iterable of records, and each record is an iterable
            of rows accepted by ``ws.append``. A missing key yields a sheet
            with no rows.
    """
    for en_key, cn_key in consts.LICENSE_ORDER:
        ws = self.create_sheet(cn_key)
        for record in license_summary.get(en_key, []):
            for row in record:
                ws.append(row)
            # NOTE(review): the flattened source does not show which loop
            # this row belongs to; it is placed after each record here --
            # confirm against the repository.
            ws.append((None,))
def rebuild(self, bs_summary, license_summary):
    """Rebuild the workbook from the bank-statement summary.

    Args:
        bs_summary: passed unchanged to ``self.bs_rebuild``.
        license_summary: accepted but not used while the license step below
            stays commented out.
    """
    self.bs_rebuild(bs_summary)
    # NOTE(review): the license step is commented out in this revision.
    # self.license_rebuild(license_summary)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment