Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
6e4b79a4
authored
2020-10-18 23:03:03 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
Merge branch 'feature/license' into feature/mssql
2 parents
7dfc2ee8
cc0dc16d
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
61 additions
and
47 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/doc_ocr_process.py
src/apps/doc/ocr/wb.py
src/apps/doc/consts.py
View file @
6e4b79a
...
...
@@ -583,60 +583,73 @@ OTHER_CLASSIFY = 2
# 身份证
IC_CN_NAME
=
'身份证'
IC_CLASSIFY
=
33
IC_FIELD_ORDER
=
()
# 增值税发票
VAT_CN_NAME
=
'增值税发票'
VAT_CLASSIFY
=
0
VAT_FIELD_ORDER
=
()
# 机动车登记证书
MVC_CN_NAME
=
'机动车登记证书'
MVC_CLASSIFY
=
28
MVC_FIELD_ORDER
=
()
MVC_SE_FIELD_ORDER
=
()
# 机动车销售统一发票
MVI_CN_NAME
=
'机动车销售统一发票'
MVI_CLASSIFY
=
29
MVI_FIELD_ORDER
=
()
IC_PID
=
VAT_PID
=
MVC_PID
=
MVI_PID
=
None
# 营业执照
BL_CN_NAME
=
'营业执照'
BL_CLASSIFY
=
31
BL_PID
=
41
BL_FIELD_ORDER
=
()
# 二手车发票
UCI_CN_NAME
=
'二手车发票'
UCI_CLASSIFY
=
1
UCI_PID
=
60
UCI_FIELD_ORDER
=
()
# 港澳台通行证
EEP_CN_NAME
=
'港澳台通行证'
EEP_CLASSIFY
=
30
EEP_PID
=
1018
EEP_FIELD_ORDER
=
()
# 行驶证
DL_CN_NAME
=
'行驶证'
DL_CLASSIFY
=
32
DL_PID
=
5
DL_FIELD_ORDER
=
()
# 护照
PP_CN_NAME
=
'护照'
PP_CLASSIFY
=
3
PP_PID
=
8
PP_FIELD_ORDER
=
()
# 银行卡
BC_CN_NAME
=
'银行卡'
BC_CLASSIFY
=
37
BC_PID
=
4
BC_FIELD
=
((
'CardNum'
,
'银行卡号'
),
(
'BankName'
,
'发卡行名称'
),
(
'CardName'
,
'银行卡名称'
),
(
'BankCode'
,
'发卡行代号'
),
(
'CardType'
,
'银行卡类型'
),
(
'Date'
,
'日期'
))
# BC_FIELD = (('CardNum', '银行卡号'),
# ('BankName', '发卡行名称'),
# ('CardName', '银行卡名称'),
# ('BankCode', '发卡行代号'),
# ('CardType', '银行卡类型'),
# ('Date', '日期'))
BC_FIELD_ORDER
=
((
'BankName'
,
'发卡行名称'
),
(
'CardNum'
,
'银行卡号'
),
(
'CardType'
,
'银行卡类型'
),)
SUCCESS_CODE_SET
=
{
'0'
,
0
}
LICENSE_ORDER
=
((
MVI_CLASSIFY
,
(
MVI_PID
,
MVI_CN_NAME
)),
(
IC_CLASSIFY
,
(
IC_PID
,
IC_CN_NAME
)),
(
BC_CLASSIFY
,
(
BC_PID
,
BC_CN_NAME
)),
(
BL_CLASSIFY
,
(
BL_PID
,
BL_CN_NAME
)),
(
UCI_CLASSIFY
,
(
UCI_PID
,
UCI_CN_NAME
)),
(
EEP_CLASSIFY
,
(
EEP_PID
,
EEP_CN_NAME
)),
(
DL_CLASSIFY
,
(
DL_PID
,
DL_CN_NAME
)),
(
PP_CLASSIFY
,
(
PP_PID
,
PP_CN_NAME
)),
(
MVC_CLASSIFY
,
(
MVC_PID
,
MVC_CN_NAME
)),
(
VAT_CLASSIFY
,
(
VAT_PID
,
VAT_CN_NAME
)))
LICENSE_ORDER
=
((
MVI_CLASSIFY
,
(
MVI_PID
,
MVI_CN_NAME
,
MVI_FIELD_ORDER
)),
(
IC_CLASSIFY
,
(
IC_PID
,
IC_CN_NAME
,
IC_FIELD_ORDER
)),
(
BC_CLASSIFY
,
(
BC_PID
,
BC_CN_NAME
,
BC_FIELD_ORDER
)),
(
BL_CLASSIFY
,
(
BL_PID
,
BL_CN_NAME
,
BL_FIELD_ORDER
)),
(
UCI_CLASSIFY
,
(
UCI_PID
,
UCI_CN_NAME
,
UCI_FIELD_ORDER
)),
(
EEP_CLASSIFY
,
(
EEP_PID
,
EEP_CN_NAME
,
EEP_FIELD_ORDER
)),
(
DL_CLASSIFY
,
(
DL_PID
,
DL_CN_NAME
,
DL_FIELD_ORDER
)),
(
PP_CLASSIFY
,
(
PP_PID
,
PP_CN_NAME
,
PP_FIELD_ORDER
)),
(
MVC_CLASSIFY
,
(
MVC_PID
,
MVC_CN_NAME
,
MVC_FIELD_ORDER
)),
(
VAT_CLASSIFY
,
(
VAT_PID
,
VAT_CN_NAME
,
VAT_FIELD_ORDER
)))
LICENSE_CLASSIFY_MAPPING
=
dict
(
LICENSE_ORDER
)
...
...
src/apps/doc/management/commands/doc_ocr_process.py
View file @
6e4b79a
...
...
@@ -163,28 +163,23 @@ class Command(BaseCommand, LoggerMixin):
if
not
license_data
:
skip_img
.
append
(
self
.
parse_img_path
(
img_path
))
return
for
license_dict
in
license_data
:
res_list
=
[]
for
field
,
value
in
license_dict
.
items
():
res_list
.
append
((
field
,
value
))
license_summary
.
setdefault
(
classify
,
[])
.
append
(
res_list
)
license_summary
.
setdefault
(
classify
,
[])
.
extend
(
license_data
)
def
license2_process
(
self
,
ocr_res_2
,
license_summary
,
pid
,
classify
,
skip_img
,
img_path
):
if
ocr_res_2
.
get
(
'ErrorCode'
)
in
consts
.
SUCCESS_CODE_SET
:
if
pid
==
consts
.
BC_PID
:
# 银行卡
res_list
=
[]
for
en_key
,
chn_key
in
consts
.
BC_FIELD
:
res_list
.
append
((
chn_key
,
ocr_res_2
.
get
(
en_key
,
''
))
)
license_summary
.
setdefault
(
classify
,
[])
.
append
(
res_list
)
# res_dict = {}
#
for en_key, chn_key in consts.BC_FIELD:
# res_dict[chn_key] = ocr_res_2.get(en_key, ''
)
license_summary
.
setdefault
(
classify
,
[])
.
append
(
ocr_res_2
)
else
:
# 营业执照、行驶证等
for
result_dict
in
ocr_res_2
.
get
(
'ResultList'
,
[]):
res_
list
=
[]
res_
dict
=
{}
for
field_dict
in
result_dict
.
get
(
'FieldList'
,
[]):
res_list
.
append
(
(
field_dict
.
get
(
'chn_key'
,
''
),
field_dict
.
get
(
'value'
,
''
)))
license_summary
.
setdefault
(
classify
,
[])
.
append
(
res_list
)
res_dict
[
field_dict
.
get
(
'chn_key'
,
''
)]
=
field_dict
.
get
(
'value'
,
''
)
license_summary
.
setdefault
(
classify
,
[])
.
append
(
res_dict
)
else
:
skip_img
.
append
(
self
.
parse_img_path
(
img_path
))
...
...
@@ -229,7 +224,7 @@ class Command(BaseCommand, LoggerMixin):
elif
classify
in
consts
.
LICENSE_CLASSIFY_SET_1
:
# 证件1
self
.
license1_process
(
ocr_data
,
license_summary
,
classify
,
skip_img
,
img_path
)
elif
classify
in
consts
.
LICENSE_CLASSIFY_SET_2
:
# 证件2
pid
,
_
=
consts
.
LICENSE_CLASSIFY_MAPPING
.
get
(
classify
)
pid
,
_
,
_
=
consts
.
LICENSE_CLASSIFY_MAPPING
.
get
(
classify
)
json_data_2
=
{
"pid"
:
str
(
pid
),
"key"
:
conf
.
OCR_KEY
,
...
...
@@ -566,9 +561,11 @@ class Command(BaseCommand, LoggerMixin):
# 4.2 重构Excel文件
wb
.
save
(
src_excel_path
)
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
skip_img
)
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
skip_img
,
doc
.
document_scheme
)
wb
.
save
(
excel_path
)
except
EDMSException
as
e
:
doc
.
status
=
DocStatus
.
PROCESS_FAILED
.
value
doc
.
save
()
self
.
cronjob_log
.
error
(
'{0} [process failed (edms download)] [business_type={1}] [doc_id={2}] '
'[err={3}]'
.
format
(
self
.
log_base
,
business_type
,
doc
.
id
,
e
))
except
Exception
as
e
:
...
...
src/apps/doc/ocr/wb.py
View file @
6e4b79a
...
...
@@ -242,14 +242,15 @@ class BSWorkbook(Workbook):
elif
summary_cell
.
value
in
self
.
loan_keyword
:
summary_cell
.
fill
=
self
.
loan_fill
amount_error
=
False
# 3.3.余额转数值
over_cell
=
rows
[
consts
.
OVER_IDX
]
try
:
over_cell
.
value
=
locale
.
atof
(
self
.
amount_format
(
over_cell
.
value
))
except
Exception
as
e
:
contin
ue
amount_error
=
Tr
ue
else
:
over_cell
.
number_format
=
numbers
.
FORMAT_NUMBER_
COMMA_SEPARATED1
over_cell
.
number_format
=
numbers
.
FORMAT_NUMBER_
00
# 3.4.金额转数值
try
:
...
...
@@ -267,11 +268,11 @@ class BSWorkbook(Workbook):
if
amount_cell
.
value
>
0
:
amount_cell
.
value
=
-
amount_cell
.
value
except
Exception
as
e
:
contin
ue
amount_error
=
Tr
ue
else
:
if
rows
[
consts
.
BORROW_IDX
]
.
value
in
consts
.
BORROW_OUTLAY_SET
:
amount_cell
.
value
=
-
amount_cell
.
value
amount_cell
.
number_format
=
numbers
.
FORMAT_NUMBER_
COMMA_SEPARATED1
amount_cell
.
number_format
=
numbers
.
FORMAT_NUMBER_
00
same_amount_mapping
=
amount_mapping
.
get
(
date_cell
.
value
,
{})
fill_rows
=
same_amount_mapping
.
get
(
-
amount_cell
.
value
)
if
fill_rows
:
...
...
@@ -281,12 +282,12 @@ class BSWorkbook(Workbook):
amount_cell
.
value
,
[])
.
append
(
row
)
# 3.5.核对结果
if
row
>
2
:
if
row
>
2
and
not
amount_error
:
if
is_reverse
:
rows
[
consts
.
RESULT_IDX
]
.
value
=
'=IF(D{0}=
SUM(D{1},C{0}
), "{2}", "{3}")'
.
format
(
rows
[
consts
.
RESULT_IDX
]
.
value
=
'=IF(D{0}=
ROUND(SUM(D{1},C{0}),2
), "{2}", "{3}")'
.
format
(
row
-
1
,
row
,
*
self
.
proof_res
)
else
:
rows
[
consts
.
RESULT_IDX
]
.
value
=
'=IF(D{0}=
SUM(D{1},C{0}
), "{2}", "{3}")'
.
format
(
rows
[
consts
.
RESULT_IDX
]
.
value
=
'=IF(D{0}=
ROUND(SUM(D{1},C{0}),2
), "{2}", "{3}")'
.
format
(
row
,
row
-
1
,
*
self
.
proof_res
)
# 删除金额辅助列
...
...
@@ -358,15 +359,18 @@ class BSWorkbook(Workbook):
for
sheet
in
summary
.
get
(
'sheet'
):
self
.
remove
(
self
.
get_sheet_by_name
(
sheet
))
def
license_rebuild
(
self
,
license_summary
):
for
classify
,
(
_
,
name
)
in
consts
.
LICENSE_ORDER
:
res
=
license_summary
.
get
(
classify
)
if
res
is
None
:
def
license_rebuild
(
self
,
license_summary
,
document_scheme
):
for
classify
,
(
_
,
name
,
field_order
)
in
consts
.
LICENSE_ORDER
:
# 机动车登记证:CA和SE不同顺序
if
classify
==
consts
.
MVC_CLASSIFY
and
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
1
]:
field_order
=
consts
.
MVC_SE_FIELD_ORDER
license_list
=
license_summary
.
get
(
classify
)
if
license_list
is
None
:
continue
ws
=
self
.
create_sheet
(
name
)
for
bl
in
res
:
for
bl_field
in
bl
:
ws
.
append
(
bl_field
)
for
license_dict
in
license_list
:
for
search_field
,
write_field
in
field_order
:
ws
.
append
(
(
write_field
,
license_dict
.
get
(
search_field
,
''
))
)
ws
.
append
((
None
,
))
def
skip_img_sheet
(
self
,
skip_img
):
...
...
@@ -376,7 +380,7 @@ class BSWorkbook(Workbook):
for
img_tuple
in
skip_img
:
ws
.
append
(
img_tuple
)
def
rebuild
(
self
,
bs_summary
,
license_summary
,
skip_img
):
def
rebuild
(
self
,
bs_summary
,
license_summary
,
skip_img
,
document_scheme
):
self
.
bs_rebuild
(
bs_summary
)
self
.
license_rebuild
(
license_summary
)
self
.
license_rebuild
(
license_summary
,
document_scheme
)
self
.
skip_img_sheet
(
skip_img
)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment