Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
31656b38
authored
2021-05-12 17:04:00 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
DDA part 1
1 parent
779dbb1a
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
111 additions
and
11 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/consts.py
View file @
31656b3
...
...
@@ -520,7 +520,7 @@ OTHER_TUPLE = (None, None, None, None, None, None, None, None, None, None, None,
# "35":"针式打印-部分格线-竖版-邮储银行",
# "36":"针式打印-部分格线-竖版-邮储银行-绿卡",
# "
38
":"普通打印-无格线-农业银行-整数-特殊",
# "
50
":"普通打印-无格线-农业银行-整数-特殊",
CLASSIFY_LIST
=
[
(
'其他'
,
OTHER_TUPLE
),
...
...
@@ -563,6 +563,18 @@ CLASSIFY_LIST = [
(
'针式打印-部分格线-竖版-邮储银行'
,
(
2
,
None
,
5
,
6
,
None
,
4
,
None
,
7
,
None
,
None
,
None
,
None
,
None
)),
(
'针式打印-部分格线-竖版-邮储银行-绿卡'
,
(
2
,
None
,
5
,
6
,
None
,
4
,
None
,
7
,
None
,
None
,
None
,
None
,
None
)),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'普通打印-无格线-农业银行-整数-特殊'
,
(
1
,
None
,
3
,
4
,
None
,
2
,
None
,
5
,
None
,
None
,
None
,
None
,
None
)),
]
...
...
@@ -608,6 +620,18 @@ CLASSIFY_HEADER_LIST = [
(
'序号'
,
'交易日期'
,
'交易渠道'
,
'摘要'
,
'交易金额'
,
'账户余额'
,
'对方账号/卡号/汇票号'
,
'原子账号'
,
'交易机构名称'
),
(
'序号'
,
'交易日期'
,
'交易渠道'
,
'摘要'
,
'交易金额'
,
'账户余额'
,
'对方账号/卡号/汇票号'
,
'原子账号'
,
'交易机构名称'
),
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
(
'交易日期'
,
'摘要/附言'
,
'交易金额'
,
'账户余额'
,
'对方账号和户名'
),
]
...
...
@@ -812,7 +836,7 @@ MVI_FIELD_ORDER = (('发票代码', '发票代码'),
(
'主管税务机关及代码'
,
'主管税务机关及代码'
),
(
'吨位'
,
'吨位'
),
(
'限乘人数'
,
'限乘人数'
),)
IC_PID
=
VAT_PID
=
VATS_PID
=
MVC_PID
=
MVI_PID
=
None
IC_PID
=
VAT_PID
=
VATS_PID
=
MVC_PID
=
MVI_PID
=
RP_PID
=
None
# 营业执照
BL_CN_NAME
=
'营业执照'
...
...
@@ -916,6 +940,10 @@ BC_FIELD_ORDER = (('BankName', '发卡行名称'),
(
'CardType'
,
'银行卡类型'
),
(
'Name'
,
'持卡人姓名'
),)
# DDA
DDA_CN_NAME
=
'DDA'
DDA_CLASSIFY
=
38
SUCCESS_CODE_SET
=
{
'0'
,
0
}
FIELD_ORDER_MAP
=
{
...
...
@@ -941,7 +969,7 @@ MODEL_FIELD_VAT = 'vat_count'
LICENSE_ORDER
=
((
MVI_CLASSIFY
,
(
MVI_PID
,
MVI_CN_NAME
,
MVI_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_MVI
)),
(
IC_CLASSIFY
,
(
IC_PID
,
IC_CN_NAME
,
None
,
True
,
False
,
MODEL_FIELD_IC
)),
(
RP_CLASSIFY
,
(
None
,
RP_CN_NAME
,
None
,
True
,
False
,
MODEL_FIELD_RP
)),
(
RP_CLASSIFY
,
(
RP_PID
,
RP_CN_NAME
,
None
,
True
,
False
,
MODEL_FIELD_RP
)),
(
BC_CLASSIFY
,
(
BC_PID
,
BC_CN_NAME
,
BC_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_BC
)),
(
BL_CLASSIFY
,
(
BL_PID
,
BL_CN_NAME
,
BL_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_BL
)),
(
UCI_CLASSIFY
,
(
UCI_PID
,
UCI_CN_NAME
,
UCI_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_UCI
)),
...
...
@@ -960,12 +988,12 @@ FOLDER_LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, F
LICENSE_CLASSIFY_MAPPING
=
dict
(
LICENSE_ORDER
)
OTHER_CLASSIFY_SET
=
{
OTHER_CLASSIFY
}
LICENSE_CLASSIFY_SET_1
=
{
IC_CLASSIFY
,
VAT_CLASSIFY
,
MVC_CLASSIFY
,
MVI_CLASSIFY
,
UCI_CLASSIFY
}
LICENSE_CLASSIFY_SET_1
=
{
IC_CLASSIFY
,
VAT_CLASSIFY
,
MVC_CLASSIFY
,
MVI_CLASSIFY
,
UCI_CLASSIFY
,
DDA_CLASSIFY
}
LICENSE_CLASSIFY_SET_2
=
{
BL_CLASSIFY
,
EEP_CLASSIFY
,
DL_CLASSIFY
,
PP_CLASSIFY
,
BC_CLASSIFY
}
NYYH_CLASSIFY
=
{
17
,
18
}
NYZS_CLASSIFY
=
18
SPECIAL_NYZS_CLASSIFY
=
38
SPECIAL_NYZS_CLASSIFY
=
50
MS_CLASSIFY
=
21
MS_ERROR_COL
=
(
5
,
6
)
WECHART_CLASSIFY
=
12
...
...
@@ -1163,5 +1191,24 @@ UC_ORDER = ('vinNo', 'manufactureDate', 'firstRegistrationDate')
CO_ORDER
=
(
'customerType'
,
'customerChineseName'
,
'legalRepName'
,
'idNum'
,
'businessLicenseNo'
,
'taxRegistrationCode'
,
'incorporationDate'
,
'businessLicenseDueDate'
,
'capitalRegAmount'
)
# --------------- DDA 保存图片 --------------------
DDA_FIELD
=
'DDA'
IC_FIELD
=
'ID'
BC_FIELD
=
'BC'
IC_KEY_FIELD
=
(
'姓名'
,
'公民身份号码'
)
BC_KEY_FIELD
=
'CardNum'
DDA_IC_NAME
=
'customer_name'
DDA_IC_ID
=
'customer_id'
DDA_BC_NAME
=
'account_name'
DDA_BC_ID
=
'account_id'
DDA_IMG_PATH
=
'img_path'
DDA_MAPPING
=
[
(
DDA_IC_NAME
,
IC_FIELD
),
(
DDA_IC_ID
,
IC_FIELD
),
(
DDA_BC_ID
,
BC_FIELD
),
]
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
31656b3
...
...
@@ -189,13 +189,23 @@ class Command(BaseCommand, LoggerMixin):
else
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
def
license1_process
(
self
,
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
):
def
license1_process
(
self
,
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
is_hil
,
hil_id_bc_mapping
):
# 类别:'0'身份证, '1'居住证
license_data
=
ocr_data
.
get
(
'data'
,
[]
)
license_data
=
ocr_data
.
get
(
'data'
)
if
not
license_data
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
return
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS
))
if
classify
==
consts
.
DDA_CLASSIFY
:
# DDA处理
dda_ocr_result
=
{
consts
.
DDA_IC_NAME
:
license_data
.
get
(
'result'
,
{})
.
get
(
consts
.
DDA_IC_NAME
,
{})
.
get
(
'words'
,
''
),
consts
.
DDA_IC_ID
:
license_data
.
get
(
'result'
,
{})
.
get
(
consts
.
DDA_IC_ID
,
{})
.
get
(
'words'
,
''
),
consts
.
DDA_BC_NAME
:
license_data
.
get
(
'result'
,
{})
.
get
(
consts
.
DDA_BC_NAME
,
{})
.
get
(
'words'
,
''
),
consts
.
DDA_BC_ID
:
license_data
.
get
(
'result'
,
{})
.
get
(
consts
.
DDA_BC_ID
,
{})
.
get
(
'words'
,
''
),
consts
.
DDA_IMG_PATH
:
img_path
}
license_summary
.
setdefault
(
classify
,
[])
.
append
(
dda_ocr_result
)
if
classify
==
consts
.
MVC_CLASSIFY
:
# 车辆登记证 3/4页结果整合
for
mvc_dict
in
license_data
:
try
:
...
...
@@ -231,6 +241,8 @@ class Command(BaseCommand, LoggerMixin):
mvc_dict
[
'解除抵押日期'
]
.
append
(
register_info
.
get
(
'details'
,
{})
.
get
(
'date'
,
{})
.
get
(
'words'
,
''
))
del
mvc_res
license_summary
.
setdefault
(
classify
,
[])
.
extend
(
license_data
)
if
classify
==
consts
.
IC_CLASSIFY
:
# 身份证真伪
for
id_card_dict
in
license_data
:
try
:
...
...
@@ -277,9 +289,18 @@ class Command(BaseCommand, LoggerMixin):
'{0} [ocr_4 failed] [img_path={1}]'
.
format
(
self
.
log_base
,
img_path
))
id_card_dict
[
consts
.
IC_TURE_OR_FALSE
]
=
consts
.
IC_RES_MAPPING
.
get
(
card_type
)
finally
:
if
is_hil
:
for
key
in
consts
.
IC_KEY_FIELD
:
if
not
isinstance
(
id_card_dict
.
get
(
key
),
str
):
break
value
=
id_card_dict
[
key
]
.
strip
()
if
len
(
value
)
>
0
:
hil_id_bc_mapping
.
setdefault
(
consts
.
IC_FIELD
,
dict
())
.
setdefault
(
value
,
set
())
.
add
(
img_path
)
license_summary
.
setdefault
(
classify
,
[])
.
extend
(
license_data
)
def
license2_process
(
self
,
ocr_res_2
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
):
def
license2_process
(
self
,
ocr_res_2
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
is_hil
,
hil_id_bc_mapping
):
if
ocr_res_2
.
get
(
'ErrorCode'
)
in
consts
.
SUCCESS_CODE_SET
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS
))
if
pid
==
consts
.
BC_PID
:
...
...
@@ -288,6 +309,11 @@ class Command(BaseCommand, LoggerMixin):
# for en_key, chn_key in consts.BC_FIELD:
# res_dict[chn_key] = ocr_res_2.get(en_key, '')
license_summary
.
setdefault
(
classify
,
[])
.
append
(
ocr_res_2
)
if
is_hil
and
isinstance
(
ocr_res_2
.
get
(
consts
.
BC_KEY_FIELD
),
str
):
value
=
ocr_res_2
[
consts
.
BC_KEY_FIELD
]
.
strip
()
if
len
(
value
)
>
0
:
hil_id_bc_mapping
.
setdefault
(
consts
.
BC_FIELD
,
dict
())
.
setdefault
(
value
,
set
())
.
add
(
img_path
)
else
:
# 营业执照等
for
result_dict
in
ocr_res_2
.
get
(
'ResultList'
,
[]):
...
...
@@ -696,6 +722,8 @@ class Command(BaseCommand, LoggerMixin):
business_type
,
doc_id_str
=
task_str
.
split
(
consts
.
SPLIT_STR
)
doc_id
=
int
(
doc_id_str
)
doc_class
=
HILDoc
if
business_type
==
consts
.
HIL_PREFIX
else
AFCDoc
is_hil
=
True
if
business_type
==
consts
.
HIL_PREFIX
else
False
hil_id_bc_mapping
=
dict
()
doc_data_path
=
os
.
path
.
join
(
self
.
data_dir
,
business_type
,
consts
.
TMP_DIR_NAME
,
doc_id_str
)
excel_path
=
os
.
path
.
join
(
doc_data_path
,
'{0}.xlsx'
.
format
(
doc_id_str
))
...
...
@@ -742,8 +770,8 @@ class Command(BaseCommand, LoggerMixin):
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_OTHER
))
continue
elif
classify
in
consts
.
LICENSE_CLASSIFY_SET_1
:
# 证件1
self
.
license1_process
(
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
)
self
.
license1_process
(
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
is_hil
,
hil_id_bc_mapping
)
elif
classify
in
consts
.
LICENSE_CLASSIFY_SET_2
:
# 证件2
pid
,
_
,
_
,
_
,
_
,
_
=
consts
.
LICENSE_CLASSIFY_MAPPING
.
get
(
classify
)
file_data
=
ocr_data
.
get
(
'section_img'
)
...
...
@@ -788,7 +816,9 @@ class Command(BaseCommand, LoggerMixin):
card_name_res
.
get
(
'data'
,
{})
.
get
(
'is_exists_name'
)
==
0
:
name
=
'无'
ocr_2_res
[
'Name'
]
=
name
self
.
license2_process
(
ocr_2_res
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
)
self
.
license2_process
(
ocr_2_res
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
is_hil
,
hil_id_bc_mapping
)
break
else
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_FAILED_2
))
...
...
@@ -890,6 +920,29 @@ class Command(BaseCommand, LoggerMixin):
os
.
remove
(
excel_path
)
finally
:
# TODO 识别结果存一张表,方便跑报表
# DDA处理
if
is_hil
:
# 获取需要保存图片的集合
hil_dda_save_img_list
=
[]
for
dda_res_list
in
license_summary
.
get
(
consts
.
DDA_CLASSIFY
,
[]):
for
dda_idx
,
dda_res
in
enumerate
(
dda_res_list
):
save_img_dict
=
{
consts
.
DDA_FIELD
:
dda_res
.
get
(
consts
.
DDA_IMG_PATH
)
}
for
dda_field
,
license_field
in
consts
.
DDA_MAPPING
:
target_str
=
dda_res
.
get
(
dda_field
,
''
)
save_img_dict
.
setdefault
(
license_field
,
set
())
.
update
(
hil_id_bc_mapping
.
get
(
license_field
,
dict
())
.
get
(
target_str
,
set
()))
hil_dda_save_img_list
.
append
(
save_img_dict
)
self
.
online_log
.
info
(
'{0} [DDA process] [DDA_info={1}]'
.
format
(
self
.
log_base
,
license_summary
.
get
(
consts
.
DDA_CLASSIFY
,
[])))
self
.
online_log
.
info
(
'{0} [DDA process] [ic&bc_info={1}]'
.
format
(
self
.
log_base
,
hil_id_bc_mapping
))
self
.
online_log
.
info
(
'{0} [DDA process] [img_path={1}]'
.
format
(
self
.
log_base
,
hil_dda_save_img_list
))
# 保存图片
# 数据库记录
# report记录
# CA比对
if
doc
.
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
0
]:
try
:
# 更新OCR累计识别结果表
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment