Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
31656b38
authored
4 years ago
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
DDA part 1
1 parent
779dbb1a
master
...
CHINARPA-4562
OCR-recognition-for-FSM-related-documents
feature/202506-monixiadan
feature/4058
feature/CHINAPRA-4447
feature/CHINARPA-3290-FSM-AUTO
feature/CHINARPA-3443
feature/CHINARPA-3523
feature/CHINARPA-3528
feature/CHINARPA-3529
feature/CHINARPA-3577
feature/CHINARPA-3786
feature/CHINARPA-3964
feature/CHINARPA-4137
feature/CHINARPA-4277
feature/CHINARPA-4302/all-pass
feature/CHINARPA-4341
feature/CHINARPA-4357
feature/CHINARPA-4358
feature/CHINARPA-4395
feature/CHINARPA-4495
feature/CHINARPA-4546
feature/CHINARPA-4623
feature/CHINARPA-4659
feature/CHINARPA-4660
feature/CHINARPA-4731
feature/CHINARPA-4846
feature/CHINARPA-4941
feature/CHINARPA-4942
feature/CHINARPA-4944
feature/CHINARPA-4962
feature/CHINARPA-5015
feature/CHINARPA-5075
feature/CHINARPA-5092
feature/CHINARPA-5117
feature/CHINARPA-5118
feature/CHINARPA-5131-5234
feature/CHINARPA-5153
feature/CHINARPA-5155
feature/CHINARPA-5296
feature/CHINARPA-5504
feature/CHINARPA-5619
feature/CHINARPA-5620-dzfp
feature/CHINARPA_5015_SQL
feature/KWOM_July
feature/SE
feature/SE2
feature/SE3
feature/add_log_20240924
feature/add_try_except
feature/admin
feature/admin2
feature/auto-flag
feature/e-bank
feature/enhancement-file-name-change
feature/f3
feature/filter-file
feature/fix_label_40_dydjhmh
feature/fsm-contract
feature/fsm-full
feature/hotfix_insurance
feature/mssql-encrypt
feature/new-pwd
feature/pdftoimg
feature/pentest
feature/pres
feature/pres-3034
feature/qrs
feature/report
feature/report2
feature/rpa
feature/sc
feature/seOct
feature/token
feature/uat-new
feature/uat-tmp
feature/uat-tmp-cms-yace
feature/uat-tmp-cy
feature/uat-tmp-wblog
feature/upgrade_cut_img
feature/weixin-bs
feature/weixin-bs-2
feature/zfb
feature/zip
feature_add_down_payment
feature_add_income_keywords_cy
feature_add_insurance_sec_page
fix/1118上线问题反馈
fix/1227
fix/2024-05-pen-test
fix/20240424
fix/hil_excel_sql
fix/id-card
fix/new_hil_contract
fix/report_ca
hotfix/2025-02
hotfix/2025-04
hotfix/2025-06
master-0117
ocr-Pre-Settlement
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
113 additions
and
13 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/consts.py
View file @
31656b3
...
...
@@ -520,7 +520,7 @@ OTHER_TUPLE = (None, None, None, None, None, None, None, None, None, None, None,
# "35":"针式打印-部分格线-竖版-邮储银行",
# "36":"针式打印-部分格线-竖版-邮储银行-绿卡",
# "
38
":"普通打印-无格线-农业银行-整数-特殊",
# "
50
":"普通打印-无格线-农业银行-整数-特殊",
CLASSIFY_LIST
=
[
(
'其他'
,
OTHER_TUPLE
),
...
...
@@ -563,6 +563,18 @@ CLASSIFY_LIST = [
(
'针式打印-部分格线-竖版-邮储银行'
,
(
2
,
None
,
5
,
6
,
None
,
4
,
None
,
7
,
None
,
None
,
None
,
None
,
None
)),
(
'针式打印-部分格线-竖版-邮储银行-绿卡'
,
(
2
,
None
,
5
,
6
,
None
,
4
,
None
,
7
,
None
,
None
,
None
,
None
,
None
)),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'其他'
,
OTHER_TUPLE
),
(
'普通打印-无格线-农业银行-整数-特殊'
,
(
1
,
None
,
3
,
4
,
None
,
2
,
None
,
5
,
None
,
None
,
None
,
None
,
None
)),
]
...
...
@@ -608,6 +620,18 @@ CLASSIFY_HEADER_LIST = [
(
'序号'
,
'交易日期'
,
'交易渠道'
,
'摘要'
,
'交易金额'
,
'账户余额'
,
'对方账号/卡号/汇票号'
,
'原子账号'
,
'交易机构名称'
),
(
'序号'
,
'交易日期'
,
'交易渠道'
,
'摘要'
,
'交易金额'
,
'账户余额'
,
'对方账号/卡号/汇票号'
,
'原子账号'
,
'交易机构名称'
),
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
OTHER_TUPLE
,
(
'交易日期'
,
'摘要/附言'
,
'交易金额'
,
'账户余额'
,
'对方账号和户名'
),
]
...
...
@@ -812,7 +836,7 @@ MVI_FIELD_ORDER = (('发票代码', '发票代码'),
(
'主管税务机关及代码'
,
'主管税务机关及代码'
),
(
'吨位'
,
'吨位'
),
(
'限乘人数'
,
'限乘人数'
),)
IC_PID
=
VAT_PID
=
VATS_PID
=
MVC_PID
=
MVI_PID
=
None
IC_PID
=
VAT_PID
=
VATS_PID
=
MVC_PID
=
MVI_PID
=
RP_PID
=
None
# 营业执照
BL_CN_NAME
=
'营业执照'
...
...
@@ -916,6 +940,10 @@ BC_FIELD_ORDER = (('BankName', '发卡行名称'),
(
'CardType'
,
'银行卡类型'
),
(
'Name'
,
'持卡人姓名'
),)
# DDA
DDA_CN_NAME
=
'DDA'
DDA_CLASSIFY
=
38
SUCCESS_CODE_SET
=
{
'0'
,
0
}
FIELD_ORDER_MAP
=
{
...
...
@@ -941,7 +969,7 @@ MODEL_FIELD_VAT = 'vat_count'
LICENSE_ORDER
=
((
MVI_CLASSIFY
,
(
MVI_PID
,
MVI_CN_NAME
,
MVI_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_MVI
)),
(
IC_CLASSIFY
,
(
IC_PID
,
IC_CN_NAME
,
None
,
True
,
False
,
MODEL_FIELD_IC
)),
(
RP_CLASSIFY
,
(
None
,
RP_CN_NAME
,
None
,
True
,
False
,
MODEL_FIELD_RP
)),
(
RP_CLASSIFY
,
(
RP_PID
,
RP_CN_NAME
,
None
,
True
,
False
,
MODEL_FIELD_RP
)),
(
BC_CLASSIFY
,
(
BC_PID
,
BC_CN_NAME
,
BC_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_BC
)),
(
BL_CLASSIFY
,
(
BL_PID
,
BL_CN_NAME
,
BL_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_BL
)),
(
UCI_CLASSIFY
,
(
UCI_PID
,
UCI_CN_NAME
,
UCI_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_UCI
)),
...
...
@@ -960,12 +988,12 @@ FOLDER_LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, F
LICENSE_CLASSIFY_MAPPING
=
dict
(
LICENSE_ORDER
)
OTHER_CLASSIFY_SET
=
{
OTHER_CLASSIFY
}
LICENSE_CLASSIFY_SET_1
=
{
IC_CLASSIFY
,
VAT_CLASSIFY
,
MVC_CLASSIFY
,
MVI_CLASSIFY
,
UCI_CLASSIFY
}
LICENSE_CLASSIFY_SET_1
=
{
IC_CLASSIFY
,
VAT_CLASSIFY
,
MVC_CLASSIFY
,
MVI_CLASSIFY
,
UCI_CLASSIFY
,
DDA_CLASSIFY
}
LICENSE_CLASSIFY_SET_2
=
{
BL_CLASSIFY
,
EEP_CLASSIFY
,
DL_CLASSIFY
,
PP_CLASSIFY
,
BC_CLASSIFY
}
NYYH_CLASSIFY
=
{
17
,
18
}
NYZS_CLASSIFY
=
18
SPECIAL_NYZS_CLASSIFY
=
38
SPECIAL_NYZS_CLASSIFY
=
50
MS_CLASSIFY
=
21
MS_ERROR_COL
=
(
5
,
6
)
WECHART_CLASSIFY
=
12
...
...
@@ -1163,5 +1191,24 @@ UC_ORDER = ('vinNo', 'manufactureDate', 'firstRegistrationDate')
CO_ORDER
=
(
'customerType'
,
'customerChineseName'
,
'legalRepName'
,
'idNum'
,
'businessLicenseNo'
,
'taxRegistrationCode'
,
'incorporationDate'
,
'businessLicenseDueDate'
,
'capitalRegAmount'
)
# --------------- DDA 保存图片 --------------------
DDA_FIELD
=
'DDA'
IC_FIELD
=
'ID'
BC_FIELD
=
'BC'
IC_KEY_FIELD
=
(
'姓名'
,
'公民身份号码'
)
BC_KEY_FIELD
=
'CardNum'
DDA_IC_NAME
=
'customer_name'
DDA_IC_ID
=
'customer_id'
DDA_BC_NAME
=
'account_name'
DDA_BC_ID
=
'account_id'
DDA_IMG_PATH
=
'img_path'
DDA_MAPPING
=
[
(
DDA_IC_NAME
,
IC_FIELD
),
(
DDA_IC_ID
,
IC_FIELD
),
(
DDA_BC_ID
,
BC_FIELD
),
]
...
...
This diff is collapsed.
Click to expand it.
src/apps/doc/management/commands/ocr_process.py
View file @
31656b3
...
...
@@ -189,13 +189,23 @@ class Command(BaseCommand, LoggerMixin):
else
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
def
license1_process
(
self
,
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
):
def
license1_process
(
self
,
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
is_hil
,
hil_id_bc_mapping
):
# 类别:'0'身份证, '1'居住证
license_data
=
ocr_data
.
get
(
'data'
,
[]
)
license_data
=
ocr_data
.
get
(
'data'
)
if
not
license_data
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
return
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS
))
if
classify
==
consts
.
DDA_CLASSIFY
:
# DDA处理
dda_ocr_result
=
{
consts
.
DDA_IC_NAME
:
license_data
.
get
(
'result'
,
{})
.
get
(
consts
.
DDA_IC_NAME
,
{})
.
get
(
'words'
,
''
),
consts
.
DDA_IC_ID
:
license_data
.
get
(
'result'
,
{})
.
get
(
consts
.
DDA_IC_ID
,
{})
.
get
(
'words'
,
''
),
consts
.
DDA_BC_NAME
:
license_data
.
get
(
'result'
,
{})
.
get
(
consts
.
DDA_BC_NAME
,
{})
.
get
(
'words'
,
''
),
consts
.
DDA_BC_ID
:
license_data
.
get
(
'result'
,
{})
.
get
(
consts
.
DDA_BC_ID
,
{})
.
get
(
'words'
,
''
),
consts
.
DDA_IMG_PATH
:
img_path
}
license_summary
.
setdefault
(
classify
,
[])
.
append
(
dda_ocr_result
)
if
classify
==
consts
.
MVC_CLASSIFY
:
# 车辆登记证 3/4页结果整合
for
mvc_dict
in
license_data
:
try
:
...
...
@@ -231,6 +241,8 @@ class Command(BaseCommand, LoggerMixin):
mvc_dict
[
'解除抵押日期'
]
.
append
(
register_info
.
get
(
'details'
,
{})
.
get
(
'date'
,
{})
.
get
(
'words'
,
''
))
del
mvc_res
license_summary
.
setdefault
(
classify
,
[])
.
extend
(
license_data
)
if
classify
==
consts
.
IC_CLASSIFY
:
# 身份证真伪
for
id_card_dict
in
license_data
:
try
:
...
...
@@ -277,9 +289,18 @@ class Command(BaseCommand, LoggerMixin):
'{0} [ocr_4 failed] [img_path={1}]'
.
format
(
self
.
log_base
,
img_path
))
id_card_dict
[
consts
.
IC_TURE_OR_FALSE
]
=
consts
.
IC_RES_MAPPING
.
get
(
card_type
)
license_summary
.
setdefault
(
classify
,
[])
.
extend
(
license_data
)
def
license2_process
(
self
,
ocr_res_2
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
):
finally
:
if
is_hil
:
for
key
in
consts
.
IC_KEY_FIELD
:
if
not
isinstance
(
id_card_dict
.
get
(
key
),
str
):
break
value
=
id_card_dict
[
key
]
.
strip
()
if
len
(
value
)
>
0
:
hil_id_bc_mapping
.
setdefault
(
consts
.
IC_FIELD
,
dict
())
.
setdefault
(
value
,
set
())
.
add
(
img_path
)
license_summary
.
setdefault
(
classify
,
[])
.
extend
(
license_data
)
def
license2_process
(
self
,
ocr_res_2
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
is_hil
,
hil_id_bc_mapping
):
if
ocr_res_2
.
get
(
'ErrorCode'
)
in
consts
.
SUCCESS_CODE_SET
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS
))
if
pid
==
consts
.
BC_PID
:
...
...
@@ -288,6 +309,11 @@ class Command(BaseCommand, LoggerMixin):
# for en_key, chn_key in consts.BC_FIELD:
# res_dict[chn_key] = ocr_res_2.get(en_key, '')
license_summary
.
setdefault
(
classify
,
[])
.
append
(
ocr_res_2
)
if
is_hil
and
isinstance
(
ocr_res_2
.
get
(
consts
.
BC_KEY_FIELD
),
str
):
value
=
ocr_res_2
[
consts
.
BC_KEY_FIELD
]
.
strip
()
if
len
(
value
)
>
0
:
hil_id_bc_mapping
.
setdefault
(
consts
.
BC_FIELD
,
dict
())
.
setdefault
(
value
,
set
())
.
add
(
img_path
)
else
:
# 营业执照等
for
result_dict
in
ocr_res_2
.
get
(
'ResultList'
,
[]):
...
...
@@ -696,6 +722,8 @@ class Command(BaseCommand, LoggerMixin):
business_type
,
doc_id_str
=
task_str
.
split
(
consts
.
SPLIT_STR
)
doc_id
=
int
(
doc_id_str
)
doc_class
=
HILDoc
if
business_type
==
consts
.
HIL_PREFIX
else
AFCDoc
is_hil
=
True
if
business_type
==
consts
.
HIL_PREFIX
else
False
hil_id_bc_mapping
=
dict
()
doc_data_path
=
os
.
path
.
join
(
self
.
data_dir
,
business_type
,
consts
.
TMP_DIR_NAME
,
doc_id_str
)
excel_path
=
os
.
path
.
join
(
doc_data_path
,
'{0}.xlsx'
.
format
(
doc_id_str
))
...
...
@@ -742,8 +770,8 @@ class Command(BaseCommand, LoggerMixin):
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_OTHER
))
continue
elif
classify
in
consts
.
LICENSE_CLASSIFY_SET_1
:
# 证件1
self
.
license1_process
(
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
)
self
.
license1_process
(
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
is_hil
,
hil_id_bc_mapping
)
elif
classify
in
consts
.
LICENSE_CLASSIFY_SET_2
:
# 证件2
pid
,
_
,
_
,
_
,
_
,
_
=
consts
.
LICENSE_CLASSIFY_MAPPING
.
get
(
classify
)
file_data
=
ocr_data
.
get
(
'section_img'
)
...
...
@@ -788,7 +816,9 @@ class Command(BaseCommand, LoggerMixin):
card_name_res
.
get
(
'data'
,
{})
.
get
(
'is_exists_name'
)
==
0
:
name
=
'无'
ocr_2_res
[
'Name'
]
=
name
self
.
license2_process
(
ocr_2_res
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
)
self
.
license2_process
(
ocr_2_res
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
is_hil
,
hil_id_bc_mapping
)
break
else
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_FAILED_2
))
...
...
@@ -890,6 +920,29 @@ class Command(BaseCommand, LoggerMixin):
os
.
remove
(
excel_path
)
finally
:
# TODO 识别结果存一张表,方便跑报表
# DDA处理
if
is_hil
:
# 获取需要保存图片的集合
hil_dda_save_img_list
=
[]
for
dda_res_list
in
license_summary
.
get
(
consts
.
DDA_CLASSIFY
,
[]):
for
dda_idx
,
dda_res
in
enumerate
(
dda_res_list
):
save_img_dict
=
{
consts
.
DDA_FIELD
:
dda_res
.
get
(
consts
.
DDA_IMG_PATH
)
}
for
dda_field
,
license_field
in
consts
.
DDA_MAPPING
:
target_str
=
dda_res
.
get
(
dda_field
,
''
)
save_img_dict
.
setdefault
(
license_field
,
set
())
.
update
(
hil_id_bc_mapping
.
get
(
license_field
,
dict
())
.
get
(
target_str
,
set
()))
hil_dda_save_img_list
.
append
(
save_img_dict
)
self
.
online_log
.
info
(
'{0} [DDA process] [DDA_info={1}]'
.
format
(
self
.
log_base
,
license_summary
.
get
(
consts
.
DDA_CLASSIFY
,
[])))
self
.
online_log
.
info
(
'{0} [DDA process] [ic&bc_info={1}]'
.
format
(
self
.
log_base
,
hil_id_bc_mapping
))
self
.
online_log
.
info
(
'{0} [DDA process] [img_path={1}]'
.
format
(
self
.
log_base
,
hil_dda_save_img_list
))
# 保存图片
# 数据库记录
# report记录
# CA比对
if
doc
.
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
0
]:
try
:
# 更新OCR累计识别结果表
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment