Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
591d1ac9
authored
2020-12-10 17:44:07 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
Add ID card (IC) authenticity check (true/false)
1 parent
789f8e25
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
66 additions
and
4 deletions
src/apps/doc/consts.py
src/apps/doc/exceptions.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/consts.py
View file @
591d1ac
...
...
@@ -624,7 +624,9 @@ OTHER_CLASSIFY = 2
# 身份证
IC_CN_NAME
=
'身份证'
IC_CLASSIFY
=
33
IC_FIELD_ORDER_0
=
((
'姓名'
,
'姓名'
),
# Field label under which the ID-card authenticity verdict is stored.
# NOTE: "TURE" is a misspelling of "TRUE"; the original name is kept because
# other modules reference it, and a correctly spelled alias is provided.
IC_TURE_OR_FALSE = '真伪'
IC_TRUE_OR_FALSE = IC_TURE_OR_FALSE
IC_FIELD_ORDER_0
=
((
IC_TURE_OR_FALSE
,
'身份证'
),
(
'姓名'
,
'姓名'
),
(
'公民身份号码'
,
'公民身份号码'
),
(
'出生年月'
,
'出生年月'
),
(
'住址'
,
'住址'
),
...
...
@@ -934,3 +936,10 @@ PATTERN_LIST = ['交易名称', '收入/支出金额', '收入', '存入', '支
'短摘要'
,
'本次余额'
,
'交易后余额'
,
'交易说明'
,
'帐户余额'
,
'交易日期 记账日期'
]
# Matches a single character in the range U+4E00..U+9FA5.
CN_RE = re.compile(u'[\u4e00-\u9fa5]')
# Maps the integer result code of the ID-card authenticity check to the
# label written into the recognised record.
IC_RES_MAPPING = {
    -2: '不是有效证件',  # not a valid document
    -1: '无法判断',      # cannot be determined
    0: '伪造证件',       # forged document
    1: '真实证件',       # genuine document
}
...
...
src/apps/doc/exceptions.py
View file @
591d1ac
...
...
@@ -8,3 +8,7 @@ class OCR1Exception(Exception):
class OCR2Exception(Exception):
    """Error type for the OCR-2 step; imported by the ocr_process command."""
class OCR4Exception(Exception):
    """Raised when the ocr_4 (ID-card authenticity) request does not return HTTP 200."""
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
591d1ac
...
...
@@ -23,7 +23,7 @@ from common.tools.pdf_to_img import PDFHandler
from
apps.doc
import
consts
from
apps.doc.ocr.edms
import
EDMS
,
rh
from
apps.doc.named_enum
import
KeywordsType
from
apps.doc.exceptions
import
EDMSException
,
OCR1Exception
,
OCR2Exception
from
apps.doc.exceptions
import
EDMSException
,
OCR1Exception
,
OCR2Exception
,
OCR4Exception
from
apps.doc.ocr.wb
import
BSWorkbook
,
Workbook
from
apps.doc.models
import
DocStatus
,
HILDoc
,
AFCDoc
,
Keywords
...
...
@@ -48,6 +48,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
ocr_1_urls
=
conf
.
get_namespace
(
'OCR_URL_1_'
)
self
.
ocr_url_2
=
conf
.
OCR_URL_2
self
.
ocr_url_3
=
conf
.
BC_URL
self
.
ocr_url_4
=
conf
.
IC_URL
# EDMS web_service_api
self
.
edms
=
EDMS
()
# 优雅退出信号:15
...
...
@@ -189,13 +190,60 @@ class Command(BaseCommand, LoggerMixin):
else
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path=None):
    """Collect "license 1" OCR records and, for ID cards, attach an authenticity label.

    Args:
        ocr_data: dict returned by the upstream OCR step; its ``'data'`` entry
            is the list of recognised records (each a dict).
        license_summary: dict keyed by classify id; the records are appended
            to ``license_summary[classify]``.
        classify: classification id of the current image.
        res_list: list that receives a ``(pno, ino, part_idx, status)`` tuple.
        pno, ino, part_idx: position identifiers copied into ``res_list``.
        img_path: image path, used only in log messages. Optional so that
            callers using the older 7-argument form keep working.

    Returns:
        None. Results are written into ``res_list`` and ``license_summary``.
    """
    # Categories: '0' ID card, '1' residence permit
    license_data = ocr_data.get('data', [])
    if not license_data:
        res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
        return
    res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))

    if classify == consts.IC_CLASSIFY:
        for id_card_dict in license_data:
            try:
                # The image is removed from the record so that it is not
                # carried into the summary.
                base64_img = id_card_dict.pop('base64_img')
            except (AttributeError, KeyError, TypeError):
                # No image (or not a dict): nothing to verify for this record.
                continue
            card_type = self._ic_distinguish_result(base64_img, img_path)
            # Unknown codes fall back to the "cannot be determined" label
            # instead of storing None.
            id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(
                card_type, consts.IC_RES_MAPPING.get(-1))

    license_summary.setdefault(classify, []).extend(license_data)


def _ic_distinguish_result(self, base64_img, img_path):
    """Call the ocr_4 service for one ID-card image and return its result code.

    Retries up to ``consts.RETRY_TIMES`` times. Returns -1 when every attempt
    fails or when the response does not carry a usable result code.
    """
    json_data_4 = {
        'mode': 1,
        'user_info': {
            'image_content': base64_img,
        },
        'options': {
            'distinguish_type': 1,
            'auto_rotate': True,
        },
    }
    for times in range(consts.RETRY_TIMES):
        start_time = time.time()
        try:
            # NOTE(review): no timeout is passed, so a stalled service blocks
            # this worker indefinitely -- confirm whether one should be set.
            ocr_4_response = requests.post(self.ocr_url_4, json=json_data_4)
            if ocr_4_response.status_code != 200:
                raise OCR4Exception('ocr_4 status code: {0}'.format(ocr_4_response.status_code))
            # Response.json() already decodes the body; only decode a second
            # time when the service wrapped its JSON in a JSON string.
            ocr_4_res = ocr_4_response.json()
            if isinstance(ocr_4_res, str):
                ocr_4_res = json.loads(ocr_4_res)
            if not isinstance(ocr_4_res, dict):
                raise OCR4Exception('ocr_4 unexpected payload type: {0}'.format(type(ocr_4_res).__name__))
        except Exception:
            # Service boundary: log the failure and try again.
            self.cronjob_log.warn('{0} [ocr_4 failed] [times={1}] [img_path={2}] [error={3}]'.format(
                self.log_base, times, img_path, traceback.format_exc()))
            continue

        speed_time = int(time.time() - start_time)
        card_type = -1
        result = ocr_4_res.get('result')
        if not isinstance(result, dict):
            result = {}
        if ocr_4_res.get('code') == 0 and result.get('rtn') == 0:
            distinguish = result.get('idcard_distinguish_result')
            if isinstance(distinguish, dict):
                card_type = distinguish.get('result', -1)
            if not isinstance(card_type, int):
                card_type = -1
        self.cronjob_log.info('{0} [ocr_4 success] [img_path={1}] [speed_time={2}]'.format(
            self.log_base, img_path, speed_time))
        return card_type

    # All retries exhausted.
    self.cronjob_log.warn('{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path))
    return -1
def
license2_process
(
self
,
ocr_res_2
,
license_summary
,
pid
,
classify
,
res_list
,
pno
,
ino
,
part_idx
):
...
...
@@ -559,7 +607,8 @@ class Command(BaseCommand, LoggerMixin):
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_OTHER
))
continue
elif
classify
in
consts
.
LICENSE_CLASSIFY_SET_1
:
# 证件1
self
.
license1_process
(
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
)
self
.
license1_process
(
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
)
elif
classify
in
consts
.
LICENSE_CLASSIFY_SET_2
:
# 证件2
pid
,
_
,
_
,
_
,
_
,
_
=
consts
.
LICENSE_CLASSIFY_MAPPING
.
get
(
classify
)
file_data
=
ocr_data
.
get
(
'section_img'
)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment