Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
c1c49a8e
authored
2020-09-23 18:57:10 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
license part 1
1 parent
96b67222
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
98 additions
and
13 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/doc_ocr_process.py
src/apps/doc/consts.py
View file @
c1c49a8
...
...
@@ -478,3 +478,14 @@ BC_FIELD = (('CardNum', '银行卡号'),
# 'ErrorCode' values of the second-stage OCR response that count as success
# (both the string and the integer form are accepted).
SUCCESS_CODE_SET = {'0', 0}

# pid that license2_process treats as the bank-card product.
BC_PID = 4

# Classification codes returned by the first-stage OCR, grouped by handling:
OTHER_SET = {0, 1, 2}  # "other" category: ignored
BS_SET = {10, 11, 12}  # bank statements
LICENSE_SET_1 = {110, 111, 112}  # licenses, type 1
LICENSE_SET_2 = {1110, 1111, 1112}  # licenses, type 2

# classify code -> (pid, license_key) used to call the second-stage OCR.
# NOTE(review): the only key here is 0, which also belongs to OTHER_SET, and none
# of the LICENSE_SET_2 codes is present, so a lookup with a LICENSE_SET_2 code
# returns None -- confirm the intended keys.
CLASSIFY_PID_DICT = {
    0: (BC_PID, BC_KEY),  # bank card
}
...
...
src/apps/doc/management/commands/doc_ocr_process.py
View file @
c1c49a8
...
...
@@ -4,6 +4,7 @@ import signal
import
asyncio
import
aiohttp
import
difflib
import
base64
import
requests
from
datetime
import
datetime
,
date
from
collections
import
Counter
...
...
@@ -30,7 +31,8 @@ class Command(BaseCommand, LoggerMixin):
# 数据目录
self
.
data_dir
=
conf
.
DATA_DIR
# ocr相关
self
.
ocr_url
=
conf
.
OCR_URL
self
.
ocr_url_1
=
conf
.
OCR_URL_1
self
.
ocr_url_2
=
conf
.
OCR_URL_2
# EDMS web_service_api
self
.
edms
=
EDMS
(
conf
.
EDMS_USER
,
conf
.
EDMS_PWD
)
# 优雅退出信号:15
...
...
@@ -79,7 +81,7 @@ class Command(BaseCommand, LoggerMixin):
return
doc_data_path
,
excel_path
,
src_excel_path
,
pdf_path
@staticmethod
def
append_bs_sheet
(
wb
,
sheets
,
bs_summary
,
unknown_summary
,
pno
,
img_idx
,
classify
,
confidence
):
def
bs_process
(
wb
,
sheets
,
bs_summary
,
unknown_summary
,
pno
,
img_idx
,
classify
,
confidence
):
for
i
,
sheet
in
enumerate
(
sheets
):
sheet_name
=
'page_{0}_img_{1}_{2}'
.
format
(
pno
,
img_idx
,
i
)
# ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间']
...
...
@@ -137,7 +139,39 @@ class Command(BaseCommand, LoggerMixin):
words
=
cell
.
get
(
'words'
)
ws
.
cell
(
row
=
r1
+
1
,
column
=
c1
+
1
,
value
=
words
)
def license2_process(self, img_path, license_summary, pid, license_key):
    """Send one image to the second OCR service and record the recognised fields.

    The image at ``img_path`` is read, base64-encoded and posted to
    ``self.ocr_url_2`` together with ``pid`` and the configured key/secret.
    Nothing is recorded unless the HTTP status is 200 and the response's
    ``ErrorCode`` is in ``consts.SUCCESS_CODE_SET``.

    On success, lists of ``(chinese_key, value)`` tuples are appended to
    ``license_summary[license_key]`` (created on demand):

    * bank card (``pid == consts.BC_PID``): one list built from the
      top-level response fields named in ``consts.BC_FIELD``;
    * any other pid: one list per entry of ``ResultList``, built from that
      entry's ``FieldList``.

    Returns None; ``license_summary`` is mutated in place.
    """
    with open(img_path, 'rb') as img_file:
        # The service expects the base64 text, not the raw bytes.
        encoded_img = base64.b64encode(img_file.read()).decode()

    # pid is the product id; key and secret come from the OCR account settings.
    payload = {
        "pid": str(pid),
        "key": conf.OCR_KEY,
        "secret": conf.OCR_SECRET,
        "file": encoded_img,
    }
    reply = requests.post(self.ocr_url_2, data=payload)
    if reply.status_code != 200:
        return

    # Recognition result.
    body = reply.json()
    if body.get('ErrorCode') not in consts.SUCCESS_CODE_SET:
        return

    if pid == consts.BC_PID:
        # Bank card: fields sit at the top level of the response.
        card_fields = [
            (chn_key, body.get(en_key, ''))
            for en_key, chn_key in consts.BC_FIELD
        ]
        license_summary.setdefault(license_key, []).append(card_fields)
        return

    # Business license, vehicle license, etc.: one field list per result.
    for result_dict in body.get('ResultList', []):
        result_fields = [
            (field_dict.get('chn_key', ''), field_dict.get('value', ''))
            for field_dict in result_dict.get('FieldList', [])
        ]
        license_summary.setdefault(license_key, []).append(result_fields)
def
ocr_2_wb
(
self
,
res
,
wb
,
pno
,
img_idx
,
bs_summary
,
unknown_summary
,
license_summary
):
# # 流水
# res = {
# 'code': 1,
# 'msg': 'success',
...
...
@@ -156,18 +190,55 @@ class Command(BaseCommand, LoggerMixin):
# ]
# }
# }
#
# # 证件-1
# res = {
# 'code': 1,
# 'msg': 'success',
# 'data': {
# 'classify': 0,
# 'confidence': 0.999,
# 'data': [
# {
# 'cn_key': 'value',
# 'cn_key': 'value',
# },
# {
# 'cn_key': 'value',
# 'cn_key': 'value',
# },
# ]
# }
# }
#
# # 证件-2 or 其他类
# res = {
# 'code': 1,
# 'msg': 'success',
# 'data': {
# 'classify': 0,
# 'confidence': 0.999,
# }
# }
data
=
res
.
get
(
'data'
,
{})
classify
=
data
.
get
(
'classify'
)
if
classify
is
None
:
return
# if classify in
sheets
=
data
.
get
(
'sheets'
,
[])
if
not
sheets
:
elif
classify
in
consts
.
OTHER_SET
:
# 其他类
return
confidence
=
data
.
get
(
'confidence'
,
1
)
self
.
append_bs_sheet
(
wb
,
sheets
,
bs_summary
,
unknown_summary
,
pno
,
img_idx
,
classify
,
confidence
)
# else:
# pass
elif
classify
in
consts
.
BS_SET
:
# 流水处理
sheets
=
data
.
get
(
'sheets'
,
[])
if
not
sheets
:
return
confidence
=
data
.
get
(
'confidence'
,
1
)
self
.
bs_process
(
wb
,
sheets
,
bs_summary
,
unknown_summary
,
pno
,
img_idx
,
classify
,
confidence
)
elif
classify
in
consts
.
LICENSE_SET_1
:
# 证件1
# self.license1_process() # TODO license1
pass
elif
classify
in
consts
.
LICENSE_SET_2
:
# 证件2
pid
,
license_key
=
consts
.
CLASSIFY_PID_DICT
.
get
(
classify
)
self
.
license2_process
(
license_summary
,
pid
,
license_key
)
# TODO reuse img data?
# async def fetch_ocr_result(self, img_path):
# async with aiohttp.ClientSession(
...
...
@@ -188,8 +259,9 @@ class Command(BaseCommand, LoggerMixin):
files
=
[
(
'img'
,
open
(
img_path
,
'rb'
))
]
response
=
requests
.
request
(
"POST"
,
self
.
ocr_url
,
files
=
files
)
return
response
.
json
()
response
=
requests
.
request
(
"POST"
,
self
.
ocr_url_1
,
files
=
files
)
if
response
.
status_code
==
200
:
return
response
.
json
()
def
img_2_ocr_2_wb
(
self
,
wb
,
img_info
,
bs_summary
,
unknown_summary
,
license_summary
):
res
=
self
.
fetch_ocr_result
(
img_info
[
0
])
...
...
@@ -255,7 +327,6 @@ class Command(BaseCommand, LoggerMixin):
summary
[
'role'
]
=
self
.
get_most
(
summary
[
'role'
])
return
bs_summary
def
rebuild_bs_summary
(
self
,
bs_summary
,
unknown_summary
):
# bs_summary = {
# '卡号': {
...
...
@@ -336,9 +407,12 @@ class Command(BaseCommand, LoggerMixin):
return
merged_bs_summary
# TODO 细化文件状态,不同异常状态,归还队列,重试时采取不同的处理
# TODO 调用接口重试
# TODO 协程异步发送OCR请求
# TODO 调用接口重试
# TODO 异常邮件通知
# 识别失败:普通异常,如PDF异常、构建过程异常
# EDMS异常:下载异常-->回队列-->邮件;上传异常-->重新上传队列-->邮件
# 算法异常:第一道异常-->识别失败-->邮件;第二道异常-->识别失败-->邮件
# TODO 数据库断联问题
# TODO 非流水证件处理
# TODO EDMS API GATEWAY
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment