Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
6a5899fa
authored
2020-10-14 17:51:45 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add asyncio
1 parent
f682cf20
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
117 additions
and
78 deletions
.gitignore
src/apps/doc/management/commands/doc_ocr_process.py
.gitignore
View file @
6a5899f
...
...
@@ -33,6 +33,4 @@ data/*
# 脚本
src/*.sh
test.py
ocr_test.py
ocr_test_2.py
\ No newline at end of file
test*
\ No newline at end of file
...
...
src/apps/doc/management/commands/doc_ocr_process.py
View file @
6a5899f
...
...
@@ -175,14 +175,12 @@ class Command(BaseCommand, LoggerMixin):
(
field_dict
.
get
(
'chn_key'
,
''
),
field_dict
.
get
(
'value'
,
''
)))
license_summary
.
setdefault
(
classify
,
[])
.
append
(
res_list
)
# async def fetch_ocr_result(self, img_path):
# async with aiohttp.ClientSession(
# headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False)
# ) as session:
# json_data = self.get_ocr_json(img_path)
# async with session.post(self.ocr_url, json=json_data) as response:
# return await response.json()
#
async def fetch_ocr_result(self, url, json_data):
    """POST ``json_data`` as a JSON body to ``url`` and return the decoded reply.

    A fresh ``aiohttp.ClientSession`` is opened for every call and closed
    again before returning.

    :param url: endpoint the request is posted to.
    :param json_data: payload handed to ``session.post`` via ``json=``.
    :return: the result of ``response.json()`` when the HTTP status is 200;
        ``None`` for any other status (callers are expected to check for it).
    """
    # NOTE(review): ssl=False is passed to the connector; per the aiohttp
    # docs this turns off certificate verification -- confirm it is intended.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as client:
        async with client.post(url, json=json_data) as resp:
            if resp.status != 200:
                # Non-200 replies are reported to the caller as None.
                return None
            return await resp.json()
# async def img_2_ocr_2_wb(self, wb, img_path, summary):
# res = await self.fetch_ocr_result(img_path)
# self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res))
...
...
@@ -190,56 +188,7 @@ class Command(BaseCommand, LoggerMixin):
# img_name = os.path.basename(img_path)
# self.append_sheet(wb, sheets_list, img_name, summary)
def
img_2_ocr_2_wb
(
self
,
wb
,
img_path
,
bs_summary
,
unknown_summary
,
license_summary
):
# # 流水
# res = {
# 'code': 1,
# 'msg': 'success',
# 'data': {
# 'classify': 0,
# 'confidence': 0.999,
# 'data': [
# {
# 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'],
# 'cells': []
# },
# {
# 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'],
# 'cells': []
# }
# ]
# }
# }
#
# # 证件-1
# res = {
# 'code': 1,
# 'msg': 'success',
# 'data': {
# 'classify': 0,
# 'confidence': 0.999,
# 'data': [
# {
# 'cn_key': 'value',
# 'cn_key': 'value',
# },
# {
# 'cn_key': 'value',
# 'cn_key': 'value',
# },
# ]
# }
# }
#
# # 证件-2 or 其他类
# res = {
# 'code': 1,
# 'msg': 'success',
# 'data': {
# 'classify': 0,
# 'confidence': 0.999,
# }
# }
async
def
img_2_ocr_2_wb
(
self
,
wb
,
img_path
,
bs_summary
,
unknown_summary
,
license_summary
):
with
open
(
img_path
,
'rb'
)
as
f
:
base64_data
=
base64
.
b64encode
(
f
.
read
())
# 获取解码后的base64值
...
...
@@ -247,9 +196,10 @@ class Command(BaseCommand, LoggerMixin):
json_data_1
=
{
"file"
:
file_data
}
response_1
=
requests
.
post
(
self
.
ocr_url_1
,
json
=
json_data_1
)
if
response_1
.
status_code
==
200
:
ocr_res_1
=
response_1
.
json
()
ocr_res_1
=
await
self
.
fetch_ocr_result
(
self
.
ocr_url_1
,
json_data_1
)
if
ocr_res_1
is
None
:
raise
Exception
(
'ocr 1 error, img_path={0}'
.
format
(
img_path
))
else
:
self
.
cronjob_log
.
info
(
'{0} [ocr_1 result] [img={1}] [res={2}]'
.
format
(
self
.
log_base
,
img_path
,
ocr_res_1
))
...
...
@@ -270,22 +220,112 @@ class Command(BaseCommand, LoggerMixin):
"secret"
:
conf
.
OCR_SECRET
,
"file"
:
file_data
}
response_2
=
requests
.
post
(
self
.
ocr_url_2
,
data
=
json_data_2
)
if
response_2
.
status_code
==
200
:
ocr_res_2
=
await
self
.
fetch_ocr_result
(
self
.
ocr_url_2
,
json_data_2
)
if
ocr_res_2
is
None
:
raise
Exception
(
'ocr 2 error, img_path={0}'
.
format
(
img_path
))
else
:
# 识别结果
ocr_res_2
=
response_2
.
json
()
self
.
cronjob_log
.
info
(
'{0} [ocr_2 result] [img={1}] [res={2}]'
.
format
(
self
.
log_base
,
img_path
,
ocr_res_2
))
self
.
license2_process
(
ocr_res_2
,
license_summary
,
pid
,
classify
)
else
:
raise
Exception
(
'ocr 2 error, img_path={0}'
.
format
(
img_path
))
else
:
# 流水处理
self
.
bs_process
(
wb
,
ocr_data
,
bs_summary
,
unknown_summary
,
img_path
,
classify
)
else
:
pass
else
:
raise
Exception
(
'ocr 1 error, img_path={0}'
.
format
(
img_path
))
# def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary):
# # # 流水
# # res = {
# # 'code': 1,
# # 'msg': 'success',
# # 'data': {
# # 'classify': 0,
# # 'confidence': 0.999,
# # 'data': [
# # {
# # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'],
# # 'cells': []
# # },
# # {
# # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'],
# # 'cells': []
# # }
# # ]
# # }
# # }
# #
# # # 证件-1
# # res = {
# # 'code': 1,
# # 'msg': 'success',
# # 'data': {
# # 'classify': 0,
# # 'confidence': 0.999,
# # 'data': [
# # {
# # 'cn_key': 'value',
# # 'cn_key': 'value',
# # },
# # {
# # 'cn_key': 'value',
# # 'cn_key': 'value',
# # },
# # ]
# # }
# # }
# #
# # # 证件-2 or 其他类
# # res = {
# # 'code': 1,
# # 'msg': 'success',
# # 'data': {
# # 'classify': 0,
# # 'confidence': 0.999,
# # }
# # }
# with open(img_path, 'rb') as f:
# base64_data = base64.b64encode(f.read())
# # 获取解码后的base64值
# file_data = base64_data.decode()
# json_data_1 = {
# "file": file_data
# }
# response_1 = requests.post(self.ocr_url_1, json=json_data_1)
# if response_1.status_code == 200:
# ocr_res_1 = response_1.json()
# self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format(
# self.log_base, img_path, ocr_res_1))
#
# if ocr_res_1.get('code') == 1:
# ocr_data = ocr_res_1.get('data', {})
# classify = ocr_data.get('classify')
# if classify is None:
# return
# elif classify in consts.OTHER_CLASSIFY_SET: # 其他类
# return
# elif classify in consts.LICENSE_CLASSIFY_SET_1: # 证件1
# self.license1_process(ocr_data, license_summary, classify)
# elif classify in consts.LICENSE_CLASSIFY_SET_2: # 证件2
# pid, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify)
# json_data_2 = {
# "pid": str(pid),
# "key": conf.OCR_KEY,
# "secret": conf.OCR_SECRET,
# "file": file_data
# }
# response_2 = requests.post(self.ocr_url_2, data=json_data_2)
# if response_2.status_code == 200:
# # 识别结果
# ocr_res_2 = response_2.json()
# self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format(
# self.log_base, img_path, ocr_res_2))
# self.license2_process(ocr_res_2, license_summary, pid, classify)
# else:
# raise Exception('ocr 2 error, img_path={0}'.format(img_path))
# else: # 流水处理
# self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify)
# else:
# pass
# else:
# raise Exception('ocr 1 error, img_path={0}'.format(img_path))
@staticmethod
def
get_most
(
value_list
):
...
...
@@ -475,13 +515,14 @@ class Command(BaseCommand, LoggerMixin):
# wb = Workbook()
# 4.1 获取OCR结果
# loop = asyncio.get_event_loop()
# tasks = [self.img_2_ocr_2_wb(wb, img_path, summary) for img_path in pdf_handler.img_path_list]
# loop.run_until_complete(asyncio.wait(tasks))
loop
=
asyncio
.
get_event_loop
()
tasks
=
[
self
.
img_2_ocr_2_wb
(
wb
,
img_path
,
bs_summary
,
unknown_summary
,
license_summary
)
for
img_path
in
pdf_handler
.
img_path_list
]
loop
.
run_until_complete
(
asyncio
.
wait
(
tasks
))
# loop.close()
for
img_path
in
pdf_handler
.
img_path_list
:
self
.
img_2_ocr_2_wb
(
wb
,
img_path
,
bs_summary
,
unknown_summary
,
license_summary
)
# for img_path in pdf_handler.img_path_list:
#     self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary)
self
.
cronjob_log
.
info
(
'{0} [bs_summary={1}] [unknown_summary={2}] [license_summary={3}]'
.
format
(
self
.
log_base
,
bs_summary
,
unknown_summary
,
license_summary
))
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment