Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
4acc8f62
authored
2020-08-20 11:20:02 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
# from openpyxl import Workbook
1 parent
d4e9acd6
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
12 deletions
src/apps/doc/management/commands/doc_ocr_process.py
src/apps/doc/management/commands/doc_ocr_process.py
View file @
4acc8f6
...
...
@@ -5,7 +5,7 @@ import signal
import
base64
import
asyncio
import
aiohttp
# from openpyxl import Workbook
import
requests
from
apps.doc.ocr.wb
import
BSWorkbook
,
Workbook
from
django.core.management
import
BaseCommand
...
...
@@ -109,17 +109,30 @@ class Command(BaseCommand, LoggerMixin):
base64_data
=
base64
.
b64encode
(
f
.
read
())
return
{
'imgBase64'
:
base64_data
.
decode
(
'utf-8'
)}
async
def
fetch_ocr_result
(
self
,
img_path
):
async
with
aiohttp
.
ClientSession
(
headers
=
self
.
ocr_header
,
connector
=
aiohttp
.
TCPConnector
(
ssl
=
False
)
)
as
session
:
# async def fetch_ocr_result(self, img_path):
# async with aiohttp.ClientSession(
# headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False)
# ) as session:
# json_data = self.get_ocr_json(img_path)
# async with session.post(self.ocr_url, json=json_data) as response:
# return await response.json()
#
# async def img_ocr_excel(self, wb, img_path, role_summary):
# res = await self.fetch_ocr_result(img_path)
# self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res))
# sheets_list = res.get('result').get('res')
# img_name = os.path.basename(img_path)
# self.append_sheet(wb, sheets_list, img_name, role_summary)
def
fetch_ocr_result
(
self
,
img_path
):
json_data
=
self
.
get_ocr_json
(
img_path
)
async
with
session
.
post
(
self
.
ocr_url
,
json
=
json_data
)
as
response
:
return
await
response
.
json
()
response
=
requests
.
post
(
self
.
ocr_url
,
json
=
json_data
,
headers
=
self
.
ocr_header
)
return
response
.
json
()
async
def
img_ocr_excel
(
self
,
wb
,
img_path
,
role_summary
):
res
=
await
self
.
fetch_ocr_result
(
img_path
)
def
img_ocr_excel
(
self
,
wb
,
img_path
,
role_summary
):
res
=
self
.
fetch_ocr_result
(
img_path
)
self
.
cronjob_log
.
info
(
'{0} [fetch ocr result success] [img={1}] [res={2}]'
.
format
(
self
.
log_base
,
img_path
,
res
))
if
res
.
get
(
'code'
)
==
1
:
sheets_list
=
res
.
get
(
'result'
)
.
get
(
'res'
)
img_name
=
os
.
path
.
basename
(
img_path
)
self
.
append_sheet
(
wb
,
sheets_list
,
img_name
,
role_summary
)
...
...
@@ -164,11 +177,14 @@ class Command(BaseCommand, LoggerMixin):
# loan_keyword = Keywords.objects.filter(type=KeywordsType.LOAN.value).values_list('keyword', flat=True)
# wb = BSWorkbook(interest_keyword, salary_keyword, loan_keyword)
wb
=
Workbook
()
loop
=
asyncio
.
get_event_loop
()
tasks
=
[
self
.
img_ocr_excel
(
wb
,
img_path
,
role_summary
)
for
img_path
in
pdf_handler
.
img_path_list
]
loop
.
run_until_complete
(
asyncio
.
wait
(
tasks
))
#
loop = asyncio.get_event_loop()
#
tasks = [self.img_ocr_excel(wb, img_path, role_summary) for img_path in pdf_handler.img_path_list]
#
loop.run_until_complete(asyncio.wait(tasks))
# loop.close()
for
img_path
in
pdf_handler
.
img_path_list
:
self
.
img_ocr_excel
(
wb
,
img_path
,
role_summary
)
# 整合excel文件
# wb.save(src_excel_path)
# wb.rebuild(role_summary)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment