Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
a220590e
authored
2020-08-25 11:45:56 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
update ocr url
1 parent
10a4a80f
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
18 deletions
src/apps/doc/management/commands/doc_ocr_process.py
src/apps/doc/management/commands/doc_ocr_process.py
View file @
a220590
...
...
@@ -28,16 +28,8 @@ class Command(BaseCommand, LoggerMixin):
self
.
switch
=
True
# 数据目录
self
.
data_dir
=
conf
.
DATA_DIR
# pdf页面转图片
self
.
zoom_x
=
2.0
self
.
zoom_y
=
2.0
self
.
trans
=
fitz
.
Matrix
(
self
.
zoom_x
,
self
.
zoom_y
)
.
preRotate
(
0
)
# zoom factor 2 in each dimension
# ocr相关
self
.
ocr_url
=
conf
.
OCR_URL
self
.
ocr_header
=
{
'X-Auth-Token'
:
conf
.
OCR_TOKEN
,
'Content-Type'
:
'application/json'
}
# EDMS web_service_api
self
.
edms
=
EDMS
(
conf
.
EDMS_USER
,
conf
.
EDMS_PWD
)
# 优雅退出信号:15
...
...
@@ -103,12 +95,6 @@ class Command(BaseCommand, LoggerMixin):
words
=
cell
.
get
(
'words'
)
ws
.
cell
(
row
=
r1
+
1
,
column
=
c1
+
1
,
value
=
words
)
@staticmethod
def
get_ocr_json
(
img_path
):
with
open
(
img_path
,
"rb"
)
as
f
:
base64_data
=
base64
.
b64encode
(
f
.
read
())
return
{
'imgBase64'
:
base64_data
.
decode
(
'utf-8'
)}
# async def fetch_ocr_result(self, img_path):
# async with aiohttp.ClientSession(
# headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False)
...
...
@@ -125,15 +111,20 @@ class Command(BaseCommand, LoggerMixin):
# self.append_sheet(wb, sheets_list, img_name, role_summary)
def
fetch_ocr_result
(
self
,
img_path
):
json_data
=
self
.
get_ocr_json
(
img_path
)
response
=
requests
.
post
(
self
.
ocr_url
,
json
=
json_data
,
headers
=
self
.
ocr_header
)
# payload = {'name': 'page_0_img_0_0'}
files
=
[
(
'img'
,
open
(
img_path
,
'rb'
))
]
response
=
requests
.
request
(
"POST"
,
self
.
ocr_url
,
files
=
files
)
return
response
.
json
()
def
img_ocr_excel
(
self
,
wb
,
img_path
,
role_summary
):
res
=
self
.
fetch_ocr_result
(
img_path
)
self
.
cronjob_log
.
info
(
'{0} [fetch ocr result success] [img={1}] [res={2}]'
.
format
(
self
.
log_base
,
img_path
,
res
))
if
res
.
get
(
'code'
)
==
'1'
:
sheets_list
=
res
.
get
(
'result'
)
.
get
(
'res'
)
if
res
.
get
(
'code'
)
==
1
:
sheets_list
=
res
.
get
(
'data'
)
if
not
sheets_list
:
return
img_name
=
os
.
path
.
basename
(
img_path
)
self
.
append_sheet
(
wb
,
sheets_list
,
img_name
,
role_summary
)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment