Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
160ac57d
authored
2025-08-29 14:26:52 +0800
by
冯轩
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
merge
2 parents
88f01673
d619642f
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
12 additions
and
8 deletions
src/apps/doc/internal_urls.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/views.py
src/apps/doc/internal_urls.py
View file @
160ac57
...
...
@@ -10,4 +10,7 @@ urlpatterns = [
path
(
r'invoice/downloadExcel'
,
views
.
InvoiceExcelView
.
as_view
()),
path
(
r'invoice/queryInfo'
,
views
.
InvoiceQueryInfoView
.
as_view
()),
path
(
r'contract/v1'
,
views
.
SEContractView
.
as_view
()),
path
(
r'reocr'
,
views
.
DocReOcrView
.
as_view
()),
path
(
r'batch/reocr'
,
views
.
BatchReOcrView
.
as_view
()),
]
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
160ac57
...
...
@@ -100,7 +100,7 @@ class Command(BaseCommand, LoggerMixin):
if
len
(
info_tuple
)
==
2
:
business_type
,
doc_id_str
=
info_tuple
else
:
business_type
,
doc_id_str
,
classify_1_str
=
info_tuple
business_type
,
doc_id_str
,
classify_1_str
,
re_ocr_flag
=
info_tuple
doc_id
=
int
(
doc_id_str
)
doc_class
=
HILDoc
if
business_type
==
consts
.
HIL_PREFIX
else
AFCDoc
zip_doc
=
doc_class
.
objects
.
filter
(
id
=
doc_id
)
.
first
()
...
...
@@ -124,7 +124,7 @@ class Command(BaseCommand, LoggerMixin):
else
:
self
.
online_log
.
info
(
'{0} [zip_2_pdfs] [db save end] [task_str={1}]'
.
format
(
self
.
log_base
,
task_str
))
return
zip_doc
,
business_type
return
zip_doc
,
business_type
,
re_ocr_flag
def
get_doc_info
(
self
,
task_str
,
is_priority
=
False
):
try
:
...
...
@@ -135,7 +135,7 @@ class Command(BaseCommand, LoggerMixin):
classify_1_str
=
'0'
rebuild_task_str
=
task_str
else
:
business_type
,
doc_id_str
,
classify_1_str
=
info_tuple
business_type
,
doc_id_str
,
classify_1_str
,
re_ocr_flag
=
info_tuple
rebuild_task_str
=
'{0}{1}{2}'
.
format
(
business_type
,
consts
.
SPLIT_STR
,
doc_id_str
)
doc_id
=
int
(
doc_id_str
)
doc_class
=
HILDoc
if
business_type
==
consts
.
HIL_PREFIX
else
AFCDoc
...
...
@@ -160,7 +160,7 @@ class Command(BaseCommand, LoggerMixin):
else
:
self
.
online_log
.
info
(
'{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'
.
format
(
self
.
log_base
,
task_str
,
is_priority
))
return
doc
,
business_type
,
rebuild_task_str
,
classify_1_str
return
doc
,
business_type
,
rebuild_task_str
,
classify_1_str
,
re_ocr_flag
# def pdf_download(self, doc, pdf_path):
# if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX):
...
...
@@ -1202,7 +1202,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
online_log
.
info
(
'{0} [zip_2_pdfs] [task={1}]'
.
format
(
self
.
log_base
,
task_str
))
# 2. 修改doc状态: 识别中
zip_doc
,
business_type
=
self
.
get_zip_doc_info
(
task_str
)
zip_doc
,
business_type
,
re_ocr_flag
=
self
.
get_zip_doc_info
(
task_str
)
if
zip_doc
is
None
:
time
.
sleep
(
self
.
sleep_time_doc_get
)
continue
...
...
@@ -1339,7 +1339,7 @@ class Command(BaseCommand, LoggerMixin):
try
:
# 1. 从队列获取文件信息
doc
,
business_type
,
task_str
,
classify_1_str
=
self
.
get_doc_info
(
task_str
,
is_priority
)
doc
,
business_type
,
task_str
,
classify_1_str
,
re_ocr_flag
=
self
.
get_doc_info
(
task_str
,
is_priority
)
# 队列为空时的处理
if
doc
is
None
:
time
.
sleep
(
self
.
sleep_time_doc_get
)
...
...
@@ -1389,7 +1389,8 @@ class Command(BaseCommand, LoggerMixin):
self
.
online_log
.
info
(
'{0} [pdf to img start] [task={1}] [times={2}]'
.
format
(
self
.
log_base
,
task_str
,
times
))
start_time
=
time
.
time
()
pdf_handler
.
extract_image
(
max_img_count
)
max_img_count_or_none
=
None
if
re_ocr_flag
==
'Y'
else
max_img_count
pdf_handler
.
extract_image
(
max_img_count_or_none
)
end_time
=
time
.
time
()
speed_time
=
int
(
end_time
-
start_time
)
self
.
online_log
.
info
(
'{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'
.
format
(
...
...
@@ -1407,7 +1408,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
online_log
.
warn
(
'{0} [pdf to img failed (pdf img empty)] [task={1}]'
.
format
(
self
.
log_base
,
task_str
))
raise
Exception
(
'pdf img empty'
)
elif
pdf_handler
.
img_count
>=
max_img_count
:
elif
re_ocr_flag
==
'N'
and
pdf_handler
.
img_count
>=
max_img_count
:
self
.
online_log
.
info
(
'{0} [too many pdf image] [task={1}] [img_count={2}]'
.
format
(
self
.
log_base
,
task_str
,
pdf_handler
.
img_count
))
...
...
src/apps/doc/views.py
View file @
160ac57
This diff is collapsed.
Click to expand it.
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment