Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
c996af2d
authored
2023-11-13 11:44:55 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add flag
1 parent
77132c68
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
4 deletions
src/apps/doc/management/commands/ocr_process.py
src/common/tools/pdf_to_img.py
src/apps/doc/management/commands/ocr_process.py
View file @
c996af2
...
...
@@ -1339,8 +1339,8 @@ class Command(BaseCommand, LoggerMixin):
pdf_handler
.
extract_image
(
max_img_count
)
end_time
=
time
.
time
()
speed_time
=
int
(
end_time
-
start_time
)
self
.
online_log
.
info
(
'{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}]'
.
format
(
self
.
log_base
,
task_str
,
times
,
speed_time
))
self
.
online_log
.
info
(
'{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}]
[is_new_modify={4}]
'
.
format
(
self
.
log_base
,
task_str
,
times
,
speed_time
,
pdf_handler
.
is_new_modify
))
except
Exception
as
e
:
self
.
online_log
.
warn
(
'{0} [download or pdf to img failed] [task={1}] [times={2}] '
'[error={3}]'
.
format
(
self
.
log_base
,
task_str
,
times
,
...
...
src/common/tools/pdf_to_img.py
View file @
c996af2
...
...
@@ -63,6 +63,7 @@ class PDFHandler:
self
.
img_dir_path
=
img_dir_path
self
.
img_path_list
=
[]
self
.
img_count
=
0
self
.
is_new_modify
=
0
# 用于记录受新改动影响的PDF
self
.
xref_set
=
set
()
self
.
img_suffixs
=
{
'.jpeg'
,
'.jpg'
,
'.png'
,
'.webp'
,
'.bmp'
}
self
.
suffix
=
self
.
get_suffix
(
document_name
)
...
...
@@ -246,8 +247,8 @@ class PDFHandler:
self
.
xref_set
.
add
(
xref
)
self
.
img_path_list
.
append
(
img_save_path
)
@staticmethod
def
split_il
(
il
):
#
@staticmethod
def
split_il
(
self
,
il
):
broken_il
=
[]
start
=
0
length
=
len
(
il
)
...
...
@@ -258,6 +259,7 @@ class PDFHandler:
page_to_png
=
True
break
if
il
[
i
][
2
]
>=
TINY_IMG_MAX_WIDTH
:
self
.
is_new_modify
=
1
page_to_png
=
True
break
else
:
...
...
@@ -460,6 +462,7 @@ class PDFHandler:
self
.
page_to_png
(
page
)
# 大图
elif
width
>=
WH_COUPLE_6
[
0
]
or
height
>=
WH_COUPLE_6
[
1
]:
self
.
is_new_modify
=
1
is_big_img
=
(
width
<
WH_COUPLE_7
[
0
]
and
height
<
WH_COUPLE_7
[
1
])
# 防止图片过大
page
=
pdf
.
loadPage
(
pno
)
self
.
page_to_png
(
page
,
is_big_img
=
is_big_img
)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment