Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇 / bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
46cd5314 authored 2024-11-20 16:20:44 +0800 by 冯轩
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
Merge remote-tracking branch 'origin/feature/upgrade_cut_img'
2 parents: 3ad32bbb, 3c9e5a31
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 5 additions and 5 deletions
src/common/tools/pdf_to_img.py
src/common/tools/pdf_to_img.py
View file @
46cd531
...
...
@@ -345,7 +345,7 @@ class PDFHandler:
         # 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片
         if page_to_png:
             page = pdf.loadPage(pno)
-            self.page_to_png(page, is_big_img=True)
+            self.page_to_png(page)

     def title_is_ebank(self, char):
         new_char = normalize('NFKC', char)
...
...
@@ -450,7 +450,7 @@ class PDFHandler:
 # 1.页面图片对象数目为0时,保存整个页面为png图片
 if self.is_e_pdf or self.is_ebank or len(il) == 0:
     page = pdf.loadPage(pno)
-    self.page_to_png(page, is_big_img=True)
+    self.page_to_png(page)
 # 2.页面图片对象数目为1时:
 # 小图(如电子账单的盖章):保存整个页面为png图片
 # 大图:提取图片对象
...
...
@@ -459,13 +459,13 @@ class PDFHandler:
     # 小图
     if width < WH_COUPLE_1[0] and height < WH_COUPLE_1[1]:
         page = pdf.loadPage(pno)
-        self.page_to_png(page, is_big_img=True)
+        self.page_to_png(page)
     # 大图
     elif width >= WH_COUPLE_6[0] or height >= WH_COUPLE_6[1]:
         self.is_new_modify = 1
         is_big_img = (width < WH_COUPLE_7[0] and height < WH_COUPLE_7[1])  # 防止图片过大
         page = pdf.loadPage(pno)
-        self.page_to_png(page, is_big_img=True)
+        self.page_to_png(page, is_big_img=is_big_img)
     elif xref not in self.xref_set:
         self.extract_single_image(pdf, xref, smask, colorspace, pno)
 # 3.页面图片对象数目大于1时,特殊处理
...
...
@@ -480,7 +480,7 @@ class PDFHandler:
         with fitz.Document(self.path) as pdf:
             for pno in range(pdf.pageCount):
                 page = pdf.loadPage(pno)
-                self.page_to_png(page, is_big_img=True)
+                self.page_to_png(page)
         self.img_count = len(self.img_path_list)

     def ebank_draw(self):
...
...
Write
Preview
Styling with Markdown is supported
Attach a file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to post a comment