Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
0b2cf2d1
authored
2022-08-29 16:30:15 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix zfb emoji
1 parent
090b26b2
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
1 deletion
src/common/tools/pdf_to_img.py
src/common/tools/pdf_to_img.py
View file @
0b2cf2d
...
...
@@ -82,9 +82,12 @@ class PDFHandler:
@staticmethod
def get_pwd_list(doc_name, pwd_list):
    """Build a candidate list from digits in ``doc_name`` plus ``pwd_list``.

    Every non-overlapping run of exactly six consecutive digits found in
    ``doc_name`` (scanned left to right) is placed first, followed by the
    entries of ``pwd_list`` in their original order. ``pwd_list`` itself is
    never modified; a new list is returned.

    Args:
        doc_name: Document name to scan for six-digit sequences.
        pwd_list: Iterable of already-known candidates to append.

    Returns:
        A new list of the six-digit matches followed by the items of
        ``pwd_list``. If ``doc_name`` is not a string or ``pwd_list`` is not
        iterable, ``pwd_list`` is returned unchanged as a best-effort
        fallback.
    """
    try:
        candidates = re.findall(r'\d{6}', doc_name)
        candidates.extend(pwd_list)
    except TypeError:
        # Only bad argument types are expected here (e.g. doc_name is None);
        # catching just TypeError keeps the fallback without hiding
        # unrelated errors.
        return pwd_list
    return candidates
def
get_suffix
(
self
,
file_name
):
if
file_name
is
None
:
...
...
@@ -355,9 +358,17 @@ class PDFHandler:
for
line
in
block
.
get
(
'lines'
):
for
span
in
line
.
get
(
'spans'
):
char
=
span
.
get
(
'text'
)
bbox
=
span
.
get
(
'bbox'
)
if
char
.
strip
()
==
''
:
continue
# 特殊emoji跳过
try
:
print
(
char
)
except
Exception
as
e
:
continue
bbox
=
span
.
get
(
'bbox'
)
if
pno
==
0
and
self
.
title_is_ebank
(
char
):
in_ebank_set
=
True
text_list
.
append
((
bbox
,
char
))
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment