Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
bd0eb30d
authored
2022-08-07 18:56:51 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
pdf pwd from doc name
1 parent
bd80e62d
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
2 deletions
src/celery_compare/tasks.py
src/common/tools/pdf_to_img.py
src/celery_compare/tasks.py
View file @
bd0eb30
...
...
@@ -1596,7 +1596,7 @@ def get_se_cms_compare_info(last_obj, application_entity, detect_list, auto=Fals
jypz_field_input
.
append
((
'vinNo'
,
vin_no
))
jypz_field_input
.
append
((
'vehicleTransactionAmount'
,
amount
))
jypz_field_input
.
append
((
consts
.
SE_GB_USED_FIELD
[
2
],
first_submission_date
))
if
fp_group
.
find
(
'Non
-
OCU Product Group'
)
!=
-
1
:
if
fp_group
.
find
(
'Non
OCU Product Group'
)
!=
-
1
:
jypz_field_input
.
append
((
'type'
,
consts
.
JYPZ_TYPE_1
))
elif
fp_group
.
find
(
'OCU Product Group'
)
!=
-
1
:
jypz_field_input
.
append
((
'type'
,
consts
.
JYPZ_TYPE_2
))
...
...
src/common/tools/pdf_to_img.py
View file @
bd0eb30
import
os
import
re
import
json
import
cv2
import
shutil
...
...
@@ -48,8 +49,9 @@ class PDFBuild:
class
PDFHandler
:
def
__init__
(
self
,
path
,
img_dir_path
,
document_name
=
None
):
def
__init__
(
self
,
path
,
img_dir_path
,
document_name
=
None
,
pwd_list
=
[]
):
self
.
path
=
path
self
.
pwd_list
=
self
.
get_pwd_list
(
document_name
,
pwd_list
)
self
.
img_dir_path
=
img_dir_path
self
.
img_path_list
=
[]
self
.
img_count
=
0
...
...
@@ -75,6 +77,12 @@ class PDFHandler:
self
.
page_count
=
None
self
.
metadata
=
None
@staticmethod
def get_pwd_list(doc_name, pwd_list):
    """Build the list of candidate passwords for opening the PDF.

    Every run of six consecutive digits found in ``doc_name`` is taken as a
    candidate password; the explicitly supplied ``pwd_list`` entries are
    appended after them, so name-derived candidates come first.

    Args:
        doc_name: Document name to scan for 6-digit sequences. May be
            ``None`` or empty (the constructor defaults it to ``None``),
            in which case no candidates are derived from the name.
        pwd_list: Extra candidate passwords. May be ``None`` or empty.
            The passed-in list is never mutated.

    Returns:
        A new list: passwords extracted from ``doc_name`` followed by the
        entries of ``pwd_list``.
    """
    # re.findall raises TypeError on None, so guard the optional name.
    candidates = re.findall(r'\d{6}', doc_name) if doc_name else []
    # Tolerate a missing password list instead of failing in extend().
    if pwd_list:
        candidates.extend(pwd_list)
    return candidates
def
get_suffix
(
self
,
file_name
):
if
file_name
is
None
:
return
None
...
...
@@ -370,6 +378,12 @@ class PDFHandler:
self
.
img_path_list
.
append
(
img_save_path
)
else
:
with
fitz
.
Document
(
self
.
path
)
as
pdf
:
# 解密
for
pwd
in
self
.
pwd_list
:
if
not
pdf
.
isEncrypted
:
break
pdf
.
authenticate
(
pwd
)
self
.
metadata
=
pdf
.
metadata
self
.
page_count
=
pdf
.
pageCount
if
isinstance
(
max_img_count
,
int
)
and
pdf
.
pageCount
>=
max_img_count
:
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment