Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
a89b45ad
authored
2022-10-31 16:27:53 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add password from pos
1 parent
262ede9d
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
51 additions
and
15 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/models.py
src/apps/doc/views.py
src/common/tools/file_tools.py
src/common/tools/mssql_script22.py
src/common/tools/pdf_to_img.py
src/apps/doc/management/commands/ocr_process.py
View file @
a89b45a
...
...
@@ -1165,7 +1165,7 @@ class Command(BaseCommand, LoggerMixin):
extract_path
=
os
.
path
.
join
(
doc_data_path
,
'extract_content'
)
os
.
makedirs
(
extract_path
,
exist_ok
=
True
)
try
:
pwd_list
=
get_pwd_list_from_str
(
zip_doc
.
document_name
)
pwd_list
=
get_pwd_list_from_str
(
zip_doc
.
document_name
,
zip_doc
.
password
)
is_success
=
extract_zip_or_rar
(
zip_path
,
extract_path
,
pwd_list
)
except
Exception
as
e
:
is_success
=
False
...
...
@@ -1281,7 +1281,8 @@ class Command(BaseCommand, LoggerMixin):
img_save_path
=
os
.
path
.
join
(
doc_data_path
,
'img'
)
pdf_path
=
os
.
path
.
join
(
doc_data_path
,
'{0}.pdf'
.
format
(
doc
.
id
))
pdf_handler
=
PDFHandler
(
pdf_path
,
img_save_path
,
doc
.
document_name
)
pwd_list
=
get_pwd_list_from_str
(
doc
.
document_name
,
doc
.
password
)
pdf_handler
=
PDFHandler
(
pdf_path
,
img_save_path
,
doc
.
document_name
,
pwd_list
=
pwd_list
)
if
classify_1_str
==
'0'
:
try
:
...
...
src/apps/doc/models.py
View file @
a89b45a
...
...
@@ -64,6 +64,7 @@ class HILDoc(models.Model):
page_count
=
models
.
IntegerField
(
null
=
True
,
verbose_name
=
'文件page数目'
)
metadata
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"电子PDF专属,PDF信息"
)
password
=
models
.
CharField
(
null
=
True
,
max_length
=
64
,
verbose_name
=
"文件密码"
)
class
Meta
:
managed
=
False
...
...
@@ -106,6 +107,7 @@ class AFCDoc(models.Model):
page_count
=
models
.
IntegerField
(
null
=
True
,
verbose_name
=
'文件page数目'
)
metadata
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"电子PDF专属,PDF信息"
)
password
=
models
.
CharField
(
null
=
True
,
max_length
=
64
,
verbose_name
=
"文件密码"
)
class
Meta
:
managed
=
False
...
...
src/apps/doc/views.py
View file @
a89b45a
...
...
@@ -563,6 +563,7 @@ class UploadDocView(GenericView, DocHandler):
document_scheme
=
document
.
get
(
'documentScheme'
)
data_source
=
document
.
get
(
'dataSource'
)
document_name
=
document
.
get
(
'documentName'
,
''
)
pwd
=
document
.
get
(
'password'
,
''
)
data_source
=
self
.
fix_data_source
(
data_source
)
document_scheme
=
self
.
fix_scheme
(
document_scheme
)
...
...
@@ -581,6 +582,7 @@ class UploadDocView(GenericView, DocHandler):
document_scheme
=
document_scheme
,
data_source
=
data_source
,
upload_finish_time
=
document
.
get
(
'uploadFinishTime'
),
password
=
pwd
if
isinstance
(
pwd
,
str
)
and
len
(
pwd
)
>
0
else
None
,
)
# 3. 选择队列进入
...
...
src/common/tools/file_tools.py
View file @
a89b45a
...
...
@@ -25,12 +25,18 @@ def write_zip_file(dir_name, zipfile_path):
z
.
write
(
src_file_path
,
file_target_path
)
def get_pwd_list_from_str(doc_name, doc_password):
    """Collect candidate passwords for a document.

    Args:
        doc_name: Document name. Every non-overlapping run of six
            consecutive digits found in it is treated as a candidate.
        doc_password: Explicitly supplied password; ignored unless it is a
            non-empty string.

    Returns:
        A new list with the explicit password first (when present),
        followed by the six-digit candidates in order of appearance.
    """
    all_password = []
    if isinstance(doc_password, str) and doc_password:
        all_password.append(doc_password)
    try:
        all_password.extend(re.findall(r'\d{6}', doc_name))
    except TypeError:
        # doc_name is not text (e.g. None), so there is nothing to scan;
        # fall back to whatever explicit password was collected above.
        pass
    return all_password
def
extract_zip_or_rar
(
file_path
,
extract_path
,
pwd_list
=
[]):
...
...
src/common/tools/mssql_script22.py
0 → 100644
View file @
a89b45a
import
pyodbc
# One-off schema migration: adds a `password` column (nvarchar(64)) to the
# hil_doc and afc_doc tables, matching the `password` CharField(max_length=64)
# added to the HILDoc and AFCDoc models.

hil_sql = """
ALTER TABLE hil_doc ADD password nvarchar(64);
"""

afc_sql = """
ALTER TABLE afc_doc ADD password nvarchar(64);
"""

# NOTE(review): both connection strings name only the ODBC driver and are
# identical -- presumably server/database/credentials were stripped before
# commit. Confirm and fill them in before running.
hil_connection_string = 'DRIVER={ODBC Driver 17 for SQL Server};'
afc_connection_string = 'DRIVER={ODBC Driver 17 for SQL Server};'


def _run_statement(connection_string, sql):
    """Open an autocommit connection, execute one statement, and clean up.

    The cursor and the connection are closed even when ``execute`` raises,
    so a failed ALTER does not leave a connection open.
    """
    cnxn = pyodbc.connect(connection_string, autocommit=True)
    try:
        cursor = cnxn.cursor()
        try:
            cursor.execute(sql)
        finally:
            cursor.close()
    finally:
        cnxn.close()


# Same order as before: HIL first, then AFC. An exception on the first
# statement still aborts the script before the second one runs.
_run_statement(hil_connection_string, hil_sql)
_run_statement(afc_connection_string, afc_sql)
src/common/tools/pdf_to_img.py
View file @
a89b45a
...
...
@@ -51,7 +51,7 @@ class PDFHandler:
def
__init__
(
self
,
path
,
img_dir_path
,
document_name
=
None
,
pwd_list
=
[]):
self
.
path
=
path
self
.
pwd_list
=
self
.
get_pwd_list
(
document_name
,
pwd_list
)
self
.
pwd_list
=
pwd_list
self
.
img_dir_path
=
img_dir_path
self
.
img_path_list
=
[]
self
.
img_count
=
0
...
...
@@ -80,14 +80,14 @@ class PDFHandler:
self
.
title_idx
=
None
self
.
date_pattern
=
re
.
compile
(
r'^\d+ \d{4}(-)(1[0-2]|0?[1-9])\1(0?[1-9]|[1-2]\d|30|31)$'
)
@staticmethod
def
get_pwd_list
(
doc_name
,
pwd_list
):
try
:
pwd_list_from_doc_name
=
re
.
findall
(
r'\d{6}'
,
doc_name
)
pwd_list_from_doc_name
.
extend
(
pwd_list
)
return
pwd_list_from_doc_name
except
Exception
as
e
:
return
pwd_list
#
@staticmethod
#
def get_pwd_list(doc_name, pwd_list):
#
try:
#
pwd_list_from_doc_name = re.findall(r'\d{6}', doc_name)
#
pwd_list_from_doc_name.extend(pwd_list)
#
return pwd_list_from_doc_name
#
except Exception as e:
#
return pwd_list
def
get_suffix
(
self
,
file_name
):
if
file_name
is
None
:
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment