Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
83de3e22
authored
2021-11-06 15:45:15 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix e-contract
1 parent
9ff5682c
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
9 additions
and
4 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/ocr/wb.py
src/apps/doc/views.py
src/common/tools/pdf_to_img.py
src/apps/doc/management/commands/ocr_process.py
View file @
83de3e2
...
...
@@ -227,6 +227,10 @@ class Command(BaseCommand, LoggerMixin):
return
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS
))
page_num
=
contract_dict
.
get
(
'page_num'
)
if
page_num
.
startswith
(
'page_'
):
page_num_only
=
page_num
.
split
(
'_'
)[
-
1
]
else
:
page_num_only
=
page_num
rebuild_page_info
=
[]
text_key
=
'words'
for
key
,
value
in
contract_dict
.
get
(
'page_info'
,
{})
.
items
():
...
...
@@ -256,8 +260,8 @@ class Command(BaseCommand, LoggerMixin):
for
row_list
in
sub_value
[
text_key
]:
rebuild_page_info
.
append
(
row_list
)
# contract_result.setdefault(page_num, []).append(rebuild_page_info)
contract_result
.
setdefault
(
classify
,
dict
())
.
setdefault
(
page_num
,
[])
.
append
(
rebuild_page_info
)
# contract_result.setdefault(page_num
_only
, []).append(rebuild_page_info)
contract_result
.
setdefault
(
classify
,
dict
())
.
setdefault
(
page_num
_only
,
[])
.
append
(
rebuild_page_info
)
# def rebuild_result(self, ocr_data, classify, img_path):
# license_data = ocr_data.get('data')
...
...
src/apps/doc/ocr/wb.py
View file @
83de3e2
...
...
@@ -710,7 +710,7 @@ class BSWorkbook(Workbook):
for
i
in
range
(
30
):
if
str
(
i
)
in
contract_result
:
page_num
=
str
(
i
)
info_list
=
contract_result
.
get
(
page_num
)
info_list
=
contract_result
.
get
(
page_num
,
[]
)
# for page_num, info_list in contract_result.items():
ws
.
append
((
'page {0}'
.
format
(
page_num
),
))
for
info
in
info_list
:
...
...
src/apps/doc/views.py
View file @
83de3e2
...
...
@@ -550,7 +550,7 @@ class UploadDocView(GenericView, DocHandler):
classify_1
=
classify_2
=
0
if
data_source
==
consts
.
DATA_SOURCE_LIST
[
-
1
]
and
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
1
]:
for
keyword
,
classify_1_tmp
,
classify_2_tmp
in
consts
.
FILE_NAME_PREFIX
_MAP
.
get
(
prefix
):
for
keyword
,
classify_1_tmp
,
classify_2_tmp
in
consts
.
ECONTRACT_KEYWORDS
_MAP
.
get
(
prefix
):
if
keyword
in
document_name
:
classify_1
=
classify_1_tmp
classify_2
=
classify_2_tmp
...
...
src/common/tools/pdf_to_img.py
View file @
83de3e2
...
...
@@ -300,6 +300,7 @@ class PDFHandler:
self
.
page_text_list
=
page_text_list
def
e_contract_process
(
self
):
os
.
makedirs
(
self
.
img_dir_path
,
exist_ok
=
True
)
with
fitz
.
Document
(
self
.
path
)
as
pdf
:
for
pno
in
range
(
pdf
.
pageCount
):
page
=
pdf
.
loadPage
(
pno
)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment