Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
1c6d880f
authored
2020-09-27 18:40:22 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add license
1 parent
c1c49a8e
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
16 additions
and
19 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/doc_ocr_process.py
src/apps/doc/ocr/wb.py
src/common/tools/pdf_to_img.py
src/apps/doc/consts.py
View file @
1c6d880
This diff is collapsed.
Click to expand it.
src/apps/doc/management/commands/doc_ocr_process.py
View file @
1c6d880
This diff is collapsed.
Click to expand it.
src/apps/doc/ocr/wb.py
View file @
1c6d880
...
...
@@ -168,7 +168,7 @@ class BSWorkbook(Workbook):
month_mapping
.
setdefault
(
month_list
[
-
1
],
[])
.
insert
(
0
,
(
ws
.
title
,
idx_list
[
-
1
]
+
min_row
,
ws
.
max_row
,
0
))
def
build_metadata_rows
(
self
,
c
lassify
,
confidence
,
rol
e
,
code
,
print_time
,
start_date
,
end_date
):
def
build_metadata_rows
(
self
,
c
onfidenc
e
,
code
,
print_time
,
start_date
,
end_date
):
if
start_date
is
None
or
end_date
is
None
:
timedelta
=
None
else
:
...
...
@@ -176,10 +176,6 @@ class BSWorkbook(Workbook):
metadata_rows
=
[
(
'流水识别置信度'
,
confidence
),
self
.
blank_row
,
(
'分类结果'
,
classify
),
self
.
blank_row
,
(
'户名'
,
role
),
self
.
blank_row
,
self
.
code_header
,
]
metadata_rows
.
extend
(
code
)
...
...
@@ -200,19 +196,19 @@ class BSWorkbook(Workbook):
ms
=
self
.
create_sheet
(
'{0}({1})'
.
format
(
self
.
meta_sheet_title
,
card
))
return
ms
def
build_meta_sheet
(
self
,
card
,
c
lassify
,
confidence
,
rol
e
,
code
,
print_time
,
start_date
,
end_date
):
metadata_rows
=
self
.
build_metadata_rows
(
c
lassify
,
confidence
,
rol
e
,
code
,
print_time
,
start_date
,
end_date
)
def
build_meta_sheet
(
self
,
card
,
c
onfidenc
e
,
code
,
print_time
,
start_date
,
end_date
):
metadata_rows
=
self
.
build_metadata_rows
(
c
onfidenc
e
,
code
,
print_time
,
start_date
,
end_date
)
ms
=
self
.
create_meta_sheet
(
card
)
for
row
in
metadata_rows
:
ms
.
append
(
row
)
return
ms
def
build_month_sheet
(
self
,
role
,
month_mapping
,
ms
,
is_reverse
):
def
build_month_sheet
(
self
,
card
,
month_mapping
,
ms
,
is_reverse
):
tmp_ws
=
self
.
create_sheet
(
'tmp_ws'
)
for
month
in
sorted
(
month_mapping
.
keys
()):
# 3.1.拷贝数据
parts
=
month_mapping
.
get
(
month
)
new_ws
=
self
.
create_sheet
(
'{0}({1})'
.
format
(
month
,
role
))
new_ws
=
self
.
create_sheet
(
'{0}({1})'
.
format
(
month
,
card
[
-
6
:]
))
new_ws
.
append
(
consts
.
FIXED_HEADERS
)
for
part
in
parts
:
ws
=
self
.
get_sheet_by_name
(
part
[
0
])
...
...
@@ -338,9 +334,7 @@ class BSWorkbook(Workbook):
# 2.元信息提取表
ms
=
self
.
build_meta_sheet
(
card
,
summary
[
'classify'
],
summary
[
'confidence'
],
summary
[
'role'
],
summary
[
'code'
],
summary
[
'print_time'
],
start_date
,
...
...
@@ -359,13 +353,16 @@ class BSWorkbook(Workbook):
self
.
remove
(
self
.
get_sheet_by_name
(
sheet
))
def
license_rebuild
(
self
,
license_summary
):
for
en_key
,
cn_key
in
consts
.
LICENSE_ORDER
:
ws
=
self
.
create_sheet
(
cn_key
)
for
bl
in
license_summary
.
get
(
en_key
,
[]):
for
classify
,
(
_
,
name
)
in
consts
.
LICENSE_ORDER
:
res
=
license_summary
.
get
(
classify
)
if
res
is
None
:
continue
ws
=
self
.
create_sheet
(
name
)
for
bl
in
res
:
for
bl_field
in
bl
:
ws
.
append
(
bl_field
)
ws
.
append
((
None
,
))
def
rebuild
(
self
,
bs_summary
,
license_summary
):
self
.
bs_rebuild
(
bs_summary
)
#
self.license_rebuild(license_summary)
self
.
license_rebuild
(
license_summary
)
...
...
src/common/tools/pdf_to_img.py
View file @
1c6d880
...
...
@@ -25,7 +25,7 @@ class PDFHandler:
def
__init__
(
self
,
path
,
img_dir_path
):
self
.
path
=
path
self
.
img_dir_path
=
img_dir_path
self
.
img_
info
_list
=
[]
self
.
img_
path
_list
=
[]
self
.
xref_set
=
set
()
def
get_img_save_path
(
self
,
pno
,
img_index
=
0
,
ext
=
'png'
):
...
...
@@ -38,7 +38,7 @@ class PDFHandler:
pm
=
page
.
getPixmap
(
matrix
=
trans_2
,
alpha
=
False
)
img_save_path
=
self
.
get_img_save_path
(
page
.
number
)
pm
.
writePNG
(
img_save_path
)
self
.
img_
info_list
.
append
((
img_save_path
,
page
.
number
,
0
)
)
self
.
img_
path_list
.
append
(
img_save_path
)
@staticmethod
def
getimage
(
pix
):
...
...
@@ -88,7 +88,7 @@ class PDFHandler:
with
open
(
img_save_path
,
"wb"
)
as
f
:
f
.
write
(
img_data
)
self
.
xref_set
.
add
(
xref
)
self
.
img_
info_list
.
append
((
img_save_path
,
pno
,
img_index
)
)
self
.
img_
path_list
.
append
(
img_save_path
)
@staticmethod
def
split_il
(
il
):
...
...
@@ -179,7 +179,7 @@ class PDFHandler:
img_save_path
=
self
.
get_img_save_path
(
pno
,
img_index
,
im_list
[
0
][
2
])
new_img
.
save
(
img_save_path
)
page_to_png
=
False
self
.
img_
info_list
.
append
((
img_save_path
,
pno
,
img_index
)
)
self
.
img_
path_list
.
append
(
img_save_path
)
# 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片
if
page_to_png
:
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment