Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
678b3bd0
authored
2023-02-09 16:33:49 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
modify CA yhls sheet
1 parent
903a34ba
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
28 additions
and
2 deletions
src/apps/doc/consts.py
src/apps/doc/ocr/wb.py
src/apps/doc/consts.py
View file @
678b3bd
...
...
@@ -1145,6 +1145,7 @@ MS_ERROR_COL = (5, 6)
WECHART_CLASSIFY
=
12
NEW_ZHIFUBAO_CLASSIFY
=
48
ALI_WECHART_CLASSIFY
=
{
12
,
13
,
48
}
JSYH_CLASSIFY
=
{
11
,
27
,
34
}
WECHART_ERROR_COL
=
(
1
,
2
)
SPECIAL_HEADERS_MAPPING
=
copy
.
deepcopy
(
HEADERS_MAPPING
)
SPECIAL_HEADERS_MAPPING
.
update
(
...
...
src/apps/doc/ocr/wb.py
View file @
678b3bd
...
...
@@ -40,6 +40,18 @@ class BSWorkbook(Workbook):
self
.
need_follow
=
False
@staticmethod
def date_calibration(date_str):
    """Return True when ``date_str`` ends in an accepted month/day pair.

    The check is purely positional:

    * the last two characters must be ``'20'`` or ``'21'``;
    * the two characters at ``[-5:-3]`` must be ``'03'``, ``'06'``,
      ``'09'`` or ``'12'``.

    NOTE(review): presumably the input looks like ``YYYY-MM-DD`` (any
    single-character separator), so these are the day and the month of a
    quarter-end date -- confirm against the caller.

    Args:
        date_str: The date text to inspect.

    Returns:
        bool: True only if both positional checks pass. Non-string input
        and strings too short to contain both fields yield False.
    """
    # Anything that is not text cannot match the expected layout.
    if not isinstance(date_str, str):
        return False

    # Slice the last TWO characters. Indexing with [-2] yields a single
    # character, which can never equal '20' or '21', so the check would
    # reject every input.
    day_part = date_str[-2:]
    month_part = date_str[-5:-3]

    # Out-of-range slices on short strings return shorter (or empty)
    # strings rather than raising, so short input simply fails to match.
    return day_part in ('20', '21') and month_part in ('03', '06', '09', '12')
@staticmethod
def
replace_newline
(
queryset_value
):
new_set
=
set
()
for
v
in
queryset_value
:
...
...
@@ -338,7 +350,7 @@ class BSWorkbook(Workbook):
metadata_rows
.
append
((
'Producer'
,
producer
))
if
len
(
author
)
>
0
:
metadata_highlight_row
.
append
(
6
)
if
'iText'
not
in
producer
and
'Qt'
not
in
producer
and
'Haru Free'
not
in
producer
:
if
'iText'
not
in
producer
and
'Qt'
not
in
producer
and
'Haru Free'
not
in
producer
and
'OpenPDF'
not
in
producer
:
metadata_highlight_row
.
append
(
7
)
metadata_rows
.
append
(
self
.
blank_row
)
...
...
@@ -496,6 +508,7 @@ class BSWorkbook(Workbook):
tmp_ws
=
self
.
create_sheet
(
'tmp_ws'
)
tmp2_ws
=
self
.
create_sheet
(
'tmp2_ws'
)
tmp3_ws
=
self
.
create_sheet
(
'tmp3_ws'
)
if
classify
in
consts
.
ALI_WECHART_CLASSIFY
:
high_light_keyword
=
self
.
wechat_keyword
else
:
...
...
@@ -629,7 +642,7 @@ class BSWorkbook(Workbook):
# 关键词1提取
if
summary_cell_value
in
self
.
interest_keyword
:
new_amount_cell_value
=
None
if
amount_cell
is
None
else
amount_cell
.
value
m
s
.
append
((
summary_cell_value
,
date_cell_value
,
new_amount_cell_value
))
tmp3_w
s
.
append
((
summary_cell_value
,
date_cell_value
,
new_amount_cell_value
))
# 关键词2提取至临时表
elif
summary_cell_value
in
self
.
salary_keyword
:
new_amount_cell_value
=
None
if
amount_cell
is
None
else
amount_cell
.
value
...
...
@@ -657,6 +670,18 @@ class BSWorkbook(Workbook):
# if summary_cell_idx is not None:
# new_ws[row][summary_cell_idx].fill = self.amount_fill
# 关键词1信息提取:结息
for
row
in
tmp3_ws
.
iter_rows
(
values_only
=
True
):
ms
.
append
(
row
)
# 建设银行
if
classify
in
consts
.
JSYH_CLASSIFY
:
if
isinstance
(
row
[
1
],
str
)
and
self
.
date_calibration
(
row
[
1
]):
pass
else
:
for
cell
in
ms
[
ms
.
max_row
]:
cell
.
fill
=
self
.
amount_fill
self
.
remove
(
tmp3_ws
)
# 关键词2信息提取
ms
.
append
(
self
.
blank_row
)
ms
.
append
(
self
.
salary_keyword_header
)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment