Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
4b008519
authored
2021-06-11 17:09:38 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add keywords
1 parent
00bb1843
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
23 additions
and
5 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/named_enum.py
src/apps/doc/ocr/wb.py
src/apps/doc/management/commands/ocr_process.py
View file @
4b00851
...
...
@@ -778,7 +778,9 @@ class Command(BaseCommand, LoggerMixin):
type
=
KeywordsType
.
LOAN
.
value
,
on_off
=
True
)
.
values_list
(
'keyword'
,
flat
=
True
)
wechat_keyword
=
Keywords
.
objects
.
filter
(
type
=
KeywordsType
.
ALI_WECHART
.
value
,
on_off
=
True
)
.
values_list
(
'keyword'
,
flat
=
True
)
wb
=
BSWorkbook
(
interest_keyword
,
salary_keyword
,
loan_keyword
,
wechat_keyword
)
repayments_keyword
=
Keywords
.
objects
.
filter
(
type
=
KeywordsType
.
REPAYMENTS
.
value
,
on_off
=
True
)
.
values_list
(
'keyword'
,
flat
=
True
)
wb
=
BSWorkbook
(
interest_keyword
,
salary_keyword
,
loan_keyword
,
wechat_keyword
,
repayments_keyword
)
for
img_path
,
res
in
ocr_1_res
.
items
():
pno
,
ino
=
self
.
parse_img_path
(
img_path
)
part_idx
=
1
...
...
src/apps/doc/named_enum.py
View file @
4b00851
...
...
@@ -19,6 +19,7 @@ class KeywordsType(NamedEnum):
SALARY
=
(
1
,
'薪资'
)
LOAN
=
(
2
,
'贷款'
)
ALI_WECHART
=
(
3
,
'微信/支付宝'
)
REPAYMENTS
=
(
4
,
'还款'
)
class
RequestTeam
(
NamedEnum
):
...
...
src/apps/doc/ocr/wb.py
View file @
4b00851
...
...
@@ -14,17 +14,20 @@ from apps.doc import consts
class
BSWorkbook
(
Workbook
):
def
__init__
(
self
,
interest_keyword
,
salary_keyword
,
loan_keyword
,
wechat_keyword
,
*
args
,
**
kwargs
):
def
__init__
(
self
,
interest_keyword
,
salary_keyword
,
loan_keyword
,
wechat_keyword
,
repayments_keyword
,
*
args
,
**
kwargs
):
super
()
.
__init__
(
*
args
,
**
kwargs
)
locale
.
setlocale
(
locale
.
LC_NUMERIC
,
'en_US.UTF-8'
)
self
.
meta_sheet_title
=
'Key info'
self
.
blank_row
=
(
None
,)
self
.
code_header
=
(
'页数'
,
'电子回单验证码'
)
self
.
date_header
=
(
'打印时间'
,
'起始日期'
,
'终止日期'
,
'流水区间结果'
)
self
.
keyword_header
=
(
'关键词'
,
'记账日期'
,
'金额'
)
self
.
interest_keyword_header
=
(
'结息关键词'
,
'记账日期'
,
'金额'
)
self
.
salary_keyword_header
=
(
'收入关键词'
,
'记账日期'
,
'金额'
)
self
.
repayments_keyword_header
=
(
'还款关键词'
,
'记账日期'
,
'金额'
)
self
.
interest_keyword
=
self
.
replace_newline
(
interest_keyword
)
self
.
salary_keyword
=
self
.
replace_newline
(
salary_keyword
)
self
.
loan_keyword
=
self
.
replace_newline
(
loan_keyword
)
self
.
repayments_keyword
=
self
.
replace_newline
(
repayments_keyword
)
self
.
wechat_keyword
=
wechat_keyword
self
.
proof_res
=
(
'对'
,
'错'
)
self
.
loan_fill
=
PatternFill
(
"solid"
,
fgColor
=
"00FFCC00"
)
...
...
@@ -323,7 +326,7 @@ class BSWorkbook(Workbook):
self
.
date_header
,
(
print_time
,
start_date
,
end_date
,
timedelta
),
self
.
blank_row
,
self
.
keyword_header
]
self
.
interest_
keyword_header
]
)
return
metadata_rows
...
...
@@ -423,6 +426,7 @@ class BSWorkbook(Workbook):
result_idx
=
len
(
header
)
-
1
tmp_ws
=
self
.
create_sheet
(
'tmp_ws'
)
tmp2_ws
=
self
.
create_sheet
(
'tmp2_ws'
)
if
classify
in
consts
.
ALI_WECHART_CLASSIFY
:
high_light_keyword
=
self
.
wechat_keyword
else
:
...
...
@@ -536,6 +540,10 @@ class BSWorkbook(Workbook):
elif
summary_cell_value
in
self
.
salary_keyword
:
new_amount_cell_value
=
None
if
amount_cell
is
None
else
amount_cell
.
value
tmp_ws
.
append
((
summary_cell_value
,
date_cell_value
,
new_amount_cell_value
))
# 关键词3提取至临时表
elif
summary_cell_value
in
self
.
repayments_keyword
:
new_amount_cell_value
=
None
if
amount_cell
is
None
else
amount_cell
.
value
tmp2_ws
.
append
((
summary_cell_value
,
date_cell_value
,
new_amount_cell_value
))
# 贷款关键词高亮
# elif summary_cell_value in high_light_keyword:
# summary_cell.fill = self.amount_fill
...
...
@@ -557,11 +565,18 @@ class BSWorkbook(Workbook):
# 关键词2信息提取
ms
.
append
(
self
.
blank_row
)
ms
.
append
(
self
.
keyword_header
)
ms
.
append
(
self
.
salary_
keyword_header
)
for
row
in
tmp_ws
.
iter_rows
(
values_only
=
True
):
ms
.
append
(
row
)
self
.
remove
(
tmp_ws
)
# 关键词3信息提取
ms
.
append
(
self
.
blank_row
)
ms
.
append
(
self
.
repayments_keyword_header
)
for
row
in
tmp2_ws
.
iter_rows
(
values_only
=
True
):
ms
.
append
(
row
)
self
.
remove
(
tmp2_ws
)
def
bs_rebuild
(
self
,
bs_summary
):
# bs_summary = {
# '卡号': {
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment