Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
cbe57bd2
authored
2024-06-11 12:46:00 +0800
by
chenyao
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add jira-4562 content, add income_keywords
1 parent
a883d3ea
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
41 additions
and
5 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/consts.py
View file @
cbe57bd
...
...
@@ -2532,4 +2532,17 @@ FSM_ACTIVITED_STATUS = {
"APIPN"
:
"Activated-Invoice Passed-Non PT"
,
"APIPP"
:
"Activated-Invoice Passed-PT Doc Required"
,
"APARD"
:
"Activated-Review done"
,
}
# Jira-4562 - income keywords extracted from the first page of a bank statement.
# Maps each Chinese keyword to the pinyin code that is reported for it.
INCOME_KEYWORDS_DICT = {
    "养老金": "yanglaojin",
    "社保": "shebao",
    "代发工资": "daifagongzi",
    "工资入账": "gongziruzhang",
    "奖金": "jiangjin",
    "养老保险": "yanglaobaoxian",
    "代发": "daifa",
    "工资": "gongzi",
}

# The keyword list is derived from the mapping above so the two can never
# drift apart. Dicts preserve insertion order, so the resulting list has the
# same elements in the same order as the previously hand-written literal.
INCOME_KEYWORDS_LIST = list(INCOME_KEYWORDS_DICT)
\ No newline at end of file
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
cbe57bd
...
...
@@ -177,7 +177,7 @@ class Command(BaseCommand, LoggerMixin):
# raise EDMSException(edms_exc)
# self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path))
def
bs_process
(
self
,
wb
,
ocr_data
,
bs_summary
,
unknown_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
):
def
bs_process
(
self
,
wb
,
ocr_data
,
bs_summary
,
unknown_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
income_keywords_dictionary
):
sheets
=
ocr_data
.
get
(
'data'
,
[])
if
not
sheets
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
...
...
@@ -196,6 +196,10 @@ class Command(BaseCommand, LoggerMixin):
c1
=
cell
.
get
(
'start_column'
)
r1
=
cell
.
get
(
'start_row'
)
words
=
cell
.
get
(
'words'
)
if
words
is
not
None
:
if
words
in
consts
.
INCOME_KEYWORDS_LIST
:
if
consts
.
INCOME_KEYWORDS_DICT
.
get
(
words
)
not
in
income_keywords_dictionary
[
"income_keywords"
]:
income_keywords_dictionary
[
"income_keywords"
]
.
append
(
consts
.
INCOME_KEYWORDS_DICT
.
setdefault
(
words
,
""
))
ws
.
cell
(
row
=
r1
+
1
,
column
=
c1
+
1
,
value
=
words
)
# 真伪
...
...
@@ -921,7 +925,7 @@ class Command(BaseCommand, LoggerMixin):
summary
[
'role'
]
=
self
.
get_most
(
summary
[
'role'
])
return
bs_summary
def
rebuild_bs
(
self
,
bs_summary
):
def
rebuild_bs
(
self
,
bs_summary
,
income_keywords_dictionary
):
# bs_summary = {
# '卡号': {
# 'classify': 0,
...
...
@@ -935,7 +939,24 @@ class Command(BaseCommand, LoggerMixin):
# 'sheet': ['sheet_name']
# }
# }
# income_keywords_dictionary = {
# 'income_keywords': [ # 其中 0-8 个
# 'yanglaojin',
# "shebao",
# "daifagongzi",
# "gongziruzhang",
# "jiangjin",
# "yanglaobaoxian",
# "daifa",
# "gongzi"
# ]
# }
res
=
[]
income_keywords_list
=
income_keywords_dictionary
.
get
(
'income_keywords'
,
[])
income_filtered_keywords
=
[
keyword_str
for
keyword_str
in
income_keywords_list
if
keyword_str
]
income_keywords_str
=
","
.
join
(
income_filtered_keywords
)
for
bs_info
in
bs_summary
.
values
():
try
:
print_date
=
bs_info
.
get
(
'print_time'
,
''
)
.
strftime
(
"
%
Y-
%
m-
%
d"
)
...
...
@@ -950,7 +971,8 @@ class Command(BaseCommand, LoggerMixin):
'print_time'
:
print_date
,
'timedelta'
:
bs_info
.
get
(
'timedelta'
,
''
),
'verify'
:
bs_info
.
get
(
'verify_res_ebank'
,
True
),
'e_bank'
:
bs_info
.
get
(
'e_bank'
,
False
)
'e_bank'
:
bs_info
.
get
(
'e_bank'
,
False
),
'income_keywords'
:
income_keywords_str
}
)
return
res
...
...
@@ -1724,6 +1746,7 @@ class Command(BaseCommand, LoggerMixin):
license_summary
=
{}
contract_result
=
{}
contract_result_compare
=
{}
income_keywords_dictionary
=
{
"income_keywords"
:
[]}
res_list
=
[]
interest_keyword
=
Keywords
.
objects
.
filter
(
type
=
KeywordsType
.
INTEREST
.
value
,
on_off
=
True
)
.
values_list
(
'keyword'
,
flat
=
True
)
...
...
@@ -1879,7 +1902,7 @@ class Command(BaseCommand, LoggerMixin):
ino
,
part_idx
,
img_path
,
contract_result_compare
)
else
:
# 流水处理
bs_classify_set
.
add
(
classify
)
self
.
bs_process
(
wb
,
ocr_data
,
bs_summary
,
unknown_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
)
self
.
bs_process
(
wb
,
ocr_data
,
bs_summary
,
unknown_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
income_keywords_dictionary
)
else
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_FAILED_1
))
self
.
online_log
.
info
(
'{0} [ocr_1 res error] [img={1}]'
.
format
(
self
.
log_base
,
img_path
))
...
...
@@ -2035,7 +2058,7 @@ class Command(BaseCommand, LoggerMixin):
license_summary
,
contract_result_compare
))
self
.
rebuild_contract
(
license_summary
,
contract_result_compare
)
bs_rebuild
=
self
.
rebuild_bs
(
merged_bs_summary
)
bs_rebuild
=
self
.
rebuild_bs
(
merged_bs_summary
,
income_keywords_dictionary
)
if
len
(
bs_rebuild
)
>
0
:
license_summary
[
consts
.
BS_CLASSIFY
]
=
bs_rebuild
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment