Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
33ea2687
authored
2021-12-07 16:48:33 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add bs verify
1 parent
8e146fd9
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
44 additions
and
8 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/ocr/wb.py
src/apps/doc/consts.py
View file @
33ea268
...
...
@@ -1558,7 +1558,7 @@ BL_COMPARE_LOGIC = {
'registeredCapital'
:
(
'注册资本'
,
'se_rmb_compare'
,
{},
'营业执照注册资本与系统不一致'
),
}
BL_COMPARE_LOGIC
=
{
SME_
BL_COMPARE_LOGIC
=
{
'legalRepName'
:
(
'经营者姓名'
,
'se_name_compare'
,
{},
'营业执照法定代表人与系统不一致'
),
'businessLicenseDueDate'
:
(
'营业期限'
,
'se_date_compare'
,
{
'ocr_split'
:
True
,
'long'
:
True
,
'ocr_replace'
:
True
,
'today'
:
True
},
'公司营业期限疑似过期'
),
}
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
33ea268
...
...
@@ -168,6 +168,12 @@ class Command(BaseCommand, LoggerMixin):
words
=
cell
.
get
(
'words'
)
ws
.
cell
(
row
=
r1
+
1
,
column
=
c1
+
1
,
value
=
words
)
# 真伪
verify_info
=
[]
verify_dict
=
sheet
.
get
(
'verify'
,
{})
if
verify_dict
.
get
(
'verify_res'
)
==
'fake'
:
verify_info
.
extend
(
verify_dict
.
get
(
'verify_info'
,
[]))
# ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间']
summary
=
sheet
.
get
(
'summary'
)
card
=
summary
[
1
]
...
...
@@ -183,6 +189,7 @@ class Command(BaseCommand, LoggerMixin):
pt_list
=
role_dict
.
setdefault
(
'print_time'
,
[])
sd_list
=
role_dict
.
setdefault
(
'start_date'
,
[])
ed_list
=
role_dict
.
setdefault
(
'end_date'
,
[])
verify_list
=
role_dict
.
setdefault
(
'verify'
,
[])
if
summary
[
3
]
is
not
None
:
code_list
.
append
((
summary
[
2
],
summary
[
3
]))
if
summary
[
4
]
is
not
None
:
...
...
@@ -191,6 +198,10 @@ class Command(BaseCommand, LoggerMixin):
sd_list
.
append
(
summary
[
5
])
if
summary
[
6
]
is
not
None
:
ed_list
.
append
(
summary
[
6
])
if
len
(
verify_info
)
>
0
:
verify_list
.
append
(
(
pno
,
ino
,
'、'
.
join
(
verify_info
))
)
else
:
card_dict
=
bs_summary
.
setdefault
(
card
,
{})
card_dict
[
'count'
]
=
card_dict
.
get
(
'count'
,
0
)
+
1
...
...
@@ -203,6 +214,7 @@ class Command(BaseCommand, LoggerMixin):
pt_list
=
card_dict
.
setdefault
(
'print_time'
,
[])
sd_list
=
card_dict
.
setdefault
(
'start_date'
,
[])
ed_list
=
card_dict
.
setdefault
(
'end_date'
,
[])
verify_list
=
card_dict
.
setdefault
(
'verify'
,
[])
if
summary
[
0
]
is
not
None
:
role_list
.
append
(
summary
[
0
])
role_set
.
add
(
summary
[
0
])
...
...
@@ -214,6 +226,10 @@ class Command(BaseCommand, LoggerMixin):
sd_list
.
append
(
summary
[
5
])
if
summary
[
6
]
is
not
None
:
ed_list
.
append
(
summary
[
6
])
if
len
(
verify_info
)
>
0
:
verify_list
.
append
(
(
pno
,
ino
,
'、'
.
join
(
verify_info
))
)
if
cells_exists
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS
))
...
...
@@ -833,6 +849,7 @@ class Command(BaseCommand, LoggerMixin):
merged_bs_summary
[
main_card
][
'role'
]
.
extend
(
bs_summary
[
card
][
'role'
])
merged_bs_summary
[
main_card
][
'role_set'
]
.
update
(
bs_summary
[
card
][
'role_set'
])
merged_bs_summary
[
main_card
][
'code'
]
.
extend
(
bs_summary
[
card
][
'code'
])
merged_bs_summary
[
main_card
][
'verify'
]
.
extend
(
bs_summary
[
card
][
'verify'
])
merged_bs_summary
[
main_card
][
'print_time'
]
.
extend
(
bs_summary
[
card
][
'print_time'
])
merged_bs_summary
[
main_card
][
'start_date'
]
.
extend
(
bs_summary
[
card
][
'start_date'
])
merged_bs_summary
[
main_card
][
'end_date'
]
.
extend
(
bs_summary
[
card
][
'end_date'
])
...
...
@@ -882,7 +899,8 @@ class Command(BaseCommand, LoggerMixin):
# 'classify': [],
# 'confidence': [],
# 'role': [],
# 'code': [('page', 'code')],
# 'code': [('page', 'code'), ],
# 'verify': [(pno, ino, reason_str), ]
# 'print_time': [],
# 'start_date': [],
# 'end_date': [],
...
...
@@ -896,7 +914,8 @@ class Command(BaseCommand, LoggerMixin):
# 'classify': 0,
# 'confidence': [],
# 'role': '户名',
# 'code': [('page', 'code')],
# 'code': [('page', 'code'), ],
# 'verify': [(pno, ino, reason_str), ]
# 'print_time': [],
# 'start_date': [],
# 'end_date': [],
...
...
@@ -921,6 +940,7 @@ class Command(BaseCommand, LoggerMixin):
# summary_dict['confidence'].extend(summary['confidence'])
summary_dict
[
'role'
]
=
summary
[
'role'
]
summary_dict
[
'code'
]
.
extend
(
summary
[
'code'
])
summary_dict
[
'verify'
]
.
extend
(
summary
[
'verify'
])
summary_dict
[
'print_time'
]
.
extend
(
summary
[
'print_time'
])
summary_dict
[
'start_date'
]
.
extend
(
summary
[
'start_date'
])
summary_dict
[
'end_date'
]
.
extend
(
summary
[
'end_date'
])
...
...
@@ -952,6 +972,7 @@ class Command(BaseCommand, LoggerMixin):
# card_summary['confidence'].extend(summary['confidence'])
card_summary
[
'sheet'
]
.
extend
(
summary
[
'sheet'
])
card_summary
[
'code'
]
.
extend
(
summary
[
'code'
])
card_summary
[
'verify'
]
.
extend
(
summary
[
'verify'
])
card_summary
[
'print_time'
]
.
extend
(
summary
[
'print_time'
])
card_summary
[
'start_date'
]
.
extend
(
summary
[
'start_date'
])
card_summary
[
'end_date'
]
.
extend
(
summary
[
'end_date'
])
...
...
src/apps/doc/ocr/wb.py
View file @
33ea268
...
...
@@ -20,6 +20,7 @@ class BSWorkbook(Workbook):
self
.
meta_sheet_title
=
'Key info'
self
.
blank_row
=
(
None
,)
self
.
code_header
=
(
'页数'
,
'电子回单验证码'
)
self
.
verify_header
=
(
'页数'
,
'图片序号'
,
'检测内容'
)
self
.
date_header
=
(
'打印时间'
,
'起始日期'
,
'终止日期'
,
'流水区间结果'
)
self
.
interest_keyword_header
=
(
'结息关键词'
,
'记账日期'
,
'金额'
)
self
.
salary_keyword_header
=
(
'收入关键词'
,
'记账日期'
,
'金额'
)
...
...
@@ -310,19 +311,28 @@ class BSWorkbook(Workbook):
month_mapping
.
setdefault
(
item
,
[])
.
append
(
(
ws
.
title
,
idx_list
[
i
]
+
min_row
,
idx_list
[
i
+
1
]
+
min_row
-
1
,
day_mean
))
def
build_metadata_rows
(
self
,
confidence
,
code
,
print_time
,
start_date
,
end_date
,
res_count_tuple
):
def
build_metadata_rows
(
self
,
confidence
,
code
,
verify_list
,
print_time
,
start_date
,
end_date
,
res_count_tuple
):
if
start_date
is
None
or
end_date
is
None
:
timedelta
=
None
else
:
timedelta
=
(
end_date
-
start_date
)
.
days
verify_res
=
'正常'
if
len
(
verify_list
)
>
0
:
verify_res
=
'疑似伪造'
metadata_rows
=
[
(
'流水识别置信度'
,
confidence
),
(
'流水检测结果'
,
verify_res
),
(
'图片总数'
,
res_count_tuple
[
0
]),
(
'识别成功'
,
res_count_tuple
[
1
]),
self
.
blank_row
,
self
.
code_header
,
]
metadata_rows
.
extend
(
code
)
if
len
(
verify_list
)
>
0
:
metadata_rows
.
append
(
self
.
verify_header
)
metadata_rows
.
extend
(
verify_list
)
metadata_rows
.
extend
(
[
self
.
blank_row
,
self
.
date_header
,
...
...
@@ -332,18 +342,21 @@ class BSWorkbook(Workbook):
)
return
metadata_rows
def
build_meta_sheet
(
self
,
role_name
,
card
,
confidence
,
code
,
print_time
,
start_date
,
end_date
,
res_count_tuple
):
metadata_rows
=
self
.
build_metadata_rows
(
confidence
,
code
,
print_time
,
start_date
,
end_date
,
res_count_tuple
)
def
build_meta_sheet
(
self
,
role_name
,
card
,
confidence
,
code
,
verify_list
,
print_time
,
start_date
,
end_date
,
res_count_tuple
):
metadata_rows
=
self
.
build_metadata_rows
(
confidence
,
code
,
verify_list
,
print_time
,
start_date
,
end_date
,
res_count_tuple
)
if
not
isinstance
(
role_name
,
str
):
role_name
=
consts
.
UNKNOWN_ROLE
ms
=
self
.
create_sheet
(
'{0}{1}({2})'
.
format
(
self
.
meta_sheet_title
,
role_name
,
card
))
for
row
in
metadata_rows
:
ms
.
append
(
row
)
if
res_count_tuple
[
0
]
!=
res_count_tuple
[
1
]
:
if
len
(
verify_list
)
>
0
:
for
cell
in
ms
[
2
]:
cell
.
fill
=
self
.
amount_fill
if
res_count_tuple
[
0
]
!=
res_count_tuple
[
1
]:
for
cell
in
ms
[
3
]:
cell
.
fill
=
self
.
amount_fill
for
cell
in
ms
[
4
]:
cell
.
fill
=
self
.
amount_fill
return
ms
@staticmethod
...
...
@@ -608,7 +621,8 @@ class BSWorkbook(Workbook):
# 'classify': 0,
# 'confidence': 0.9,
# 'role': '柳雪',
# 'code': [('page', 'code')],
# 'code': [('page', 'code'), ],
# 'verify': [(pno, ino, reason_str), ]
# 'print_time': 'datetime',
# 'start_date': 'datetime',
# 'end_date': 'datetime',
...
...
@@ -661,6 +675,7 @@ class BSWorkbook(Workbook):
new_card
,
confidence
,
summary
.
get
(
'code'
),
summary
.
get
(
'verify'
),
summary
.
get
(
'print_time'
),
start_date
,
end_date
,
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment