Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
5fbb652c
authored
2021-09-08 11:05:57 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
Merge branch 'feature/contract' into feature/0611
2 parents
a84f1276
fe23c4f5
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
65 additions
and
6 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/ocr/wb.py
src/apps/doc/consts.py
View file @
5fbb652
...
...
@@ -1002,6 +1002,10 @@ JYPZ_FIELD_ORDER = (("type", "标题"),
(
"stamp_signature_seller"
,
"出卖方签字/盖章"
),
(
"stamp_signature_agent"
,
"经销商签字/盖章"
),)
# Contract
# Display name for the contract document type and the classification value
# that the OCR command compares against to route a page to contract_process.
CONTRACT_CN_NAME
=
'合同'
CONTRACT_CLASSIFY
=
41
# Holds both the string '0' and the integer 0.
# NOTE(review): presumably so a success code matches whether the upstream
# service returns it as text or as a number -- confirm against the callers.
SUCCESS_CODE_SET
=
{
'0'
,
0
}
FIELD_ORDER_MAP
=
{
...
...
@@ -1628,4 +1632,4 @@ CA_COMPARE_FIELD = {
DL_EN
:
(
DL_OCR_FIELD
,
CA_DL_COMPARE_LOGIC
,
False
),
}
# Title of the worksheet created by BSWorkbook.contract_rebuild to hold the
# contract OCR rows.
CONTRACT_SHEET_NAME
=
'合同'
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
5fbb652
...
...
@@ -207,6 +207,45 @@ class Command(BaseCommand, LoggerMixin):
else
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
def contract_process(self, ocr_data, contract_result, res_list, pno, ino, part_idx, img_path):
    """Flatten the OCR result of one contract image into worksheet rows.

    The rows are appended, as one list per image, to
    ``contract_result[page_num]``; ``BSWorkbook.contract_rebuild`` later
    writes each row with ``ws.append``.

    Args:
        ocr_data: OCR response dict. Its ``'data'`` entry must be a dict
            holding ``'page_num'`` and a dict-valued ``'page_info'``.
        contract_result: dict mapping page number -> list of row lists;
            mutated in place.
        res_list: list of ``(pno, ino, part_idx, status)`` tuples; exactly
            one entry is appended per call.
        pno, ino, part_idx: identifiers copied verbatim into the
            ``res_list`` entry.
        img_path: unused in this method.

    Row layout produced for each ``page_info`` entry:
        * ``None``                         -> ``(key,)``
        * dict with ``'words'`` = None     -> ``(key,)``
        * dict with ``'words'`` = str      -> ``(key, text)``
        * dict with ``'words'`` = list     -> ``(key,)`` then every list item
        * dict without ``'words'`` (group) -> ``(key,)`` then the rows of
          each sub-entry, using the same rules (one level deep only)

    Entries of any other shape produce no rows instead of raising.
    """
    text_key = 'words'

    def field_rows(name, field):
        # Rows for a single (non-group) field; [] when the shape is unknown.
        if field is None:
            return [(name,)]
        if not isinstance(field, dict) or text_key not in field:
            return []
        text = field[text_key]
        if text is None:
            return [(name,)]
        if isinstance(text, str):
            return [(name, text)]
        if isinstance(text, list):
            return [(name,)] + list(text)
        return []

    contract_dict = ocr_data.get('data')
    page_info = contract_dict.get('page_info') if isinstance(contract_dict, dict) else None
    # A missing, None or non-dict page_info is reported as an empty result;
    # previously `'page_info': None` slipped through the key check and
    # crashed on `.items()` after RES_SUCCESS had already been recorded.
    if not isinstance(page_info, dict) or 'page_num' not in contract_dict:
        res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
        return

    res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))

    rebuild_page_info = []
    for key, value in page_info.items():
        if isinstance(value, dict) and text_key not in value:
            # Group of sub-fields: heading row first, then each sub-field.
            rebuild_page_info.append((key,))
            for sub_key, sub_value in value.items():
                rebuild_page_info.extend(field_rows(sub_key, sub_value))
        else:
            rebuild_page_info.extend(field_rows(key, value))

    contract_result.setdefault(contract_dict['page_num'], []).append(rebuild_page_info)
def
license1_process
(
self
,
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
do_dda
,
dda_id_bc_mapping
):
# 类别:'0'身份证, '1'居住证
license_data
=
ocr_data
.
get
(
'data'
)
...
...
@@ -820,6 +859,7 @@ class Command(BaseCommand, LoggerMixin):
bs_summary
=
{}
unknown_summary
=
{}
license_summary
=
{}
contract_result
=
{}
res_list
=
[]
interest_keyword
=
Keywords
.
objects
.
filter
(
type
=
KeywordsType
.
INTEREST
.
value
,
on_off
=
True
)
.
values_list
(
'keyword'
,
flat
=
True
)
...
...
@@ -907,6 +947,9 @@ class Command(BaseCommand, LoggerMixin):
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_FAILED_2
))
self
.
online_log
.
warn
(
'{0} [ocr_2 failed] [img_path={1}]'
.
format
(
self
.
log_base
,
img_path
))
elif
classify
==
consts
.
CONTRACT_CLASSIFY
:
self
.
contract_process
(
ocr_data
,
contract_result
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
)
else
:
# 流水处理
bs_classify_set
.
add
(
classify
)
self
.
bs_process
(
wb
,
ocr_data
,
bs_summary
,
unknown_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
)
...
...
@@ -936,9 +979,9 @@ class Command(BaseCommand, LoggerMixin):
# self.bs_log.info('[task={0}] [bs_summary={1}]'.format(task_str, merged_bs_summary))
self
.
online_log
.
info
(
'{0} [task={1}] [merged_bs_summary={2}] [license_summary={3}] '
'[res_list={
4
}]'
.
format
(
self
.
log_base
,
task_str
,
merged_bs_summary
,
license_summary
,
res_list
))
# Arguments follow placeholder order: {3} is license_summary and {4} is
# contract_result. They were previously passed the other way round, so each
# value was logged under the other's label.
self.online_log.info('{0} [task={1}] [merged_bs_summary={2}] [license_summary={3}] [contract={4}] '
                     '[res_list={5}]'.format(self.log_base, task_str, merged_bs_summary,
                                             license_summary, contract_result, res_list))
except
Exception
as
e
:
...
...
@@ -959,7 +1002,7 @@ class Command(BaseCommand, LoggerMixin):
# 重构Excel文件
# src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
# wb.save(src_excel_path)
count_list
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
)
count_list
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
,
contract_result
)
wb
.
save
(
excel_path
)
except
Exception
as
e
:
...
...
src/apps/doc/ocr/wb.py
View file @
5fbb652
...
...
@@ -702,6 +702,17 @@ class BSWorkbook(Workbook):
if
field_str
is
not
None
:
count_list
.
append
((
field_str
,
count
))
def contract_rebuild(self, contract_result):
    """Write the collected contract OCR rows into a dedicated worksheet.

    Args:
        contract_result: dict mapping page number -> list of row lists, as
            filled by the OCR command's ``contract_process``. Nothing is
            created when it is empty or None.

    For every page a ``'page N'`` heading row is written, followed by the
    rows of each info block and a blank separator row.
    """
    if not contract_result:
        return
    ws = self.create_sheet(consts.CONTRACT_SHEET_NAME)
    # Iterate key/value pairs: looping over the dict itself yields only the
    # page numbers, which cannot be unpacked into (page_num, info_list).
    for page_num, info_list in contract_result.items():
        # Worksheet.append needs a list/tuple/dict; a bare str is rejected,
        # so the heading is wrapped in a one-element tuple.
        ws.append(('page {0}'.format(page_num),))
        for info in info_list:
            for row in info:
                # OCR 'words' lists may hold plain strings; wrap them so
                # each becomes a one-cell row instead of raising.
                ws.append((row,) if isinstance(row, str) else row)
            # NOTE(review): the separator's nesting level is not recoverable
            # from the flattened diff; it is emitted after each info block
            # here -- confirm against the repository.
            ws.append((None,))
@staticmethod
def
remove_yuan
(
amount_key_set
,
key
,
src_str
):
if
key
in
amount_key_set
and
isinstance
(
src_str
,
str
):
...
...
@@ -801,12 +812,13 @@ class BSWorkbook(Workbook):
if
len
(
self
.
sheetnames
)
>
1
:
self
.
remove
(
self
.
get_sheet_by_name
(
'Sheet'
))
def
rebuild
(
self
,
bs_summary
,
license_summary
,
res_list
,
document_scheme
):
def
rebuild
(
self
,
bs_summary
,
license_summary
,
res_list
,
document_scheme
,
contract_result
):
res_count_tuple
=
self
.
res_sheet
(
res_list
)
count_list
=
[(
consts
.
MODEL_FIELD_BS
,
len
(
bs_summary
))]
if
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
1
]:
self
.
license_rebuild
(
license_summary
,
document_scheme
,
count_list
)
self
.
contract_rebuild
(
contract_result
)
self
.
bs_rebuild
(
bs_summary
,
res_count_tuple
)
else
:
self
.
bs_rebuild
(
bs_summary
,
res_count_tuple
)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment