Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
fe23c4f5
authored
2021-09-08 11:04:33 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add contract
1 parent
73c957c3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
55 additions
and
7 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/ocr/wb.py
src/apps/doc/consts.py
View file @
fe23c4f
...
...
@@ -1603,4 +1603,4 @@ CA_COMPARE_FIELD = {
DL_EN
:
(
DL_OCR_FIELD
,
CA_DL_COMPARE_LOGIC
,
False
),
}
CONTRACT_SHEET_NAME
=
'合同'
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
fe23c4f
...
...
@@ -208,7 +208,43 @@ class Command(BaseCommand, LoggerMixin):
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
def contract_process(self, ocr_data, contract_result, res_list, pno, ino, part_idx, img_path):
    """Flatten the contract OCR payload of one image into worksheet rows.

    Reads ``ocr_data['data']``; when it is empty or lacks ``page_num`` or
    ``page_info`` the image is recorded in ``res_list`` with
    ``consts.RES_SUCCESS_EMPTY`` and nothing else happens. Otherwise the
    image is recorded with ``consts.RES_SUCCESS`` and the rows built from
    ``page_info`` are appended to ``contract_result[page_num]``.

    Args:
        ocr_data: mapping returned by the OCR call; only its ``'data'``
            entry is read.
        contract_result: dict mutated in place, mapping ``page_num`` to a
            list with one row-list per processed image.
        res_list: list mutated in place with ``(pno, ino, part_idx, status)``.
        pno, ino, part_idx: identifiers copied verbatim into ``res_list``.
        img_path: unused here; kept so the signature stays unchanged.

    Returns:
        None. Results are delivered through ``res_list`` and
        ``contract_result``.
    """
    text_key = 'words'

    def append_words(rows, label, words):
        # Emit the row(s) for one labelled entry from the value stored under text_key.
        if words is None:
            rows.append((label,))
        elif isinstance(words, str):
            rows.append((label, words))
        elif isinstance(words, list):
            # Label on its own row, then every contained row verbatim.
            rows.append((label,))
            rows.extend(words)
        # Any other type produces no row at all, exactly as before.

    contract_dict = ocr_data.get('data')
    if not contract_dict or 'page_num' not in contract_dict or 'page_info' not in contract_dict:
        res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
        return
    res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))

    page_num = contract_dict.get('page_num')
    # The key may be present with a None value; treat that as "no entries".
    page_info = contract_dict.get('page_info') or {}

    rebuild_page_info = []
    for key, value in page_info.items():
        if not isinstance(value, dict):
            # None, or any non-mapping value that previously raised on
            # indexing or .items(): keep a label row so the entry stays visible.
            rebuild_page_info.append((key,))
        elif text_key in value:
            append_words(rebuild_page_info, key, value[text_key])
        else:
            # No text at this level: the entry is a group of sub-entries.
            rebuild_page_info.append((key,))
            for sub_key, sub_value in value.items():
                if not isinstance(sub_value, dict):
                    rebuild_page_info.append((sub_key,))
                elif text_key in sub_value:
                    append_words(rebuild_page_info, sub_key, sub_value[text_key])

    contract_result.setdefault(page_num, []).append(rebuild_page_info)
def
license1_process
(
self
,
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
do_dda
,
dda_id_bc_mapping
):
# 类别:'0'身份证, '1'居住证
...
...
@@ -943,9 +979,9 @@ class Command(BaseCommand, LoggerMixin):
# self.bs_log.info('[task={0}] [bs_summary={1}]'.format(task_str, merged_bs_summary))
self
.
online_log
.
info
(
'{0} [task={1}] [merged_bs_summary={2}] [license_summary={3}] '
'[res_list={
4
}]'
.
format
(
self
.
log_base
,
task_str
,
merged_bs_summary
,
license_summary
,
res_list
))
# Positional arguments must follow the placeholder order:
# {3} is license_summary and {4} is contract_result.
self.online_log.info('{0} [task={1}] [merged_bs_summary={2}] [license_summary={3}] [contract={4}] '
                     '[res_list={5}]'.format(self.log_base, task_str, merged_bs_summary,
                                             license_summary, contract_result, res_list))
except
Exception
as
e
:
...
...
@@ -966,7 +1002,7 @@ class Command(BaseCommand, LoggerMixin):
# 重构Excel文件
# src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
# wb.save(src_excel_path)
count_list
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
)
count_list
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
,
contract_result
)
wb
.
save
(
excel_path
)
except
Exception
as
e
:
...
...
src/apps/doc/ocr/wb.py
View file @
fe23c4f
...
...
@@ -702,6 +702,17 @@ class BSWorkbook(Workbook):
if
field_str
is
not
None
:
count_list
.
append
((
field_str
,
count
))
def contract_rebuild(self, contract_result):
    """Write the collected contract rows into a dedicated worksheet.

    Does nothing when ``contract_result`` is empty or None. Otherwise a
    sheet named ``consts.CONTRACT_SHEET_NAME`` is created and, for every
    page, a ``page N`` header row is written followed by the rows of each
    row-list stored for that page; each row-list is followed by a blank
    separator row.

    Args:
        contract_result: dict mapping a page number to a list of row-lists,
            where every row is a sequence accepted by ``ws.append``.
    """
    if not contract_result:
        return
    ws = self.create_sheet(consts.CONTRACT_SHEET_NAME)
    # Iterate key/value pairs; iterating the dict itself yields keys only.
    for page_num, info_list in contract_result.items():
        # append() takes a row (sequence of cells), not a bare string.
        ws.append(('page {0}'.format(page_num),))
        for info in info_list:
            for row in info:
                ws.append(row)
            ws.append((None,))
@staticmethod
def
remove_yuan
(
amount_key_set
,
key
,
src_str
):
if
key
in
amount_key_set
and
isinstance
(
src_str
,
str
):
...
...
@@ -801,12 +812,13 @@ class BSWorkbook(Workbook):
if
len
(
self
.
sheetnames
)
>
1
:
self
.
remove
(
self
.
get_sheet_by_name
(
'Sheet'
))
def
rebuild
(
self
,
bs_summary
,
license_summary
,
res_list
,
document_scheme
):
def
rebuild
(
self
,
bs_summary
,
license_summary
,
res_list
,
document_scheme
,
contract_result
):
res_count_tuple
=
self
.
res_sheet
(
res_list
)
count_list
=
[(
consts
.
MODEL_FIELD_BS
,
len
(
bs_summary
))]
if
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
1
]:
self
.
license_rebuild
(
license_summary
,
document_scheme
,
count_list
)
self
.
contract_rebuild
(
contract_result
)
self
.
bs_rebuild
(
bs_summary
,
res_count_tuple
)
else
:
self
.
bs_rebuild
(
bs_summary
,
res_count_tuple
)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment