Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
1e6d9aa7
authored
2025-05-12 17:47:46 +0800
by
chenyao
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
更新保险单保单章不在首页的情况,处理代码换位置
1 parent
78456885
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
37 additions
and
36 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/management/commands/ocr_process.py
View file @
1e6d9aa
...
...
@@ -2055,6 +2055,43 @@ class Command(BaseCommand, LoggerMixin):
# src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
# wb.save(src_excel_path)
#need_follow表示在上传edms时文件名是否要添加"关注"两字
# 处理 保单 的后续页的章识别不到的问题
if
len
(
license_summary
)
>
0
:
# 如果是保单
if
consts
.
INSURANCE_CLASSIFY
in
license_summary
.
keys
():
# 获取 license_list
license_list_tmp
=
license_summary
.
get
(
consts
.
INSURANCE_CLASSIFY
,
[])
if
len
(
license_list_tmp
)
>
0
:
# 章的有无
stamp_flag_list
=
[
0
]
*
len
(
license_list_tmp
)
for
license_list_tmp_idx
,
license_dict_tmp
in
enumerate
(
license_list_tmp
):
if
isinstance
(
license_dict_tmp
,
dict
):
if
"保单章"
in
license_dict_tmp
.
keys
():
if
license_dict_tmp
.
get
(
"保单章"
)
is
not
None
:
if
isinstance
(
license_dict_tmp
.
get
(
"保单章"
),
str
)
and
license_dict_tmp
.
get
(
"保单章"
)
==
"有"
:
stamp_flag_list
[
license_list_tmp_idx
]
=
1
# 计算 stamp_flag_list 中的 sum
stamp_flag_sum
=
sum
(
stamp_flag_list
)
# 检查 license_list_tmp 中的每个元素中 [被保险人姓名] 的值是否为空, 若为空, 则不是第一页
license_first_page_list
=
[
0
]
*
len
(
license_list_tmp
)
for
license_list_tmp_idx
,
license_dict_tmp
in
enumerate
(
license_list_tmp
):
if
isinstance
(
license_dict_tmp
,
dict
):
if
"被保险人姓名"
in
license_dict_tmp
.
keys
():
if
license_dict_tmp
.
get
(
"被保险人姓名"
)
is
not
None
:
if
isinstance
(
license_dict_tmp
.
get
(
"被保险人姓名"
),
str
)
and
license_dict_tmp
.
get
(
"被保险人姓名"
)
!=
""
:
license_first_page_list
[
license_list_tmp_idx
]
=
1
break
# 获取 license_first_page_list 中为 1 的索引
license_first_page_idx
=
[
i
for
i
,
x
in
enumerate
(
license_first_page_list
)
if
x
==
1
]
# 将 license_summary 中key为 INSURANCE_CLASSIFY 的 value 替换为 first_page 中的内容, 且只保留 1 页
if
len
(
license_first_page_idx
)
>
0
:
license_summary
[
consts
.
INSURANCE_CLASSIFY
]
=
[
license_list_tmp
[
license_first_page_idx
[
0
]]]
# 如果 sum > 0, 说明有章, 无论是在第几页; 上面已将第一页的内容提取出来并只保留第一页(若识别到第一页), 这里将保留页的 保单章 置为 "有"
if
stamp_flag_sum
>
0
:
license_summary
[
consts
.
INSURANCE_CLASSIFY
][
0
][
"保单章"
]
=
"有"
count_list
,
need_follow
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
,
contract_result
,
doc
.
metadata
,
financial_statement_dict
,
financial_explanation_dict
,
down_payment_dict
)
wb
.
save
(
excel_path
)
...
...
@@ -2155,42 +2192,6 @@ class Command(BaseCommand, LoggerMixin):
if
len
(
bs_rebuild
)
>
0
:
license_summary
[
consts
.
BS_CLASSIFY
]
=
bs_rebuild
# 处理 保单 的后续页的章识别不到的问题
if
len
(
license_summary
)
>
0
:
# 如果是保单
if
consts
.
INSURANCE_CLASSIFY
in
license_summary
.
keys
():
# 获取 license_list
license_list_tmp
=
license_summary
.
get
(
consts
.
INSURANCE_CLASSIFY
,
[])
if
len
(
license_list_tmp
)
>
0
:
# 章的有无
stamp_flag_list
=
[
0
]
*
len
(
license_list_tmp
)
for
license_list_tmp_idx
,
license_dict_tmp
in
enumerate
(
license_list_tmp
):
if
isinstance
(
license_dict_tmp
,
dict
):
if
"保单章"
in
license_dict_tmp
.
keys
():
if
license_dict_tmp
.
get
(
"保单章"
)
is
not
None
:
if
isinstance
(
license_dict_tmp
.
get
(
"保单章"
),
str
)
and
license_dict_tmp
.
get
(
"保单章"
)
==
"有"
:
stamp_flag_list
[
license_list_tmp_idx
]
=
1
# 计算 stamp_flag_list 中的 sum
stamp_flag_sum
=
sum
(
stamp_flag_list
)
# 检查 license_list_tmp 中的每个元素中 [被保险人姓名] 的值是否为空, 若为空, 则不是第一页
license_first_page_list
=
[
0
]
*
len
(
license_list_tmp
)
for
license_list_tmp_idx
,
license_dict_tmp
in
enumerate
(
license_list_tmp
):
if
isinstance
(
license_dict_tmp
,
dict
):
if
"被保险人姓名"
in
license_dict_tmp
.
keys
():
if
license_dict_tmp
.
get
(
"被保险人姓名"
)
is
not
None
:
if
isinstance
(
license_dict_tmp
.
get
(
"被保险人姓名"
),
str
)
and
license_dict_tmp
.
get
(
"被保险人姓名"
)
!=
""
:
license_first_page_list
[
license_list_tmp_idx
]
=
1
break
# 获取 license_first_page_list 中为 1 的索引
license_first_page_idx
=
[
i
for
i
,
x
in
enumerate
(
license_first_page_list
)
if
x
==
1
]
# 将 license_summary 中key为 INSURANCE_CLASSIFY 的 value 替换为 first_page 中的内容, 且只保留 1 页
if
len
(
license_first_page_idx
)
>
0
:
license_summary
[
consts
.
INSURANCE_CLASSIFY
]
=
[
license_list_tmp
[
license_first_page_idx
[
0
]]]
# 如果 sum > 0, 说明有章, 无论是在第几页; 上面已将第一页的内容提取出来并只保留第一页(若识别到第一页), 这里将保留页的 保单章 置为 "有"
if
stamp_flag_sum
>
0
:
license_summary
[
consts
.
INSURANCE_CLASSIFY
][
0
][
"保单章"
]
=
"有"
# 比对
if
len
(
license_summary
)
>
0
or
classify
in
consts
.
NEW_FILE_COMPARE_SET
:
if
doc
.
document_scheme
!=
consts
.
DOC_SCHEME_LIST
[
2
]:
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment