Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
d2a1f3f2
authored
2024-08-12 10:57:57 +0800
by
chenyao
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
添加财报情况说明的内容存储到数据库和生成到Excel中
1 parent
efad9db2
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
64 additions
and
9 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/ocr/wb.py
src/apps/doc/consts.py
View file @
d2a1f3f
...
...
@@ -2538,4 +2538,9 @@ FSM_ACTIVITED_STATUS = {
# 财务报表分类标签
FINANCIAL_STATEMENT_CLASSIFY_LIST
=
[
97
,
98
,
99
]
# 财务报表sheet名称
FINANCIAL_SHEET_NAME
=
"财务报表"
\ No newline at end of file
FINANCIAL_SHEET_NAME
=
"财务报表"
# Classification labels for "financial statement explanation" documents
FINANCIAL_EXPLANATION_CLASSIFY_LIST
=
[
100
]
# Sheet name used for the "financial statement explanation" worksheet
FINANCIAL_EXPLANATION_SHEET_NAME
=
"财报情况说明"
\ No newline at end of file
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
d2a1f3f
...
...
@@ -1726,6 +1726,8 @@ class Command(BaseCommand, LoggerMixin):
contract_result_compare
=
{}
# 添加财报三个报表的处理
financial_statement_dict
=
{}
# 添加财报情况说明的处理
financial_explanation_dict
=
{}
res_list
=
[]
interest_keyword
=
Keywords
.
objects
.
filter
(
type
=
KeywordsType
.
INTEREST
.
value
,
on_off
=
True
)
.
values_list
(
'keyword'
,
flat
=
True
)
...
...
@@ -1778,6 +1780,21 @@ class Command(BaseCommand, LoggerMixin):
if
"stamp"
in
ocr_data
:
stamp
=
ocr_data
.
get
(
"stamp"
,
""
)
financial_statement_dict
[
"stamp"
][
financial_statement_table_name
]
=
stamp
elif
classify
in
consts
.
FINANCIAL_EXPLANATION_CLASSIFY_LIST
:
# 添加到 res_list 中
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_FINANCIAL_STATEMENT
))
# 只要分类为财报情况说明的,就在 financial_explanation_dict 中添加对应的 title 和 stamp 两个dict
if
"title"
not
in
financial_explanation_dict
:
financial_explanation_dict
[
"title"
]
=
{}
if
"stamp"
not
in
financial_explanation_dict
:
financial_explanation_dict
[
"stamp"
]
=
{}
if
"title"
in
ocr_data
:
title
=
ocr_data
.
get
(
"title"
,
""
)
financial_explanation_dict
[
"title"
]
=
title
if
"stamp"
in
ocr_data
:
stamp
=
ocr_data
.
get
(
"stamp"
,
""
)
financial_explanation_dict
[
"stamp"
]
=
stamp
elif
classify
in
consts
.
OTHER_CLASSIFY_SET
:
# 其他类
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_OTHER
))
continue
...
...
@@ -1964,7 +1981,7 @@ class Command(BaseCommand, LoggerMixin):
# src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
# wb.save(src_excel_path)
#need_follow表示在上传edms时文件名是否要添加"关注"两字
count_list
,
need_follow
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
,
contract_result
,
doc
.
metadata
,
financial_statement_dict
)
count_list
,
need_follow
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
,
contract_result
,
doc
.
metadata
,
financial_statement_dict
,
financial_explanation_dict
)
wb
.
save
(
excel_path
)
except
Exception
as
e
:
...
...
@@ -2073,10 +2090,10 @@ class Command(BaseCommand, LoggerMixin):
# 更新OCR累计识别结果表
if
business_type
==
consts
.
HIL_PREFIX
:
result_class
=
HILOCRResult
if
is_ca
else
HILSEOCRResult
res_obj
=
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
)
res_obj
=
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
)
else
:
result_class
=
AFCOCRResult
if
is_ca
else
AFCSEOCRResult
res_obj
=
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
)
res_obj
=
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
)
except
Exception
as
e
:
self
.
online_log
.
error
(
...
...
@@ -2115,10 +2132,10 @@ class Command(BaseCommand, LoggerMixin):
# 更新OCR累计识别结果表
if
business_type
==
consts
.
HIL_PREFIX
:
result_class
=
HILOCRResult
if
is_ca
else
HILSEOCRResult
res_obj
=
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
)
res_obj
=
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
)
else
:
result_class
=
AFCOCRResult
if
is_ca
else
AFCSEOCRResult
res_obj
=
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
)
res_obj
=
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
)
except
Exception
as
e
:
self
.
online_log
.
error
(
'{0} [process error (ocr result save)] [task={1}] [error={2}]'
.
format
(
...
...
@@ -2469,7 +2486,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
online_log
.
info
(
'{0} [stop safely]'
.
format
(
self
.
log_base
))
@transaction.atomic
def
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
):
def
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
):
with
transaction
.
atomic
(
'afc'
):
res_obj
=
result_class
.
objects
.
using
(
'afc'
)
.
select_for_update
()
.
filter
(
application_id
=
doc
.
application_id
)
.
first
()
self
.
online_log
.
info
(
'{0} [sql lock AFC application_id={1} doc_id={2}]'
.
format
(
self
.
log_base
,
doc
.
application_id
,
doc
.
id
))
...
...
@@ -2477,9 +2494,14 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
res_obj
=
result_class
()
res_obj
.
application_id
=
doc
.
application_id
self
.
online_log
.
info
(
'{0} [res_obj is None application_id={1} doc_id={2}]'
.
format
(
self
.
log_base
,
doc
.
application_id
,
doc
.
id
))
# 财务报表存入数据库
if
res_obj
is
not
None
:
if
financial_statement_dict
:
res_obj
.
fs_ocr
=
json
.
dumps
([
financial_statement_dict
])
# 财报情况说明存入数据库
if
res_obj
is
not
None
:
if
financial_explanation_dict
:
res_obj
.
fss_ocr
=
json
.
dumps
([
financial_explanation_dict
])
for
classify
,
field
in
consts
.
RESULT_MAPPING
.
items
():
if
not
hasattr
(
res_obj
,
field
):
continue
...
...
@@ -2505,7 +2527,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
return
res_obj
@transaction.atomic
def
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
):
def
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
):
with
transaction
.
atomic
(
'default'
):
res_obj
=
result_class
.
objects
.
using
(
'default'
)
.
select_for_update
()
.
filter
(
application_id
=
doc
.
application_id
)
.
first
()
self
.
online_log
.
info
(
'{0} [sql lock HIL application_id={1} doc_id={2}]'
.
format
(
self
.
log_base
,
doc
.
application_id
,
doc
.
id
))
...
...
@@ -2513,9 +2535,14 @@ def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, tas
res_obj
=
result_class
()
res_obj
.
application_id
=
doc
.
application_id
self
.
online_log
.
info
(
'{0} [res_obj is None application_id={1} doc_id={2}]'
.
format
(
self
.
log_base
,
doc
.
application_id
,
doc
.
id
))
# 财务报表三个表存入数据库
if
res_obj
is
not
None
:
if
financial_statement_dict
:
res_obj
.
fs_ocr
=
json
.
dumps
([
financial_statement_dict
])
# 财报情况说明存入数据库
if
res_obj
is
not
None
:
if
financial_explanation_dict
:
res_obj
.
fss_ocr
=
json
.
dumps
([
financial_explanation_dict
])
for
classify
,
field
in
consts
.
RESULT_MAPPING
.
items
():
if
not
hasattr
(
res_obj
,
field
):
continue
...
...
src/apps/doc/ocr/wb.py
View file @
d2a1f3f
...
...
@@ -853,6 +853,27 @@ class BSWorkbook(Workbook):
ws
.
append
(
row
)
def financial_explanation_rebuild(self, financial_explanation_dict):
    """Build the "financial statement explanation" worksheet.

    Creates a sheet named ``consts.FINANCIAL_EXPLANATION_SHEET_NAME`` and
    appends one two-column row per entry of ``financial_explanation_dict``:
    a label (fixed prefix + per-key suffix) and the value as text.

    Args:
        financial_explanation_dict: Mapping of field key to recognised
            value. ``"title"`` and ``"stamp"`` get dedicated labels; any
            other key falls back to the title label, as before.

    Returns:
        None. No sheet is created when the mapping is empty or ``None``.
    """
    # Nothing was collected for this document: do not create the sheet.
    if not financial_explanation_dict:
        return

    # Label suffix per key ("title" -> company name, "stamp" -> seal).
    suffix_by_key = {"title": "公司名称", "stamp": "印章"}
    default_suffix = "公司名称"

    ws = self.create_sheet(consts.FINANCIAL_EXPLANATION_SHEET_NAME)
    for fin_key, fin_value in financial_explanation_dict.items():
        table_str = suffix_by_key.get(fin_key, default_suffix)
        # The producer in ocr_process seeds "title"/"stamp" with an empty
        # dict placeholder; when OCR yields no value, str() would put the
        # literal text "{}" (or "None") into the cell. Emit an empty cell
        # instead. Non-empty values (including 0) are stringified as before.
        is_blank = fin_value is None or (
            isinstance(fin_value, (dict, list, tuple, str)) and not fin_value
        )
        cell_text = "" if is_blank else str(fin_value)
        ws.append(["财报情况说明" + table_str, cell_text])
@staticmethod
def
remove_yuan
(
amount_key_set
,
key
,
src_str
):
if
key
in
amount_key_set
and
isinstance
(
src_str
,
str
):
...
...
@@ -952,7 +973,7 @@ class BSWorkbook(Workbook):
if
len
(
self
.
sheetnames
)
>
1
:
self
.
remove
(
self
.
get_sheet_by_name
(
'Sheet'
))
def
rebuild
(
self
,
bs_summary
,
license_summary
,
res_list
,
document_scheme
,
contract_result
,
metadata
,
financial_statement_dict
):
def
rebuild
(
self
,
bs_summary
,
license_summary
,
res_list
,
document_scheme
,
contract_result
,
metadata
,
financial_statement_dict
,
financial_explanation_dict
):
res_count_tuple
=
self
.
res_sheet
(
res_list
)
count_list
=
[(
consts
.
MODEL_FIELD_BS
,
len
(
bs_summary
))]
...
...
@@ -961,11 +982,13 @@ class BSWorkbook(Workbook):
self
.
contract_rebuild
(
contract_result
)
self
.
bs_rebuild
(
bs_summary
,
res_count_tuple
,
metadata
)
self
.
financial_rebuild
(
financial_statement_dict
)
self
.
financial_explanation_rebuild
(
financial_explanation_dict
)
else
:
self
.
bs_rebuild
(
bs_summary
,
res_count_tuple
,
metadata
)
self
.
license_rebuild
(
license_summary
,
document_scheme
,
count_list
)
self
.
contract_rebuild
(
contract_result
,
True
)
self
.
financial_rebuild
(
financial_statement_dict
)
self
.
financial_explanation_rebuild
(
financial_explanation_dict
)
self
.
move_res_sheet
()
self
.
remove_base_sheet
()
return
count_list
,
self
.
need_follow
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment