Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
7b915143
authored
2024-08-06 17:22:49 +0800
by
chenyao
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
添加4659-财务报表开发的三个表的识别,情况说明并未开发完成
1 parent
0a08bff7
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
55 additions
and
7 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/ocr/wb.py
src/apps/doc/consts.py
View file @
7b91514
...
...
@@ -2532,4 +2532,9 @@ FSM_ACTIVITED_STATUS = {
"APIPN"
:
"Activated-Invoice Passed-Non PT"
,
"APIPP"
:
"Activated-Invoice Passed-PT Doc Required"
,
"APARD"
:
"Activated-Review done"
,
}
\ No newline at end of file
}
# Classification labels that identify financial-statement pages
# (ocr_process.py maps 97 -> balance sheet, 98 -> income statement, 99 -> cash flow statement)
FINANCIAL_STATEMENT_CLASSIFY_LIST
=
[
97
,
98
,
99
]
# Name of the worksheet that holds the financial-statement results
FINANCIAL_SHEET_NAME
=
"财务报表"
\ No newline at end of file
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
7b91514
...
...
@@ -1724,6 +1724,8 @@ class Command(BaseCommand, LoggerMixin):
license_summary
=
{}
contract_result
=
{}
contract_result_compare
=
{}
# 添加财报三个报表的处理
financial_statement_dict
=
{
"code"
:
{},
"stamp"
:
{}}
res_list
=
[]
interest_keyword
=
Keywords
.
objects
.
filter
(
type
=
KeywordsType
.
INTEREST
.
value
,
on_off
=
True
)
.
values_list
(
'keyword'
,
flat
=
True
)
...
...
@@ -1753,6 +1755,21 @@ class Command(BaseCommand, LoggerMixin):
self
.
online_log
.
warn
(
'{0} [ocr_1 res error] [img={1}]'
.
format
(
self
.
log_base
,
img_path
))
continue
elif
classify
in
consts
.
FINANCIAL_STATEMENT_CLASSIFY_LIST
:
financial_statement_table_name
=
None
if
classify
==
97
:
financial_statement_table_name
=
"balance_sheet"
elif
classify
==
98
:
financial_statement_table_name
=
"income_statement"
elif
classify
==
99
:
financial_statement_table_name
=
"cash_flow_statement"
if
financial_statement_table_name
is
not
None
:
if
"id_code"
in
ocr_data
:
id_code
=
ocr_data
.
get
(
"id_code"
,
""
)
financial_statement_dict
[
"code"
][
financial_statement_table_name
]
=
id_code
if
"stamp"
in
ocr_data
:
stamp
=
ocr_data
.
get
(
"stamp"
,
""
)
financial_statement_dict
[
"stamp"
][
financial_statement_table_name
]
=
stamp
elif
classify
in
consts
.
OTHER_CLASSIFY_SET
:
# 其他类
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_OTHER
))
continue
...
...
@@ -1939,7 +1956,7 @@ class Command(BaseCommand, LoggerMixin):
# src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
# wb.save(src_excel_path)
#need_follow表示在上传edms时文件名是否要添加"关注"两字
count_list
,
need_follow
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
,
contract_result
,
doc
.
metadata
)
count_list
,
need_follow
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
,
contract_result
,
doc
.
metadata
,
financial_statement_dict
)
wb
.
save
(
excel_path
)
except
Exception
as
e
:
...
...
@@ -2046,10 +2063,10 @@ class Command(BaseCommand, LoggerMixin):
# 更新OCR累计识别结果表
if
business_type
==
consts
.
HIL_PREFIX
:
result_class
=
HILOCRResult
if
is_ca
else
HILSEOCRResult
res_obj
=
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
)
res_obj
=
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
)
else
:
result_class
=
AFCOCRResult
if
is_ca
else
AFCSEOCRResult
res_obj
=
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
)
res_obj
=
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
)
except
Exception
as
e
:
self
.
online_log
.
error
(
...
...
@@ -2426,7 +2443,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
online_log
.
info
(
'{0} [stop safely]'
.
format
(
self
.
log_base
))
@transaction.atomic
def
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
):
def
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
):
with
transaction
.
atomic
(
'afc'
):
res_obj
=
result_class
.
objects
.
using
(
'afc'
)
.
select_for_update
()
.
filter
(
application_id
=
doc
.
application_id
)
.
first
()
self
.
online_log
.
info
(
'{0} [sql lock AFC application_id={1} doc_id={2}]'
.
format
(
self
.
log_base
,
doc
.
application_id
,
doc
.
id
))
...
...
@@ -2434,6 +2451,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
res_obj
=
result_class
()
res_obj
.
application_id
=
doc
.
application_id
self
.
online_log
.
info
(
'{0} [res_obj is None application_id={1} doc_id={2}]'
.
format
(
self
.
log_base
,
doc
.
application_id
,
doc
.
id
))
res_obj
.
fs_ocr
=
[
financial_statement_dict
]
for
classify
,
field
in
consts
.
RESULT_MAPPING
.
items
():
if
not
hasattr
(
res_obj
,
field
):
continue
...
...
@@ -2459,7 +2477,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
return
res_obj
@transaction.atomic
def
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
):
def
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
):
with
transaction
.
atomic
(
'default'
):
res_obj
=
result_class
.
objects
.
using
(
'default'
)
.
select_for_update
()
.
filter
(
application_id
=
doc
.
application_id
)
.
first
()
self
.
online_log
.
info
(
'{0} [sql lock HIL application_id={1} doc_id={2}]'
.
format
(
self
.
log_base
,
doc
.
application_id
,
doc
.
id
))
...
...
@@ -2467,6 +2485,7 @@ def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, tas
res_obj
=
result_class
()
res_obj
.
application_id
=
doc
.
application_id
self
.
online_log
.
info
(
'{0} [res_obj is None application_id={1} doc_id={2}]'
.
format
(
self
.
log_base
,
doc
.
application_id
,
doc
.
id
))
res_obj
.
fs_ocr
=
[
financial_statement_dict
]
for
classify
,
field
in
consts
.
RESULT_MAPPING
.
items
():
if
not
hasattr
(
res_obj
,
field
):
continue
...
...
src/apps/doc/ocr/wb.py
View file @
7b91514
...
...
@@ -827,6 +827,28 @@ class BSWorkbook(Workbook):
ws
.
append
(
row
)
ws
.
append
((
None
,
))
def financial_rebuild(self, financial_statement_dict):
    """Write the financial-statement recognition results to their own sheet.

    A worksheet named ``consts.FINANCIAL_SHEET_NAME`` is always created,
    even when there is nothing to write.

    Args:
        financial_statement_dict: Mapping of result category to a mapping
            of table name to recognised value. The callers visible in this
            change build it as ``{"code": {...}, "stamp": {...}}``.

    One row ``[<statement title><category label>, str(value)]`` is appended
    for each entry whose table name is ``balance_sheet``,
    ``income_statement`` or ``cash_flow_statement``; any other table name
    is ignored.
    """
    # Row-title prefix for each supported statement table.
    statement_titles = {
        "balance_sheet": "资产负债表",
        "income_statement": "利润表",
        "cash_flow_statement": "现金流量表",
    }

    sheet = self.create_sheet(consts.FINANCIAL_SHEET_NAME)

    for category, per_table in financial_statement_dict.items():
        # Only "stamp" gets its own label; "code" and any unexpected
        # category share the identification-code label.
        label = "印章" if category == "stamp" else "识别码"

        for table_name, recognised in per_table.items():
            title = statement_titles.get(table_name)
            if title is None:
                continue
            sheet.append([title + label, str(recognised)])
@staticmethod
def
remove_yuan
(
amount_key_set
,
key
,
src_str
):
if
key
in
amount_key_set
and
isinstance
(
src_str
,
str
):
...
...
@@ -926,7 +948,7 @@ class BSWorkbook(Workbook):
if
len
(
self
.
sheetnames
)
>
1
:
self
.
remove
(
self
.
get_sheet_by_name
(
'Sheet'
))
def
rebuild
(
self
,
bs_summary
,
license_summary
,
res_list
,
document_scheme
,
contract_result
,
metadata
):
def
rebuild
(
self
,
bs_summary
,
license_summary
,
res_list
,
document_scheme
,
contract_result
,
metadata
,
financial_statement_dict
):
res_count_tuple
=
self
.
res_sheet
(
res_list
)
count_list
=
[(
consts
.
MODEL_FIELD_BS
,
len
(
bs_summary
))]
...
...
@@ -934,10 +956,12 @@ class BSWorkbook(Workbook):
self
.
license_rebuild
(
license_summary
,
document_scheme
,
count_list
)
self
.
contract_rebuild
(
contract_result
)
self
.
bs_rebuild
(
bs_summary
,
res_count_tuple
,
metadata
)
self
.
financial_rebuild
(
financial_statement_dict
)
else
:
self
.
bs_rebuild
(
bs_summary
,
res_count_tuple
,
metadata
)
self
.
license_rebuild
(
license_summary
,
document_scheme
,
count_list
)
self
.
contract_rebuild
(
contract_result
,
True
)
self
.
financial_rebuild
(
financial_statement_dict
)
self
.
move_res_sheet
()
self
.
remove_base_sheet
()
return
count_list
,
self
.
need_follow
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment