Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
9f51de96
authored
2025-02-14 22:08:18 +0800
by
冯轩
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
merge 5075-chenyao
2 parents
d6fc968c
ff8e7380
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
81 additions
and
5 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/models.py
src/apps/doc/ocr/wb.py
src/apps/doc/consts.py
View file @
9f51de9
...
...
@@ -99,6 +99,7 @@ RES_SUCCESS = '识别成功'
RES_SUCCESS_OTHER
=
'识别成功(其他类)'
RES_SUCCESS_EMPTY
=
'识别成功(空数据)'
RES_SUCCESS_FINANCIAL_STATEMENT
=
'识别成功(财务报表类)'
RES_SUCCESS_DOWN_PAYMENT
=
'识别成功(首付款支付承诺书类)'
RES_FAILED
=
'识别失败'
RES_FAILED_1
=
'识别失败(阶段1)'
RES_FAILED_2
=
'识别失败(阶段2)'
...
...
@@ -2586,6 +2587,13 @@ FINANCIAL_EXPLANATION_CLASSIFY_LIST = [100]
# 财报情况说明sheet名称
FINANCIAL_EXPLANATION_SHEET_NAME
=
"财报情况说明"
# 首付款支付承诺书分类标签
DOWN_PAYMENT_CLASSIFY_LIST
=
[
96
]
# 首付款支付承诺书sheet名称
DOWN_PAYMENT_SHEET_NAME
=
"首付款支付承诺书"
NEW_FILE_COMPARE_SET
=
[
96
]
# Jira-4562 - 银行流水首页提取关键词
INCOME_KEYWORDS_LIST
=
[
"养老金"
,
"社保"
,
"代发工资"
,
"工资入账"
,
"奖金"
,
"养老保险"
,
"代发"
,
"工资"
]
INCOME_KEYWORDS_DICT
=
{
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
9f51de9
...
...
@@ -1768,6 +1768,8 @@ class Command(BaseCommand, LoggerMixin):
financial_statement_dict
=
{}
# 添加财报情况说明的处理
financial_explanation_dict
=
{}
# 添加首付款支付承诺书的处理
down_payment_dict
=
{}
res_list
=
[]
interest_keyword
=
Keywords
.
objects
.
filter
(
type
=
KeywordsType
.
INTEREST
.
value
,
on_off
=
True
)
.
values_list
(
'keyword'
,
flat
=
True
)
...
...
@@ -1835,6 +1837,31 @@ class Command(BaseCommand, LoggerMixin):
if
"stamp"
in
ocr_data
:
stamp
=
ocr_data
.
get
(
"stamp"
,
""
)
financial_explanation_dict
[
"stamp"
]
=
stamp
elif
classify
in
consts
.
DOWN_PAYMENT_CLASSIFY_LIST
:
# 添加到 res_list 中
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_DOWN_PAYMENT
))
# 1-金融机构名称
financial_org_name
=
ocr_data
.
get
(
"financial_org_name"
,
""
)
down_payment_dict
[
"financial_org_name"
]
=
financial_org_name
# 2-主借人姓名
main_borrower_name
=
ocr_data
.
get
(
"main_borrower_name"
,
""
)
down_payment_dict
[
"main_borrower_name"
]
=
main_borrower_name
# 3-主借人证件号码
main_borrower_id_no
=
ocr_data
.
get
(
"main_borrower_id_no"
,
""
)
down_payment_dict
[
"main_borrower_id_no"
]
=
main_borrower_id_no
# 4-申请编号
apply_no
=
ocr_data
.
get
(
"apply_no"
,
""
)
down_payment_dict
[
"apply_no"
]
=
apply_no
# 5-抵押/租赁合同名称
contract_name
=
ocr_data
.
get
(
"contract_name"
,
""
)
down_payment_dict
[
"contract_name"
]
=
contract_name
# 6-承诺人签字
promisor_signature
=
ocr_data
.
get
(
"promisor_signature"
,
""
)
down_payment_dict
[
"promisor_signature"
]
=
promisor_signature
# 7-承诺人签字日期
promisor_signature_date
=
ocr_data
.
get
(
"promisor_signature_date"
,
""
)
down_payment_dict
[
"promisor_signature_date"
]
=
promisor_signature_date
elif
classify
in
consts
.
OTHER_CLASSIFY_SET
:
# 其他类
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_OTHER
))
continue
...
...
@@ -2021,7 +2048,7 @@ class Command(BaseCommand, LoggerMixin):
# src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
# wb.save(src_excel_path)
#need_follow表示在上传edms时文件名是否要添加"关注"两字
count_list
,
need_follow
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
,
contract_result
,
doc
.
metadata
,
financial_statement_dict
,
financial_explanation_dict
)
count_list
,
need_follow
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
,
contract_result
,
doc
.
metadata
,
financial_statement_dict
,
financial_explanation_dict
,
down_payment_dict
)
wb
.
save
(
excel_path
)
except
Exception
as
e
:
...
...
@@ -2122,7 +2149,7 @@ class Command(BaseCommand, LoggerMixin):
license_summary
[
consts
.
BS_CLASSIFY
]
=
bs_rebuild
# 比对
if
len
(
license_summary
)
>
0
:
if
len
(
license_summary
)
>
0
or
classify
in
consts
.
NEW_FILE_COMPARE_SET
:
if
doc
.
document_scheme
!=
consts
.
DOC_SCHEME_LIST
[
2
]:
# if len(license_summary) > 0 and doc.document_scheme != consts.DOC_SCHEME_LIST[2]:
try
:
...
...
@@ -2547,7 +2574,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
online_log
.
info
(
'{0} [stop safely]'
.
format
(
self
.
log_base
))
@transaction.atomic
def
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
):
def
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
,
down_payment_dict
):
with
transaction
.
atomic
(
'afc'
):
res_obj
=
result_class
.
objects
.
using
(
'afc'
)
.
select_for_update
()
.
filter
(
application_id
=
doc
.
application_id
)
.
first
()
self
.
online_log
.
info
(
'{0} [sql lock AFC application_id={1} doc_id={2}]'
.
format
(
self
.
log_base
,
doc
.
application_id
,
doc
.
id
))
...
...
@@ -2563,6 +2590,10 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
if
res_obj
is
not
None
:
if
financial_explanation_dict
:
res_obj
.
fss_ocr
=
json
.
dumps
([
financial_explanation_dict
])
# 首付款支付承诺书存入数据库
if
res_obj
is
not
None
:
if
down_payment_dict
:
res_obj
.
dp_ocr
=
json
.
dumps
([
down_payment_dict
])
for
classify
,
field
in
consts
.
RESULT_MAPPING
.
items
():
if
not
hasattr
(
res_obj
,
field
):
continue
...
...
@@ -2588,7 +2619,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
return
res_obj
@transaction.atomic
def
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
):
def
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
,
down_payment_dict
):
with
transaction
.
atomic
(
'default'
):
res_obj
=
result_class
.
objects
.
using
(
'default'
)
.
select_for_update
()
.
filter
(
application_id
=
doc
.
application_id
)
.
first
()
self
.
online_log
.
info
(
'{0} [sql lock HIL application_id={1} doc_id={2}]'
.
format
(
self
.
log_base
,
doc
.
application_id
,
doc
.
id
))
...
...
@@ -2604,6 +2635,10 @@ def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, tas
if
res_obj
is
not
None
:
if
financial_explanation_dict
:
res_obj
.
fss_ocr
=
json
.
dumps
([
financial_explanation_dict
])
# 首付款支付承诺书存入数据库
if
res_obj
is
not
None
:
if
down_payment_dict
:
res_obj
.
dp_ocr
=
json
.
dumps
([
down_payment_dict
])
for
classify
,
field
in
consts
.
RESULT_MAPPING
.
items
():
if
not
hasattr
(
res_obj
,
field
):
continue
...
...
src/apps/doc/models.py
View file @
9f51de9
...
...
@@ -338,6 +338,7 @@ class AFCOCRResult(models.Model):
fsm_activited
=
models
.
IntegerField
(
null
=
False
,
default
=
0
,
verbose_name
=
"fsm激活状态 1:激活"
)
fs_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"财务报表"
)
fss_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"财务情况说明书"
)
dp_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"首付款支付承诺书"
)
update_time
=
models
.
DateTimeField
(
auto_now
=
True
,
verbose_name
=
'修改时间'
)
...
...
@@ -383,6 +384,7 @@ class HILOCRResult(models.Model):
fsm_activited
=
models
.
IntegerField
(
null
=
False
,
default
=
0
,
verbose_name
=
"fsm激活状态 1:激活"
)
fs_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"财务报表"
)
fss_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"财务情况说明书"
)
dp_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"首付款支付承诺书"
)
update_time
=
models
.
DateTimeField
(
auto_now
=
True
,
verbose_name
=
'修改时间'
)
create_time
=
models
.
DateTimeField
(
auto_now_add
=
True
,
verbose_name
=
'创建时间'
)
...
...
@@ -426,6 +428,7 @@ class AFCSEOCRResult(models.Model):
fsm_activited
=
models
.
IntegerField
(
null
=
False
,
default
=
0
,
verbose_name
=
"fsm激活状态 1:激活"
)
fs_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"财务报表"
)
fss_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"财务情况说明书"
)
dp_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"首付款支付承诺书"
)
update_time
=
models
.
DateTimeField
(
auto_now
=
True
,
verbose_name
=
'修改时间'
)
create_time
=
models
.
DateTimeField
(
auto_now_add
=
True
,
verbose_name
=
'创建时间'
)
...
...
@@ -469,6 +472,7 @@ class HILSEOCRResult(models.Model):
fsm_activited
=
models
.
IntegerField
(
null
=
False
,
default
=
0
,
verbose_name
=
"fsm激活状态 1:激活"
)
fs_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"财务报表"
)
fss_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"财务情况说明书"
)
dp_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"首付款支付承诺书"
)
update_time
=
models
.
DateTimeField
(
auto_now
=
True
,
verbose_name
=
'修改时间'
)
create_time
=
models
.
DateTimeField
(
auto_now_add
=
True
,
verbose_name
=
'创建时间'
)
...
...
src/apps/doc/ocr/wb.py
View file @
9f51de9
...
...
@@ -874,6 +874,33 @@ class BSWorkbook(Workbook):
ws
.
append
(
row
)
def down_payment_rebuild(self, down_payment_dict):
    """
    Desc:
        Rebuild the down-payment commitment letter sheet.

    Args:
        down_payment_dict: mapping of OCR field name -> recognised value.
            When it is empty (or None) no sheet is created.

    Each known field is written as one row: [Chinese label, value as str].
    A known field missing from ``down_payment_dict`` gets an empty value,
    and keys without a label are ignored.
    """
    # Nothing was recognised: do not create the sheet at all.
    if not down_payment_dict:
        return

    ws = self.create_sheet(consts.DOWN_PAYMENT_SHEET_NAME)

    # OCR field name -> label written in the first column of the sheet.
    english_chinese_dict = {
        "financial_org_name": "渠道",
        "main_borrower_name": "姓名",
        "main_borrower_id_no": "证件号码",
        "apply_no": "合同编号",
        "contract_name": "合同名称",
        "promisor_signature": "承诺人签字-电子",
        "promisor_signature_date": "承诺人签字日期-电子",
    }

    # Iterate the label mapping rather than the input: the previous version
    # looked up english_chinese_dict[dp_key] in the branch where dp_key was
    # known NOT to be in the mapping, which raised KeyError for any
    # unexpected key instead of writing the intended blank row.
    for dp_key, label in english_chinese_dict.items():
        if dp_key in down_payment_dict:
            ws.append([label, str(down_payment_dict[dp_key])])
        else:
            ws.append([label, ""])
@staticmethod
def
remove_yuan
(
amount_key_set
,
key
,
src_str
):
if
key
in
amount_key_set
and
isinstance
(
src_str
,
str
):
...
...
@@ -973,7 +1000,7 @@ class BSWorkbook(Workbook):
if
len
(
self
.
sheetnames
)
>
1
:
self
.
remove
(
self
.
get_sheet_by_name
(
'Sheet'
))
def
rebuild
(
self
,
bs_summary
,
license_summary
,
res_list
,
document_scheme
,
contract_result
,
metadata
,
financial_statement_dict
,
financial_explanation_dict
):
def
rebuild
(
self
,
bs_summary
,
license_summary
,
res_list
,
document_scheme
,
contract_result
,
metadata
,
financial_statement_dict
,
financial_explanation_dict
,
down_payment_dict
):
res_count_tuple
=
self
.
res_sheet
(
res_list
)
count_list
=
[(
consts
.
MODEL_FIELD_BS
,
len
(
bs_summary
))]
...
...
@@ -983,12 +1010,14 @@ class BSWorkbook(Workbook):
self
.
bs_rebuild
(
bs_summary
,
res_count_tuple
,
metadata
)
self
.
financial_rebuild
(
financial_statement_dict
)
self
.
financial_explanation_rebuild
(
financial_explanation_dict
)
self
.
down_payment_rebuild
(
down_payment_dict
)
else
:
self
.
bs_rebuild
(
bs_summary
,
res_count_tuple
,
metadata
)
self
.
license_rebuild
(
license_summary
,
document_scheme
,
count_list
)
self
.
contract_rebuild
(
contract_result
,
True
)
self
.
financial_rebuild
(
financial_statement_dict
)
self
.
financial_explanation_rebuild
(
financial_explanation_dict
)
self
.
down_payment_rebuild
(
down_payment_dict
)
self
.
move_res_sheet
()
self
.
remove_base_sheet
()
return
count_list
,
self
.
need_follow
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment