Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
0b550510
authored
2023-02-17 10:46:37 +0800
by
王聪
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
Merge branch 'feature/uat-tmp' of
http://gitlab.situdata.com/zhouweiqi/bmw-ocr
into feature/uat-tmp
2 parents
2d500b7a
76b3bbd5
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
217 additions
and
77 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/named_enum.py
src/apps/doc/ocr/wb.py
src/apps/doc/views.py
src/common/electronic_afc_contract/get_char_fsm.py
src/common/electronic_hil_contract/get_char_fsm.py
src/common/fsm_econtract/retriever.py
src/apps/doc/consts.py
View file @
0b55051
...
...
@@ -1168,6 +1168,7 @@ MS_ERROR_COL = (5, 6)
WECHART_CLASSIFY
=
12
NEW_ZHIFUBAO_CLASSIFY
=
48
ALI_WECHART_CLASSIFY
=
{
12
,
13
,
48
}
JSYH_CLASSIFY
=
{
11
,
27
,
34
}
WECHART_ERROR_COL
=
(
1
,
2
)
SPECIAL_HEADERS_MAPPING
=
copy
.
deepcopy
(
HEADERS_MAPPING
)
SPECIAL_HEADERS_MAPPING
.
update
(
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
0b55051
...
...
@@ -2009,6 +2009,9 @@ class Command(BaseCommand, LoggerMixin):
report_list
[
5
]
=
BSCheckResult
.
CHECK_FAILED
.
value
finally
:
self
.
online_log
.
info
(
'{0} [task={1}] [license_summary={2}] '
'[contract_result_compare={3}]'
.
format
(
self
.
log_base
,
task_str
,
license_summary
,
contract_result_compare
))
self
.
rebuild_contract
(
license_summary
,
contract_result_compare
)
bs_rebuild
=
self
.
rebuild_bs
(
merged_bs_summary
)
...
...
src/apps/doc/named_enum.py
View file @
0b55051
...
...
@@ -27,6 +27,7 @@ class RequestTeam(NamedEnum):
SETTLEMENT
=
(
1
,
'SETTLEMENT'
)
CONTRACTMANAGEMENT
=
(
2
,
'CONTRACTMANAGEMENT'
)
CONTROLLING
=
(
3
,
'CONTROLLING'
)
INSURANCE
=
(
4
,
'INSURANCE'
)
class
RequestTrigger
(
NamedEnum
):
...
...
src/apps/doc/ocr/wb.py
View file @
0b55051
...
...
@@ -39,6 +39,18 @@ class BSWorkbook(Workbook):
self
.
MAX_MEAN
=
31
self
.
need_follow
=
False
# @staticmethod
# def date_calibration(date_str):
# result = True
# try:
# if date_str[-2] not in ['20', '21']:
# result = False
# if date_str[-5:-3] not in ['03', '06', '09', '12']:
# result = False
# except Exception as e:
# result = False
# return result
@staticmethod
def
replace_newline
(
queryset_value
):
new_set
=
set
()
...
...
@@ -338,7 +350,7 @@ class BSWorkbook(Workbook):
metadata_rows
.
append
((
'Producer'
,
producer
))
if
len
(
author
)
>
0
:
metadata_highlight_row
.
append
(
6
)
if
'iText'
not
in
producer
and
'Qt'
not
in
producer
and
'Haru Free'
not
in
producer
:
if
'iText'
not
in
producer
and
'Qt'
not
in
producer
and
'Haru Free'
not
in
producer
and
'OpenPDF'
not
in
producer
:
metadata_highlight_row
.
append
(
7
)
metadata_rows
.
append
(
self
.
blank_row
)
...
...
@@ -496,6 +508,7 @@ class BSWorkbook(Workbook):
tmp_ws
=
self
.
create_sheet
(
'tmp_ws'
)
tmp2_ws
=
self
.
create_sheet
(
'tmp2_ws'
)
tmp3_ws
=
self
.
create_sheet
(
'tmp3_ws'
)
if
classify
in
consts
.
ALI_WECHART_CLASSIFY
:
high_light_keyword
=
self
.
wechat_keyword
else
:
...
...
@@ -629,7 +642,7 @@ class BSWorkbook(Workbook):
# 关键词1提取
if
summary_cell_value
in
self
.
interest_keyword
:
new_amount_cell_value
=
None
if
amount_cell
is
None
else
amount_cell
.
value
m
s
.
append
((
summary_cell_value
,
date_cell_value
,
new_amount_cell_value
))
tmp3_w
s
.
append
((
summary_cell_value
,
date_cell_value
,
new_amount_cell_value
))
# 关键词2提取至临时表
elif
summary_cell_value
in
self
.
salary_keyword
:
new_amount_cell_value
=
None
if
amount_cell
is
None
else
amount_cell
.
value
...
...
@@ -657,6 +670,18 @@ class BSWorkbook(Workbook):
# if summary_cell_idx is not None:
# new_ws[row][summary_cell_idx].fill = self.amount_fill
# 关键词1信息提取:结息
for
row
in
tmp3_ws
.
iter_rows
(
values_only
=
True
):
ms
.
append
(
row
)
# # 建设银行
# if classify in consts.JSYH_CLASSIFY:
# if isinstance(row[1], str) and self.date_calibration(row[1]):
# pass
# else:
# for cell in ms[ms.max_row]:
# cell.fill = self.amount_fill
self
.
remove
(
tmp3_ws
)
# 关键词2信息提取
ms
.
append
(
self
.
blank_row
)
ms
.
append
(
self
.
salary_keyword_header
)
...
...
src/apps/doc/views.py
View file @
0b55051
...
...
@@ -257,6 +257,7 @@ se_compare_content = {
'fsmSpecialCar'
:
fields
.
Boolean
(
required
=
False
),
'fsmBestPrice'
:
fields
.
Boolean
(
required
=
False
),
'isAutoSettlement'
:
fields
.
Boolean
(
required
=
False
),
'fsmLandingDealer'
:
fields
.
Str
(
required
=
False
,
validate
=
validate
.
Length
(
max
=
1024
)),
'individualCusInfo'
:
fields
.
List
(
fields
.
Nested
(
se_individual_args
),
required
=
True
,
validate
=
validate
.
Length
(
min
=
1
,
max
=
4
)),
...
...
@@ -592,12 +593,12 @@ class UploadDocView(GenericView, DocHandler):
if
business_type
==
consts
.
HIL_PREFIX
:
if
document_scheme
==
RequestTeam
.
ACCEPTANCE
.
name
:
result_class
=
HILOCRResult
elif
document_scheme
==
RequestTeam
.
SETTLEMENT
.
name
:
elif
document_scheme
==
RequestTeam
.
SETTLEMENT
.
name
or
document_scheme
==
RequestTeam
.
INSURANCE
.
name
:
result_class
=
HILSEOCRResult
elif
business_type
==
consts
.
AFC_PREFIX
:
if
document_scheme
==
RequestTeam
.
ACCEPTANCE
.
name
:
result_class
=
AFCOCRResult
elif
document_scheme
==
RequestTeam
.
SETTLEMENT
.
name
:
elif
document_scheme
==
RequestTeam
.
SETTLEMENT
.
name
or
document_scheme
==
RequestTeam
.
INSURANCE
.
name
:
result_class
=
AFCSEOCRResult
ocr_result_obj
=
result_class
.
objects
.
filter
(
application_id
=
application_id
)
.
first
()
...
...
@@ -610,6 +611,7 @@ class UploadDocView(GenericView, DocHandler):
ocr_result_obj
.
fsm_activited
=
1
ocr_result_obj
.
save
()
self
.
running_log
.
info
(
'[doc upload applicationId-{0}] [ocr result saved]'
.
format
(
application_id
))
if
data_source
==
consts
.
DATA_SOURCE_LIST
[
1
]:
if
document_name
.
endswith
(
'-证书.pdf'
)
or
document_name
.
endswith
(
'-证书'
):
self
.
running_log
.
info
(
'[doc upload success] [eapp license skip] [args={0}]'
.
format
(
args
))
...
...
@@ -880,6 +882,7 @@ class SECompareView(GenericView, PreSEHandler):
fsm_flag
=
content
.
get
(
'fsmFlag'
,
False
)
fsm_special_car
=
content
.
get
(
'fsmSpecialCar'
,
False
)
fsm_best_price
=
content
.
get
(
'fsmBestPrice'
,
False
)
fsm_landing_dealer
=
content
.
get
(
'fsmLandingDealer'
)
if
fsm_special_car
:
compare_result
=
{
...
...
src/common/electronic_afc_contract/get_char_fsm.py
View file @
0b55051
...
...
@@ -62,6 +62,7 @@ class Finder:
},
"page_3"
:
{
"合同编号"
:
self
.
item
,
"还款计划表"
:
self
.
item
,
"车辆代理商"
:
self
.
item
,
},
"page_4"
:
{
"合同编号"
:
self
.
item
,
"附加产品融资贷款本金总金额明细"
:
self
.
item
,
...
...
@@ -71,25 +72,45 @@ class Finder:
"page_6"
:
{
"合同编号"
:
self
.
item
,
},
}
self
.
init_result
[
"page_7"
]
=
{
"合同编号"
:
self
.
item
,
}
self
.
init_result
[
"page_8"
]
=
{
"合同编号"
:
self
.
item
,
"主借人签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"共借人签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"保证人1签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"保证人2签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"见证人签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
}
if
self
.
is_asp
:
self
.
init_result
[
"page_7"
]
=
{
"合同编号"
:
self
.
item
,
}
self
.
init_result
[
"page_8"
]
=
{
"合同编号"
:
self
.
item
,
"主借人签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"共借人签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"保证人1签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"保证人2签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"见证人签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
}
else
:
self
.
init_result
[
"page_7"
]
=
{
"合同编号"
:
self
.
item
,
"主借人签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"共借人签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"保证人1签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"保证人2签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
"见证人签字"
:
{
"签字"
:
self
.
item
,
"日期"
:
self
.
item
,
},
}
def
get_top_iou
(
self
,
poly
,
ocr_result
):
"""传入一个多边形, 找到与之最匹配的多边形
...
...
@@ -397,6 +418,39 @@ class Finder:
seller
[
'position'
]
=
bbox
seller
[
'words'
]
=
text
return
seller
def get_cldls(self):
    """Find the value printed to the right of the '车辆代理商' label.

    Only the page stored under key ``'2'`` of ``self.pdf_info`` is scanned
    (the caller stores the result under ``page_3``, so this is presumably
    the third page, zero-based -- confirm).

    The search has two steps:

    1. Find the first span whose stripped text is exactly '车辆代理商';
       its bbox becomes the anchor.
    2. Return the first span whose centre lies to the right of the anchor,
       in the left half of the page, and vertically inside the anchor's
       row.

    Returns:
        A copy of ``self.item`` with ``'position'`` set to the matched
        span's bbox and ``'words'`` set to its raw (unstripped) text.
        When the label or a matching value is not found, the unmodified
        copy is returned.

    Raises:
        KeyError: if page ``'2'`` or one of the expected keys is missing
            from ``self.pdf_info`` (same as before; no guard is added).
    """
    page = self.pdf_info['2']

    def iter_spans():
        # Yield (bbox, text) for every span of every block whose type is 0.
        # NOTE(review): type 0 is presumably a text block (the PyMuPDF
        # convention) -- confirm against whatever produces pdf_info.
        for block in page['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    yield span['bbox'], span['text']

    seller = self.item.copy()

    # Step 1: locate the label. Stop at the first match; the previous
    # flag-based loops kept scanning the rest of the matching line and
    # would silently keep the last match in that line instead.
    anchor_bbox = next(
        (bbox for bbox, text in iter_spans() if text.strip() == '车辆代理商'),
        None,
    )
    if anchor_bbox is None:
        return seller

    # Step 2: with the label found, look for its value. The value must sit
    # in the left half of the page so that content from a right-hand
    # column on the same row is not picked up.
    half_width = page['width'] * 0.5
    for bbox, text in iter_spans():
        # bbox[::2] holds the two horizontal coordinates (they are compared
        # against the page width); bbox[1::2] holds the two vertical ones.
        if not anchor_bbox[2] < np.mean(bbox[::2]) < half_width:
            continue
        if anchor_bbox[1] < np.mean(bbox[1::2]) < anchor_bbox[3]:
            seller['position'] = bbox
            seller['words'] = text
            return seller
    return seller
def
get_borrower_collection_account
(
self
):
account
=
self
.
item
.
copy
()
...
...
@@ -885,6 +939,9 @@ class Finder:
repayment_schedule_table
=
self
.
get_repayment_schedule
()
# print(repayment_schedule_table)
self
.
init_result
[
'page_3'
][
'还款计划表'
]
=
repayment_schedule_table
# 车辆代理商
cldls
=
self
.
get_cldls
()
self
.
init_result
[
'page_3'
][
'车辆代理商'
]
=
cldls
#######################################
# Page 4
# 找合同编号
...
...
@@ -907,43 +964,80 @@ class Finder:
contract_no
=
self
.
get_contract_no
(
page_num
=
'5'
)
# print(contract_no)
self
.
init_result
[
'page_6'
][
'合同编号'
]
=
contract_no
# Page 7
# 找合同编号
contract_no
=
self
.
get_contract_no
(
page_num
=
'6'
)
self
.
init_result
[
'page_7'
][
'合同编号'
]
=
contract_no
# Page 8
# 找合同编号
contract_no
=
self
.
get_contract_no
(
page_num
=
'7'
)
self
.
init_result
[
'page_8'
][
'合同编号'
]
=
contract_no
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'7'
,
top
=
'合同编号'
,
bottom
=
'共同借款人'
)
if
signature_name
[
'words'
]
==
None
:
if
self
.
is_asp
:
# Page 7
# 找合同编号
contract_no
=
self
.
get_contract_no
(
page_num
=
'6'
)
self
.
init_result
[
'page_7'
][
'合同编号'
]
=
contract_no
# Page 8
# 找合同编号
contract_no
=
self
.
get_contract_no
(
page_num
=
'7'
)
self
.
init_result
[
'page_8'
][
'合同编号'
]
=
contract_no
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'7'
,
top
=
'合同编号'
,
bottom
=
'共同借款人'
)
if
signature_name
[
'words'
]
==
None
:
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'7'
,
top
=
'合同编号'
,
bottom
=
'共同借款人(抵押人)'
)
# print(signature_name, signature_date)
self
.
init_result
[
'page_8'
][
'主借人签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_8'
][
'主借人签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'7'
,
top
=
'共同借款人'
,
bottom
=
'保证人1'
)
if
signature_name
[
'words'
]
==
None
:
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'7'
,
top
=
'共同借款人(抵押人)'
,
bottom
=
'保证人1'
)
# print(signature_name, signature_date)
self
.
init_result
[
'page_8'
][
'共借人签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_8'
][
'共借人签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'7'
,
top
=
'合同编号'
,
bottom
=
'共同借款人(抵押人)'
)
# print(signature_name, signature_date)
self
.
init_result
[
'page_8'
][
'主借人签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_8'
][
'主借人签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'7'
,
top
=
'共同借款人'
,
bottom
=
'保证人1'
)
if
signature_name
[
'words'
]
==
None
:
top
=
'保证人1'
,
bottom
=
'保证人2'
)
self
.
init_result
[
'page_8'
][
'保证人1签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_8'
][
'保证人1签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'7'
,
top
=
'共同借款人(抵押人)'
,
bottom
=
'保证人1'
)
# print(signature_name, signature_date)
self
.
init_result
[
'page_8'
][
'共借人签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_8'
][
'共借人签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'7'
,
top
=
'保证人1'
,
bottom
=
'保证人2'
)
self
.
init_result
[
'page_8'
][
'保证人1签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_8'
][
'保证人1签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'7'
,
top
=
'保证人2'
,
bottom
=
'在本人面前亲笔签署本合同'
)
self
.
init_result
[
'page_8'
][
'保证人2签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_8'
][
'保证人2签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'7'
,
top
=
'在本人面前亲笔签署本合同'
,
bottom
=
'以下无正文'
)
# print(signature_name, signature_date)
self
.
init_result
[
'page_8'
][
'见证人签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_8'
][
'见证人签字'
][
'日期'
]
=
signature_date
top
=
'保证人2'
,
bottom
=
'在本人面前亲笔签署本合同'
)
self
.
init_result
[
'page_8'
][
'保证人2签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_8'
][
'保证人2签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'7'
,
top
=
'在本人面前亲笔签署本合同'
,
bottom
=
'以下无正文'
)
# print(signature_name, signature_date)
self
.
init_result
[
'page_8'
][
'见证人签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_8'
][
'见证人签字'
][
'日期'
]
=
signature_date
else
:
# Page 7
# 找合同编号
contract_no
=
self
.
get_contract_no
(
page_num
=
'6'
)
self
.
init_result
[
'page_7'
][
'合同编号'
]
=
contract_no
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'合同编号'
,
bottom
=
'共同借款人'
)
if
signature_name
[
'words'
]
==
None
:
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'合同编号'
,
bottom
=
'共同借款人(抵押人)'
)
# print(signature_name, signature_date)
self
.
init_result
[
'page_7'
][
'主借人签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_7'
][
'主借人签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'共同借款人'
,
bottom
=
'保证人1'
)
if
signature_name
[
'words'
]
==
None
:
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'共同借款人(抵押人)'
,
bottom
=
'保证人1'
)
# print(signature_name, signature_date)
self
.
init_result
[
'page_7'
][
'共借人签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_7'
][
'共借人签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'保证人1'
,
bottom
=
'保证人2'
)
self
.
init_result
[
'page_7'
][
'保证人1签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_7'
][
'保证人1签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'保证人2'
,
bottom
=
'在本人面前亲笔签署本合同'
)
self
.
init_result
[
'page_7'
][
'保证人2签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_7'
][
'保证人2签字'
][
'日期'
]
=
signature_date
signature_name
,
signature_date
=
self
.
get_last_page_signature
(
page_num
=
'6'
,
top
=
'在本人面前亲笔签署本合同'
,
bottom
=
'以下无正文'
)
# print(signature_name, signature_date)
self
.
init_result
[
'page_7'
][
'见证人签字'
][
'签字'
]
=
signature_name
self
.
init_result
[
'page_7'
][
'见证人签字'
][
'日期'
]
=
signature_date
# 重新定制输出
new_results
=
{
"is_asp"
:
self
.
is_asp
,
...
...
src/common/electronic_hil_contract/get_char_fsm.py
View file @
0b55051
...
...
@@ -51,6 +51,7 @@ class Finder:
"合同编号(正文)"
:
self
.
item
,
"车辆识别代码"
:
self
.
item
,
"车辆卖方(经销商)"
:
self
.
item
,
"车辆代理商"
:
self
.
item
,
"车辆原始销售价格(《机动车销售统一发票》所列金额)"
:
self
.
item
,
"车辆附加产品明细表"
:
self
.
item
,
"融资成本总额"
:
self
.
item
,
...
...
@@ -696,11 +697,13 @@ class Finder:
signature_name
[
'position'
]
=
bbox
return
signature_name
,
signature_date
def
get_electronic_signature
(
self
,
top
,
bottom
):
def
get_electronic_signature
(
self
,
top
,
bottom
,
t_pno
=
None
):
signature
=
self
.
item
.
copy
()
anchor_top
=
None
anchor_bottom
=
None
for
pno
in
self
.
pdf_info
:
if
t_pno
is
not
None
and
pno
!=
t_pno
:
continue
for
block
in
self
.
pdf_info
[
pno
][
'blocks'
]:
if
block
[
'type'
]
!=
0
:
continue
...
...
@@ -709,10 +712,15 @@ class Finder:
bbox
,
text
=
span
[
'bbox'
],
span
[
'text'
]
if
top
in
text
:
anchor_top
=
bbox
[
1
]
if
bottom
in
text
:
elif
bottom
in
text
and
anchor_top
is
not
None
and
bbox
[
3
]
>
anchor_top
:
anchor_bottom
=
bbox
[
3
]
if
anchor_top
is
not
None
and
anchor_bottom
is
not
None
:
# print('in')
# print(anchor_top)
# print(anchor_bottom)
for
pno
in
self
.
pdf_info
:
if
t_pno
is
not
None
and
pno
!=
t_pno
:
continue
for
block
in
self
.
pdf_info
[
pno
][
'blocks'
]:
if
block
[
'type'
]
!=
0
:
continue
...
...
@@ -1005,7 +1013,7 @@ class Finder:
words
=
text
.
split
(
':'
)[
-
1
]
_id
[
'position'
]
=
bbox
_id
[
'page'
]
=
pno
_id
[
'words'
]
=
words
_id
[
'words'
]
=
words
.
strip
()
return
name
,
_id
def
get_key_value_position
(
self
,
key
):
...
...
@@ -1243,6 +1251,9 @@ class Finder:
if
seller
[
'words'
]
==
None
:
seller
=
self
.
get_key_value
(
key
=
'车辆卖方:'
)
self
.
init_result
[
'车辆卖方(经销商)'
]
=
seller
# 找到车辆代理商
cldls
=
self
.
get_key_value
(
key
=
'车辆代理商'
,
page_num
=
'4'
)
self
.
init_result
[
'车辆代理商'
]
=
cldls
# 找到 —— 车辆原始销售价格
vehicle_price
=
self
.
get_key_value
(
key
=
'车辆原始销售价格(《机动车销售统一发票》所列金额):'
)
self
.
init_result
[
'车辆原始销售价格(《机动车销售统一发票》所列金额)'
]
=
vehicle_price
...
...
@@ -1264,7 +1275,7 @@ class Finder:
account
=
self
.
get_key_value
(
key
=
'银行账号:'
,
page_num
=
'4'
)
self
.
init_result
[
'收款银行账户-银行账号'
]
=
account
bank
=
self
.
get_key_value
(
key
=
'开户银行:'
,
page_num
=
'4'
)
self
.
init_result
[
'
承租人收款
账户-开户行'
]
=
bank
self
.
init_result
[
'
收款银行
账户-开户行'
]
=
bank
# 找承租人扣款账户户名、银行账号、银行
name
=
self
.
get_key_value
(
key
=
'户名:'
,
page_num
=
'5'
)
self
.
init_result
[
'银行账户-户名'
]
=
name
...
...
@@ -1277,65 +1288,65 @@ class Finder:
# 承租人姓名、签章
if
is_cdfl
==
False
:
name
=
self
.
get_key_value
(
key
=
'承租人姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'承租人姓名:'
,
bottom
=
'保证人1姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'承租人姓名:'
,
bottom
=
'保证人1姓名:'
,
t_pno
=
'5'
)
if
name
[
"words"
]
==
None
:
name
=
self
.
get_key_value
(
key
=
'承租人一姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'承租人一姓名:'
,
bottom
=
'共同承租人名称:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'承租人一姓名:'
,
bottom
=
'共同承租人名称:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-承租人姓名'
]
=
name
self
.
init_result
[
'签字页-承租人签章'
]
=
electronic_signature
# 保证人1姓名、签章
name
=
self
.
get_key_value
(
key
=
'保证人1姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人1姓名:'
,
bottom
=
'保证人2姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人1姓名:'
,
bottom
=
'保证人2姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人1姓名'
]
=
name
self
.
init_result
[
'签字页-保证人1签章'
]
=
electronic_signature
# 这里用的是 name["words"] == ""
if
name
[
"words"
]
==
""
:
name
=
self
.
get_key_value
(
key
=
'共同承租人名称:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'共同承租人名称:'
,
bottom
=
'保证人1姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'共同承租人名称:'
,
bottom
=
'保证人1姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-共同承租人姓名'
]
=
name
self
.
init_result
[
'签字页-共同承租人签章'
]
=
electronic_signature
# 保证人2姓名、签章
name
=
self
.
get_key_value
(
key
=
'保证人2姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人2姓名:'
,
bottom
=
'保证人3姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人2姓名:'
,
bottom
=
'保证人3姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人2姓名'
]
=
name
self
.
init_result
[
'签字页-保证人2签章'
]
=
electronic_signature
# if判断条件对应3_3版本
if
name
[
"words"
]
==
""
:
name
=
self
.
get_key_value
(
key
=
'保证人1姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人1姓名:'
,
bottom
=
'保证人2姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人1姓名:'
,
bottom
=
'保证人2姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人1姓名'
]
=
name
self
.
init_result
[
'签字页-保证人1签章'
]
=
electronic_signature
# 保证人3姓名、签章
name
=
self
.
get_key_value
(
key
=
'保证人3姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人3姓名:'
,
bottom
=
'日期:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人3姓名:'
,
bottom
=
'日期:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人3姓名'
]
=
name
self
.
init_result
[
'签字页-保证人3签章'
]
=
electronic_signature
# if判断条件对应3_3版本
if
name
[
"words"
]
==
None
:
name
=
self
.
get_key_value
(
key
=
'保证人2姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人2姓名:'
,
bottom
=
'日期:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人2姓名:'
,
bottom
=
'日期:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人2姓名'
]
=
name
self
.
init_result
[
'签字页-保证人2签章'
]
=
electronic_signature
else
:
name
=
self
.
get_key_value
(
key
=
'承租人一姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'承租人一姓名:'
,
bottom
=
'共同承租人名称:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'承租人一姓名:'
,
bottom
=
'共同承租人名称:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-承租人姓名'
]
=
name
self
.
init_result
[
'签字页-承租人签章'
]
=
electronic_signature
name
=
self
.
get_key_value
(
key
=
'共同承租人名称:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'共同承租人名称:'
,
bottom
=
'保证人1姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'共同承租人名称:'
,
bottom
=
'保证人1姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-共同承租人姓名'
]
=
name
self
.
init_result
[
'签字页-共同承租人签章'
]
=
electronic_signature
name
=
self
.
get_key_value
(
key
=
'保证人1姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人1姓名:'
,
bottom
=
'保证人2姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人1姓名:'
,
bottom
=
'保证人2姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人1姓名'
]
=
name
self
.
init_result
[
'签字页-保证人1签章'
]
=
electronic_signature
name
=
self
.
get_key_value
(
key
=
'保证人2姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人2姓名:'
,
bottom
=
'保证人3姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'保证人2姓名:'
,
bottom
=
'保证人3姓名:'
,
t_pno
=
'5'
)
self
.
init_result
[
'签字页-保证人2姓名'
]
=
name
self
.
init_result
[
'签字页-保证人2签章'
]
=
electronic_signature
...
...
@@ -1404,12 +1415,12 @@ class Finder:
self
.
init_result_2
[
'融资租赁期限'
]
=
lease_term
# 签字页抵押人姓名和签章
name
=
self
.
get_key_value
(
key
=
'抵押人姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'抵押权人盖章'
,
bottom
=
'抵押人配偶姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'抵押权人盖章'
,
bottom
=
'抵押人配偶姓名:'
,
t_pno
=
'1'
)
self
.
init_result_2
[
'签字页-抵押人姓名'
]
=
name
self
.
init_result_2
[
'签字页-抵押人签章'
]
=
electronic_signature
# 签字页抵押人配偶姓名和签章
name
=
self
.
get_key_value
(
key
=
'抵押人配偶姓名:'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'抵押人配偶姓名:'
,
bottom
=
'日期'
)
electronic_signature
=
self
.
get_electronic_signature
(
top
=
'抵押人配偶姓名:'
,
bottom
=
'日期'
,
t_pno
=
'1'
)
self
.
init_result_2
[
'签字页-抵押人配偶姓名'
]
=
name
self
.
init_result_2
[
'签字页-抵押人配偶签章'
]
=
electronic_signature
return
self
.
init_result_2
\ No newline at end of file
...
...
src/common/fsm_econtract/retriever.py
View file @
0b55051
...
...
@@ -6,6 +6,7 @@ class HMHRetriever:
def
__init__
(
self
):
self
.
words_str
=
'words'
self
.
position_str
=
'location'
self
.
fix_hava_str
=
'有'
self
.
default_position
=
[
0
,
0
,
0
,
0
]
self
.
search_fields_list
=
[
(
'借款/承租人姓名'
,
''
),
...
...
@@ -51,7 +52,8 @@ class HMHRetriever:
for
name_date_tuple
in
name_date_list
:
if
len
(
name_date_tuple
)
==
2
:
result
[
self
.
search_fields_list
[
4
][
0
]]
=
{
self
.
words_str
:
'{0} {1}'
.
format
(
name_date_tuple
[
0
]
.
replace
(
'
\u3000
'
,
''
)
.
strip
(),
name_date_tuple
[
1
]),
# self.words_str: '{0} {1}'.format(name_date_tuple[0].replace('\u3000', '').strip(), name_date_tuple[1]),
self
.
words_str
:
self
.
fix_hava_str
,
self
.
position_str
:
bbox
}
is_find_name_date
=
True
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment