Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
3b1e6657
authored
2021-12-23 11:23:04 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix folder
1 parent
6e9f7b32
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
6 deletions
src/apps/doc/management/commands/folder_ocr_process.py
src/apps/doc/management/commands/folder_wsc_process.py
src/celery_compare/tasks.py
src/apps/doc/management/commands/folder_ocr_process.py
View file @
3b1e665
This diff is collapsed.
Click to expand it.
src/apps/doc/management/commands/folder_wsc_process.py
View file @
3b1e665
...
...
@@ -48,6 +48,7 @@ class Finder:
"其他约定与条件英文"
:
""
,
"其他约定与条件中文"
:
""
,
}
def
get_line
(
self
,
ocr_results
,
key_string
):
# 根据指定关键词, 找出与关键词同处一行的字符
top
,
bottom
=
-
1
,
-
1
...
...
@@ -69,6 +70,7 @@ class Finder:
line_text
=
sorted
(
line_text
,
key
=
lambda
x
:
x
[
0
][
0
],
reverse
=
False
)
lines
=
''
.
join
([
i
[
1
]
for
i
in
line_text
])
return
lines
def
page_predict
(
self
,
ocr_results
,
page_template
):
classes
=
[]
for
pno
in
ocr_results
:
...
...
@@ -82,6 +84,7 @@ class Finder:
classes
.
append
([
pno
,
score
])
pred
=
sorted
(
classes
,
key
=
lambda
x
:
x
[
1
],
reverse
=
True
)[
0
]
return
pred
def
get_top_key
(
self
,
ocr_results
,
key_string
):
# 加入过滤词典
"""找到与 key_string 最匹配的字段的 key
"""
...
...
@@ -90,6 +93,7 @@ class Finder:
ratio_list
=
[[
fuzz
.
ratio
(
key_string
,
ocr_results
[
key
][
1
]),
key
]
for
key
in
ocr_results
]
top_key
=
sorted
(
ratio_list
,
key
=
lambda
x
:
x
[
0
])[
-
1
]
return
top_key
def
get_top_iou
(
self
,
ocr_results
,
poly
):
"""求最大IoU
"""
...
...
@@ -108,6 +112,7 @@ class Finder:
return
-
1
,
-
1
top_iou
=
sorted
(
iou_list
,
key
=
lambda
x
:
x
[
0
])[
-
1
]
return
top_iou
def
get_key_value
(
self
,
ocr_results
,
key_string
):
"""根据 key 查找 value
"""
...
...
@@ -139,6 +144,7 @@ class Finder:
else
:
value
=
words
return
value
def
get_contract_No
(
self
):
"""提取左上角的合同编号字段
"""
...
...
@@ -153,6 +159,7 @@ class Finder:
# TODO!!!
contract_No_list
.
append
(
contract_No
)
return
contract_No_list
def
get_info_in_page_3
(
self
):
"""提取第三页上的经销商名称,和经销商统一社会信用代码或公司注册号
"""
...
...
@@ -178,6 +185,7 @@ class Finder:
words
=
self
.
get_key_value
(
self
.
ocr_results
[
pno
],
'统一社会信用代码或公司注册号'
)
dealer_No
=
words
.
replace
(
'O'
,
'0'
)
return
dealer_name
,
dealer_No
def
get_info_in_page_38
(
self
):
"""提取第38页上的经销商名称
"""
...
...
@@ -195,6 +203,7 @@ class Finder:
words
=
re
.
sub
(
r'[(())盖章《]'
,
""
,
words
)
dealer_name
=
words
return
dealer_name
def
get_guarantor
(
self
):
"""提取第10页上保证人段落,所见即所得
"""
...
...
@@ -210,6 +219,7 @@ class Finder:
words
=
words
.
replace
(
'【'
,
'['
)
.
replace
(
'】'
,
']'
)
.
replace
(
','
,
','
)
.
replace
(
'('
,
'('
)
.
replace
(
')'
,
')'
)
guarantor
=
words
return
guarantor
def
get_info_in_page_39
(
self
):
"""提取综合授信合同上的一些字段
"""
...
...
@@ -291,6 +301,7 @@ class Finder:
deposit_chn
=
f
'{words}
%
'
return
amount_eng
,
amount_chn
,
term_start_eng
,
term_end_eng
,
\
term_start_chn
,
term_end_chn
,
deposit_eng
,
deposit_chn
def
get_other_arrangements_and_conditions
(
self
):
"""获取其它约定与条件文本段落
"""
...
...
@@ -311,6 +322,7 @@ class Finder:
words
=
searchObj
.
group
(
1
)
other_arrangements_and_conditions_chn
=
words
return
other_arrangements_and_conditions_eng
,
other_arrangements_and_conditions_chn
def
get_info
(
self
):
# 按照文档页码返回一个合同编号列表,依次表示每一页上识别到的合同编号
contract_No_list
=
self
.
get_contract_No
()
...
...
@@ -337,6 +349,7 @@ class Finder:
self
.
init_result
[
"其他约定与条件中文"
]
=
words_chn
return
self
.
init_result
class
TIFFHandler
:
def
__init__
(
self
,
path
,
img_save_path
):
...
...
@@ -568,6 +581,7 @@ class Command(BaseCommand, LoggerMixin):
if
len
(
true_file_set
)
==
0
and
len
(
os_error_filename_set
)
>
0
:
true_file_set
.
add
(
os_error_filename_set
.
pop
())
for
name
in
true_file_set
:
time
.
sleep
(
10
)
# 防止文件较大时,读取到不完整文件
path
=
os
.
path
.
join
(
input_dir
,
name
)
try
:
...
...
src/celery_compare/tasks.py
View file @
3b1e665
...
...
@@ -989,7 +989,7 @@ def get_se_cms_compare_info_auto(last_obj, application_entity):
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
0
],
hmh_name
))
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
1
],
hmh_id
))
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
2
],
first_submission_date
))
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
3
],
consts
.
SE_STAMP_VALUE
))
#
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE))
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
4
],
consts
.
SE_FPL_VALUE
))
bhsj
=
float
(
amount
)
/
1.13
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
5
],
consts
.
SPLIT_STR
.
join
([
...
...
@@ -1464,7 +1464,7 @@ def get_se_cms_compare_info(last_obj, application_entity, detect_list):
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
0
],
hmh_name
))
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
1
],
hmh_id
))
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
2
],
first_submission_date
))
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
3
],
consts
.
SE_STAMP_VALUE
))
#
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE))
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
4
],
consts
.
SE_FPL_VALUE
))
bhsj
=
float
(
amount
)
/
1.13
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
5
],
consts
.
SPLIT_STR
.
join
([
...
...
@@ -1499,7 +1499,7 @@ def get_se_cms_compare_info(last_obj, application_entity, detect_list):
gb34_field_input
.
append
((
consts
.
SE_GB_USED_FIELD
[
1
],
main_num
))
gb34_field_input
.
append
((
consts
.
SE_GB_USED_FIELD
[
2
],
first_submission_date
))
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
2
],
first_submission_date
))
vehicle_field_input
.
append
((
consts
.
SE_NEW_ADD_FIELD
[
3
],
consts
.
SE_STAMP_VALUE
))
#
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE))
jypz_field_input
.
append
((
'vinNo'
,
vin_no
))
jypz_field_input
.
append
((
'vehicleTransactionAmount'
,
amount
))
jypz_field_input
.
append
((
consts
.
SE_GB_USED_FIELD
[
-
1
],
first_submission_date
))
...
...
@@ -2683,9 +2683,9 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh):
for
i
in
cn_reason_list
:
if
i
in
tmp_set
:
continue
elif
i
in
consts
.
BS_REASON
:
tmp_set
.
add
(
i
)
bs_cn_reason_list
.
append
(
i
)
#
elif i in consts.BS_REASON:
#
tmp_set.add(i)
#
bs_cn_reason_list.append(i)
else
:
tmp_set
.
add
(
i
)
last_cn_reason_list
.
append
(
i
)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment