Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
8d595a3e
authored
2022-12-27 15:28:55 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add FSM AFC/HIL Contract
1 parent
a9ba395a
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
22 additions
and
9 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/named_enum.py
src/apps/doc/views.py
src/common/electronic_afc_contract/afc_contract_ocr.py
src/common/electronic_afc_contract/get_char_fsm.py
src/common/electronic_hil_contract/get_char_fsm.py
src/common/electronic_hil_contract/hil_contract_ocr.py
src/apps/doc/consts.py
View file @
8d595a3
...
...
@@ -11,7 +11,7 @@ PAGE_SIZE_DEFAULT = 10
FIXED_APPLICATION_ID_PREFIX
=
'CH-S'
DOC_SCHEME_LIST
=
[
'ACCEPTANCE'
,
'SETTLEMENT'
,
'CONTRACTMANAGEMENT'
]
DATA_SOURCE_LIST
=
[
'POS'
,
'EAPP'
,
'ECONTRACT'
]
DATA_SOURCE_LIST
=
[
'POS'
,
'EAPP'
,
'ECONTRACT'
,
'OVP'
]
COMPARE_DOC_SCHEME_LIST
=
[
'CA'
,
'SE'
]
HIL_PREFIX
=
'HIL'
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
8d595a3
...
...
@@ -1476,7 +1476,8 @@ class Command(BaseCommand, LoggerMixin):
# AFC合同
if
classify_1_str
==
str
(
consts
.
CONTRACT_CLASSIFY
):
ocr_result
=
afc_predict
(
pdf_handler
.
pdf_info
)
is_fsm
=
doc
.
data_source
==
consts
.
DATA_SOURCE_LIST
[
3
]
ocr_result
=
afc_predict
(
pdf_handler
.
pdf_info
,
is_fsm
=
is_fsm
)
page_res
=
{}
for
page_num
,
page_info
in
ocr_result
.
get
(
'page_info'
,
{})
.
items
():
if
isinstance
(
page_num
,
str
)
and
page_num
.
startswith
(
'page_'
):
...
...
@@ -1499,8 +1500,9 @@ class Command(BaseCommand, LoggerMixin):
}
# HIL合同
elif
classify_1_str
in
consts
.
HIL_CONTRACT_TYPE_MAP
:
is_fsm
=
doc
.
data_source
==
consts
.
DATA_SOURCE_LIST
[
3
]
file_type_1
=
consts
.
HIL_CONTRACT_TYPE_MAP
.
get
(
classify_1_str
)
ocr_result_1
=
hil_predict
(
pdf_handler
.
pdf_info
,
file_type_1
)
ocr_result_1
=
hil_predict
(
pdf_handler
.
pdf_info
,
file_type_1
,
is_fsm
=
is_fsm
)
rebuild_res_1
=
{}
page_res
=
{}
for
field_name
,
field_info
in
ocr_result_1
.
items
():
...
...
@@ -1526,8 +1528,8 @@ class Command(BaseCommand, LoggerMixin):
'page_info'
:
page_info
}
# hmh
else
:
pass
#
else:
#
pass
contract_res
=
{}
...
...
src/apps/doc/named_enum.py
View file @
8d595a3
...
...
@@ -36,6 +36,7 @@ class RequestTrigger(NamedEnum):
DOCUPLOAD
=
(
3
,
'Document Upload'
)
SUBMITING
=
(
4
,
'Submiting'
)
UPLOADING
=
(
5
,
'Uploading'
)
OVP
=
(
6
,
'OVP'
)
class
FailureReason
(
NamedEnum
):
...
...
src/apps/doc/views.py
View file @
8d595a3
...
...
@@ -590,8 +590,9 @@ class UploadDocView(GenericView, DocHandler):
is_zip
=
False
classify_1
=
0
# 电子合同
if
data_source
==
consts
.
DATA_SOURCE_LIST
[
-
1
]
and
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
1
]:
# 电子合同 Econtract or OVP(FSM)
if
data_source
==
consts
.
DATA_SOURCE_LIST
[
2
]
or
data_source
==
consts
.
DATA_SOURCE_LIST
[
3
]:
if
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
1
]:
for
keyword
,
classify_1_tmp
in
consts
.
ECONTRACT_KEYWORDS_MAP
.
get
(
prefix
):
if
keyword
in
document_name
:
classify_1
=
classify_1_tmp
...
...
src/common/electronic_afc_contract/afc_contract_ocr.py
View file @
8d595a3
...
...
@@ -6,6 +6,7 @@
# @Description :
from
.get_char
import
Finder
from
.get_char_fsm
import
Finder
as
FSMFinder
import
numpy
as
np
...
...
@@ -23,7 +24,7 @@ def extract_info(ocr_results):
return
{
'page_1'
:
{
'合同编号'
:
contract_no
}}
def
predict
(
pdf_info
,
is_qrs
=
False
):
def
predict
(
pdf_info
,
is_qrs
=
False
,
is_fsm
=
False
):
ocr_results
=
{}
for
pno
in
pdf_info
:
ocr_results
[
pno
]
=
{}
...
...
@@ -50,6 +51,9 @@ def predict(pdf_info, is_qrs=False):
results
=
extract_info
(
ocr_results
)
else
:
# 输入是整个 PDF 中的信息
if
is_fsm
:
f
=
FSMFinder
(
pdf_info
,
ocr_results
=
ocr_results
)
else
:
f
=
Finder
(
pdf_info
,
ocr_results
=
ocr_results
)
results
=
f
.
get_info
()
return
results
...
...
src/common/electronic_afc_contract/get_char_fsm.py
0 → 100644
View file @
8d595a3
This diff is collapsed.
Click to expand it.
src/common/electronic_hil_contract/get_char_fsm.py
0 → 100644
View file @
8d595a3
This diff is collapsed.
Click to expand it.
src/common/electronic_hil_contract/hil_contract_ocr.py
View file @
8d595a3
...
...
@@ -6,9 +6,10 @@
# @Description :
from
.get_char
import
Finder
from
.get_char_fsm
import
Finder
as
FSMFinder
def
predict
(
pdf_info
,
file_cls
):
def
predict
(
pdf_info
,
file_cls
,
is_fsm
=
False
):
"""Summary
Args:
...
...
@@ -58,6 +59,10 @@ def predict(pdf_info, file_cls):
pdf_info
=
dict
()
for
pno
,
page_info
in
enumerate
(
pdf_info_1
):
pdf_info
[
str
(
pno
)]
=
page_info
if
is_fsm
:
f
=
FSMFinder
(
pdf_info
)
else
:
f
=
Finder
(
pdf_info
)
if
file_cls
==
0
:
results
=
f
.
get_info
()
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment