Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
fc360367
authored
2021-05-07 17:54:07 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
report part 1
1 parent
9b2170ec
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
186 additions
and
3 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/models.py
src/apps/doc/named_enum.py
src/apps/doc/management/commands/ocr_process.py
View file @
fc36036
...
...
@@ -20,10 +20,10 @@ from common.tools.file_tools import write_zip_file
from
common.tools.pdf_to_img
import
PDFHandler
from
apps.doc
import
consts
from
apps.doc.ocr.edms
import
EDMS
,
rh
from
apps.doc.named_enum
import
KeywordsType
from
apps.doc.named_enum
import
KeywordsType
,
FailureReason
,
WorkflowName
,
ProcessName
,
RequestTeam
,
RequestTrigger
from
apps.doc.exceptions
import
EDMSException
,
OCR1Exception
,
OCR2Exception
,
OCR4Exception
from
apps.doc.ocr.wb
import
BSWorkbook
from
apps.doc.models
import
DocStatus
,
HILDoc
,
AFCDoc
,
Keywords
,
HILOCRResult
,
AFCOCRResult
from
apps.doc.models
import
DocStatus
,
HILDoc
,
AFCDoc
,
Keywords
,
HILOCRResult
,
AFCOCRResult
,
HILOCRReport
,
AFCOCRReport
from
celery_compare.tasks
import
compare
...
...
@@ -604,6 +604,24 @@ class Command(BaseCommand, LoggerMixin):
# return
except
Exception
as
e
:
try
:
end_time
=
timezone
.
now
()
report_table
=
HILOCRReport
if
business_type
==
consts
.
HIL_PREFIX
else
AFCOCRReport
report_table
.
objects
.
create
(
case_number
=
doc
.
application_id
,
request_team
=
RequestTeam
.
get_value
(
doc
.
document_scheme
,
0
),
request_trigger
=
RequestTrigger
.
get_value
(
doc
.
data_source
,
0
),
input_file
=
doc
.
document_name
,
transaction_start
=
doc
.
start_time
,
transaction_end
=
end_time
,
successful_at_this_level
=
False
,
failure_reason
=
FailureReason
.
PDF
.
value
,
process_name
=
ProcessName
.
ALL
.
value
,
)
except
Exception
as
e
:
self
.
online_log
.
error
(
'{0} [process error (report db save)] [error={1}]'
.
format
(
self
.
log_base
,
traceback
.
format_exc
()))
try
:
doc
.
status
=
DocStatus
.
PROCESS_FAILED
.
value
doc
.
save
()
self
.
online_log
.
warn
(
'{0} [process failed (pdf_2_img_2_queue)] [task={1}] '
...
...
@@ -702,12 +720,19 @@ class Command(BaseCommand, LoggerMixin):
try
:
doc
=
doc_class
.
objects
.
filter
(
id
=
doc_id
)
.
first
()
# report_dict = {
# 'process': None or pdf or excel or edms
# 'idcard': True or False,
# 'bs': None or normal or mobile,
# }
report_list
=
[
None
,
False
,
None
]
except
Exception
as
e
:
self
.
online_log
.
error
(
'{0} [process error (db filter)] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
else
:
try
:
# 4.OCR结果并且构建excel文件
bs_classify_set
=
set
()
bs_summary
=
{}
unknown_summary
=
{}
license_summary
=
{}
...
...
@@ -795,6 +820,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
online_log
.
warn
(
'{0} [ocr_2 failed] [img_path={1}]'
.
format
(
self
.
log_base
,
img_path
))
else
:
# 流水处理
bs_classify_set
.
add
(
classify
)
self
.
bs_process
(
wb
,
ocr_data
,
bs_summary
,
unknown_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
)
else
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_FAILED_1
))
...
...
@@ -806,8 +832,15 @@ class Command(BaseCommand, LoggerMixin):
# self.license_log.info('[task={0}] [license_summary={1}]'.format(task_str, license_summary))
idcard_list
=
license_summary
.
get
(
consts
.
IC_CLASSIFY
)
if
idcard_list
:
report_list
[
1
]
=
True
self
.
idcard_log
.
info
(
'[task={0}] [idcard={1}]'
.
format
(
task_str
,
idcard_list
))
if
len
(
bs_classify_set
)
>
0
:
if
consts
.
ALI_WECHART_CLASSIFY
&
bs_classify_set
:
report_list
[
2
]
=
WorkflowName
.
MOBILE
.
value
else
:
report_list
[
2
]
=
WorkflowName
.
NORMAL
.
value
merged_bs_summary
=
self
.
rebuild_bs_summary
(
bs_summary
,
unknown_summary
)
del
unknown_summary
...
...
@@ -821,6 +854,7 @@ class Command(BaseCommand, LoggerMixin):
except
Exception
as
e
:
report_list
[
0
]
=
FailureReason
.
EXCEL
.
value
self
.
online_log
.
warn
(
'{0} [process failed (res conformity)] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
...
...
@@ -842,6 +876,7 @@ class Command(BaseCommand, LoggerMixin):
except
Exception
as
e
:
report_list
[
0
]
=
FailureReason
.
EXCEL
.
value
self
.
online_log
.
warn
(
'{0} [process failed (wb rebuild)] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
...
...
@@ -868,12 +903,17 @@ class Command(BaseCommand, LoggerMixin):
else
:
raise
EDMSException
(
edms_exc
)
except
Exception
as
e
:
report_list
[
0
]
=
FailureReason
.
EDMS
.
value
doc
.
status
=
DocStatus
.
UPLOAD_FAILED
.
value
self
.
online_log
.
warn
(
'{0} [process failed (edms upload)] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
else
:
doc
.
status
=
DocStatus
.
COMPLETE
.
value
self
.
online_log
.
info
(
'{0} [edms upload success] [task={1}]'
.
format
(
self
.
log_base
,
task_str
))
finally
:
try
:
doc
.
end_time
=
timezone
.
now
()
...
...
@@ -938,6 +978,74 @@ class Command(BaseCommand, LoggerMixin):
else
:
self
.
online_log
.
info
(
'{0} [comparison info send success] [task={1}] '
'[res_id={2}]'
.
format
(
self
.
log_base
,
task_str
,
res_obj
.
id
))
finally
:
# report_dict = {
# 'process': None or pdf or excel or edms
# 'idcard': True or False,
# 'bs': None or normal or mobile,
# }
end_time
=
timezone
.
now
()
report_table
=
HILOCRReport
if
business_type
==
consts
.
HIL_PREFIX
else
AFCOCRReport
try
:
if
report_list
[
0
]
is
None
:
report_table
.
objects
.
create
(
case_number
=
doc
.
application_id
,
request_team
=
RequestTeam
.
get_value
(
doc
.
document_scheme
,
0
),
request_trigger
=
RequestTrigger
.
get_value
(
doc
.
data_source
,
0
),
input_file
=
doc
.
document_name
,
transaction_start
=
doc
.
start_time
,
transaction_end
=
end_time
,
process_name
=
ProcessName
.
ALL
.
value
,
)
else
:
report_table
.
objects
.
create
(
case_number
=
doc
.
application_id
,
request_team
=
RequestTeam
.
get_value
(
doc
.
document_scheme
,
0
),
request_trigger
=
RequestTrigger
.
get_value
(
doc
.
data_source
,
0
),
input_file
=
doc
.
document_name
,
transaction_start
=
doc
.
start_time
,
transaction_end
=
end_time
,
successful_at_this_level
=
False
,
failure_reason
=
report_list
[
0
],
process_name
=
ProcessName
.
ALL
.
value
,
)
except
Exception
as
e
:
self
.
online_log
.
error
(
'{0} [process error (report db save)] [error={1}]'
.
format
(
self
.
log_base
,
traceback
.
format_exc
()))
try
:
if
report_list
[
1
]:
report_table
.
objects
.
create
(
case_number
=
doc
.
application_id
,
request_team
=
RequestTeam
.
CONTROLLING
.
value
,
request_trigger
=
RequestTrigger
.
DOCUPLOAD
.
value
,
input_file
=
doc
.
document_name
,
transaction_start
=
doc
.
start_time
,
transaction_end
=
end_time
,
process_name
=
ProcessName
.
IDCARD
.
value
,
)
except
Exception
as
e
:
self
.
online_log
.
error
(
'{0} [process error (report db save)] [error={1}]'
.
format
(
self
.
log_base
,
traceback
.
format_exc
()))
try
:
if
report_list
[
2
]
is
not
None
:
report_table
.
objects
.
create
(
case_number
=
doc
.
application_id
,
request_team
=
RequestTeam
.
get_value
(
doc
.
document_scheme
,
0
),
request_trigger
=
RequestTrigger
.
DOCUPLOAD
.
value
,
input_file
=
doc
.
document_name
,
transaction_start
=
doc
.
start_time
,
transaction_end
=
end_time
,
process_name
=
ProcessName
.
BS
.
value
,
workflow_name
=
report_list
[
2
],
)
except
Exception
as
e
:
self
.
online_log
.
error
(
'{0} [process error (report db save)] [error={1}]'
.
format
(
self
.
log_base
,
traceback
.
format_exc
()))
finally
:
try
:
img_save_path
=
os
.
path
.
join
(
doc_data_path
,
'img'
)
...
...
src/apps/doc/models.py
View file @
fc36036
from
django.db
import
models
from
.named_enum
import
DocStatus
,
KeywordsType
,
Re
tryStep
from
.named_enum
import
DocStatus
,
KeywordsType
,
Re
questTeam
,
RequestTrigger
,
FailureReason
,
ProcessName
,
WorkflowName
# Create your models here.
...
...
@@ -267,3 +267,45 @@ class HILOCRResult(models.Model):
managed
=
False
db_table
=
'hil_ocr_result'
# OCR Report
class HILOCRReport(models.Model):
    # Report row for one OCR processing outcome on the HIL side.
    # The table is unmanaged: Django neither creates nor migrates
    # 'hil_ocr_report'; the schema is maintained outside the ORM.

    # Primary key.
    id = models.AutoField(
        primary_key=True,
        verbose_name="id",
    )
    # Application (case) identifier the processed document belongs to.
    case_number = models.CharField(
        max_length=64,
        verbose_name="申请id",
    )
    # Integer code from RequestTeam.
    request_team = models.SmallIntegerField(
        default=RequestTeam.ACCEPTANCE.value,
        verbose_name="来源",
    )
    # Integer code from RequestTrigger.
    request_trigger = models.SmallIntegerField(
        default=RequestTrigger.POS.value,
        verbose_name="触发者",
    )
    # Name of the input document.
    input_file = models.CharField(
        max_length=255,
        verbose_name="文件名",
    )
    # Start time; the original note marks this column as an index.
    # NOTE(review): no db_index is declared here; since the model is
    # unmanaged, the index (if any) lives only in the database schema.
    transaction_start = models.DateTimeField(
        null=True,
        verbose_name='开始时间',
    )
    # End time.
    transaction_end = models.DateTimeField(
        null=True,
        verbose_name='结束时间',
    )
    # Defaults to success; failure rows set this to False explicitly.
    successful_at_this_level = models.BooleanField(
        default=True,
        verbose_name="是否成功",
    )
    # Integer code from FailureReason; NULL when nothing failed.
    failure_reason = models.SmallIntegerField(
        null=True,
        verbose_name="失败原因",
    )
    # Integer code from ProcessName.
    process_name = models.SmallIntegerField(
        default=ProcessName.ALL.value,
        verbose_name="流程名称",
    )
    # Number of compared fields (optional).
    total_fields = models.IntegerField(
        null=True,
        verbose_name='比对字段数目',
    )
    # Integer code from WorkflowName (optional).
    workflow_name = models.SmallIntegerField(
        null=True,
        verbose_name="工作流程",
    )

    class Meta:
        managed = False
        db_table = 'hil_ocr_report'
class AFCOCRReport(models.Model):
    # Report row for one OCR processing outcome on the AFC side.
    # Same columns as HILOCRReport, stored in 'afc_ocr_report'.
    # The table is unmanaged: Django neither creates nor migrates it.

    # Primary key.
    id = models.AutoField(
        primary_key=True,
        verbose_name="id",
    )
    # Application (case) identifier the processed document belongs to.
    case_number = models.CharField(
        max_length=64,
        verbose_name="申请id",
    )
    # Integer code from RequestTeam.
    request_team = models.SmallIntegerField(
        default=RequestTeam.ACCEPTANCE.value,
        verbose_name="来源",
    )
    # Integer code from RequestTrigger.
    request_trigger = models.SmallIntegerField(
        default=RequestTrigger.POS.value,
        verbose_name="触发者",
    )
    # Name of the input document.
    input_file = models.CharField(
        max_length=255,
        verbose_name="文件名",
    )
    # Start time; the original note marks this column as an index.
    # NOTE(review): no db_index is declared here; since the model is
    # unmanaged, the index (if any) lives only in the database schema.
    transaction_start = models.DateTimeField(
        null=True,
        verbose_name='开始时间',
    )
    # End time.
    transaction_end = models.DateTimeField(
        null=True,
        verbose_name='结束时间',
    )
    # Defaults to success; failure rows set this to False explicitly.
    successful_at_this_level = models.BooleanField(
        default=True,
        verbose_name="是否成功",
    )
    # Integer code from FailureReason; NULL when nothing failed.
    failure_reason = models.SmallIntegerField(
        null=True,
        verbose_name="失败原因",
    )
    # Integer code from ProcessName.
    process_name = models.SmallIntegerField(
        default=ProcessName.ALL.value,
        verbose_name="流程名称",
    )
    # Number of compared fields (optional).
    total_fields = models.IntegerField(
        null=True,
        verbose_name='比对字段数目',
    )
    # Integer code from WorkflowName (optional).
    workflow_name = models.SmallIntegerField(
        null=True,
        verbose_name="工作流程",
    )

    class Meta:
        managed = False
        db_table = 'afc_ocr_report'
        # Project-specific Meta option; presumably selects the 'afc'
        # database for this model -- confirm against the DB router.
        situ_db_label = 'afc'
...
...
src/apps/doc/named_enum.py
View file @
fc36036
...
...
@@ -19,3 +19,36 @@ class KeywordsType(NamedEnum):
SALARY
=
(
1
,
'薪资'
)
LOAN
=
(
2
,
'贷款'
)
ALI_WECHART
=
(
3
,
'微信/支付宝'
)
class RequestTeam(NamedEnum):
    # Teams an OCR request can be attributed to.
    # Each member is an (integer code, label) pair; the report models
    # use `.value` of a member as the default of an integer column.
    ACCEPTANCE = (0, 'ACCEPTANCE')
    SETTLEMENT = (1, 'SETTLEMENT')
    CONTRACTMANAGEMENT = (2, 'CONTRACTMANAGEMENT')
    CONTROLLING = (3, 'CONTROLLING')
class RequestTrigger(NamedEnum):
    # Channels that can trigger an OCR request.
    # Each member is an (integer code, label) pair.
    POS = (0, 'POS')
    EAPP = (1, 'EAPP')
    ECONTRACT = (2, 'ECONTRACT')
    DOCUPLOAD = (3, 'Document Upload')
class FailureReason(NamedEnum):
    # Stage at which document processing failed.
    # Each member is an (integer code, label) pair; labels are kept in
    # Chinese because they are runtime data.
    PDF = (0, 'PDF处理失败')      # PDF processing failed
    EXCEL = (1, '构建excel失败')  # building the Excel workbook failed
    EDMS = (2, 'EDMS上传失败')    # upload to EDMS failed
class ProcessName(NamedEnum):
    # Process names used in the OCR report.
    # Each member is an (integer code, label) pair.
    ALL = (0, 'S1_All_DocumentUpload')
    BS = (1, 'S1_CA_BankStatementCalculation')
    IDCARD = (2, 'F2_IDReport')
    DDA = (3, 'CL_S1_DDAConsolidation')
class WorkflowName(NamedEnum):
    # Bank-statement workflow variants recorded in the OCR report.
    # Each member is an (integer code, label) pair. The code is what
    # gets written to the integer `workflow_name` column, so every
    # member needs a distinct code.
    NORMAL = (0, 'Normal BS')
    # Was (0, 'Mobile BS'): sharing code 0 with NORMAL made mobile
    # statements indistinguishable from normal ones once persisted.
    MOBILE = (1, 'Mobile BS')
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment