Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
d6fc968c
authored
2025-02-14 14:45:51 +0800
by
冯轩
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
Merge branch 'hotfix/2025-02'
2 parents
8eb3eae2
a0de225f
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
68 additions
and
29 deletions
src/apps/doc/management/commands/ocr_process.py
src/celery_compare/tasks.py
src/settings/__init__.py
src/apps/doc/management/commands/ocr_process.py
View file @
d6fc968
...
...
@@ -2130,9 +2130,19 @@ class Command(BaseCommand, LoggerMixin):
# 更新OCR累计识别结果表
if
business_type
==
consts
.
HIL_PREFIX
:
result_class
=
HILOCRResult
if
is_ca
else
HILSEOCRResult
try
:
res_obj
=
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
)
except
Exception
as
e
:
# 遇到报错重试一次,希望解决两个文件首次入库都插入的问题
self
.
online_log
.
error
(
'{0} [process error (ocr result save) retry] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
res_obj
=
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
)
else
:
result_class
=
AFCOCRResult
if
is_ca
else
AFCSEOCRResult
try
:
res_obj
=
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
)
except
Exception
as
e
:
# 遇到报错重试一次,希望解决两个文件首次入库都插入的问题
self
.
online_log
.
error
(
'{0} [process error (ocr result save) retry] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
res_obj
=
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
)
except
Exception
as
e
:
...
...
@@ -2172,9 +2182,19 @@ class Command(BaseCommand, LoggerMixin):
# 更新OCR累计识别结果表
if
business_type
==
consts
.
HIL_PREFIX
:
result_class
=
HILOCRResult
if
is_ca
else
HILSEOCRResult
try
:
res_obj
=
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
)
except
Exception
as
e
:
# 遇到报错重试一次,希望解决两个文件首次入库都插入的问题
self
.
online_log
.
error
(
'{0} [process error (ocr result save) retry] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
res_obj
=
atomicSaveDBHIL
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
)
else
:
result_class
=
AFCOCRResult
if
is_ca
else
AFCSEOCRResult
try
:
res_obj
=
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
)
except
Exception
as
e
:
# 遇到报错重试一次,希望解决两个文件首次入库都插入的问题
self
.
online_log
.
error
(
'{0} [process error (ocr result save) retry] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
res_obj
=
atomicSaveDBAFC
(
self
,
result_class
,
doc
,
license_summary
,
ic_merge
,
rp_merge
,
task_str
,
financial_statement_dict
,
financial_explanation_dict
)
except
Exception
as
e
:
self
.
online_log
.
error
(
...
...
src/celery_compare/tasks.py
View file @
d6fc968
...
...
@@ -3773,20 +3773,29 @@ def fsm_compare(application_id, application_entity, uniq_seq, ocr_res_id, is_ca=
compare_log
.
info
(
'[fsm thread]'
)
#pool = ThreadPoolExecutor(max_workers=6, thread_name_prefix="fsm_thread_")
try
:
# 这个try不生效
pool
.
submit
(
fsm_compare_thread
,
application_id
,
application_entity
,
uniq_seq
,
ocr_res_id
,
is_ca
,
is_cms
)
except
Exception
as
e
:
compare_log
.
info
(
'[fsm thread fail] [error={0}]'
.
format
(
traceback
.
format_exc
()))
#pool.shutdown(wait=True)
def
fsm_compare_thread
(
application_id
,
application_entity
,
uniq_seq
,
ocr_res_id
,
is_ca
=
True
,
is_cms
=
False
):
try
:
compare_log
.
info
(
'{0} [receive fsm task] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] [is_ca={5}] '
'[is_cms={6}]'
.
format
(
log_base
,
application_entity
,
application_id
,
uniq_seq
,
ocr_res_id
,
is_ca
,
is_cms
))
# 查看此订单号下是否有未完成的文件,如果有,等1分钟
try
:
doc_wait_file_class
=
HILDoc
if
application_entity
==
consts
.
HIL_PREFIX
else
AFCDoc
doc_wait_file_result
=
doc_wait_file_class
.
objects
.
filter
(
application_id
=
application_id
,
status
=
1
)
.
first
()
compare_log
.
info
(
'doc_wait_file_result:{0}'
.
format
(
doc_wait_file_result
))
compare_log
.
info
(
'{0} [comparison unfinished file check] [entity={1}] [id={2}] [doc_wait_file_result={3}]'
.
format
(
log_base
,
application_entity
,
application_id
,
doc_wait_file_result
))
except
Exception
as
e
:
doc_wait_file_result
=
None
compare_log
.
info
(
'[get doc_wait_file_result fail] [error={0}]'
.
format
(
traceback
.
format_exc
()))
if
doc_wait_file_result
is
not
None
:
# 实时查询延迟时间
try
:
...
...
@@ -3806,7 +3815,6 @@ def fsm_compare_thread(application_id, application_entity, uniq_seq, ocr_res_id,
compare_log
.
info
(
'[sleep error] [error={0}]'
.
format
(
traceback
.
format_exc
()))
compare_log
.
info
(
'{0} [comparison unfinished file wait delay_time end] [entity={1}] [id={2}] [doc_id={3}]'
.
format
(
log_base
,
application_entity
,
application_id
,
doc_wait_file_result
.
id
))
# 调用java fsm 比对流程接口(http)
# 调用Java fsm 比对流程接口, fsm 是se流程, ca可以暂时忽略
auto_class
=
HILAutoSettlement
if
application_entity
==
consts
.
HIL_PREFIX
else
AFCAutoSettlement
...
...
@@ -3835,6 +3843,8 @@ def fsm_compare_thread(application_id, application_entity, uniq_seq, ocr_res_id,
except
Exception
as
e
:
compare_log
.
error
(
"fsm full request to java error, url:{0}, param:{1}, errorMsg:{2}"
.
format
(
url
,
json
.
dumps
(
body
),
traceback
.
format_exc
()))
except
Exception
as
e
:
compare_log
.
info
(
'[fsm_compare_thread error] [error={0}]'
.
format
(
traceback
.
format_exc
()))
@app.task
...
...
@@ -3851,42 +3861,21 @@ def compare(application_id, application_entity, uniq_seq, ocr_res_id, is_ca=True
compare_log
.
info
(
'[non fsm thread]'
)
#pool = ThreadPoolExecutor(max_workers=6, thread_name_prefix="non_fsm_thread_")
try
:
# 这个try不生效
pool
.
submit
(
compare_thread
,
application_id
,
application_entity
,
uniq_seq
,
ocr_res_id
,
is_ca
,
is_cms
)
except
Exception
as
e
:
compare_log
.
info
(
'[non fsm thread fail] [error={0}]'
.
format
(
traceback
.
format_exc
()))
#pool.shutdown(wait=True)
def
compare_thread
(
application_id
,
application_entity
,
uniq_seq
,
ocr_res_id
,
is_ca
=
True
,
is_cms
=
False
):
# POS: application_id, application_entity, uniq_seq, None
# OCR: application_id, business_type(application_entity), None, ocr_res_id
try
:
compare_log
.
info
(
'{0} [receive task] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] [is_ca={5}] '
'[is_cms={6}]'
.
format
(
log_base
,
application_entity
,
application_id
,
uniq_seq
,
ocr_res_id
,
is_ca
,
is_cms
))
# 查看此订单号下是否有未完成的文件,如果有,等?分钟
doc_wait_file_class
=
HILDoc
if
application_entity
==
consts
.
HIL_PREFIX
else
AFCDoc
doc_wait_file_result
=
doc_wait_file_class
.
objects
.
filter
(
application_id
=
application_id
,
status
=
1
)
.
first
()
compare_log
.
info
(
'doc_wait_file_result:{0}'
.
format
(
doc_wait_file_result
))
compare_log
.
info
(
'{0} [comparison unfinished file check] [entity={1}] [id={2}] [doc_wait_file_result={3}]'
.
format
(
log_base
,
application_entity
,
application_id
,
doc_wait_file_result
))
if
doc_wait_file_result
is
not
None
:
# 实时查询延迟时间
try
:
delay_time_config
=
Configs
.
objects
.
filter
(
id
=
4
)
.
first
()
if
delay_time_config
is
not
None
and
delay_time_config
.
value
is
not
None
and
delay_time_config
.
value
.
isdigit
():
delay_time
=
delay_time_config
.
value
else
:
delay_time
=
0
except
Exception
as
e
:
delay_time
=
0
compare_log
.
info
(
'[get delay_time_config fail] [error={0}]'
.
format
(
traceback
.
format_exc
()))
compare_log
.
info
(
'delay_time:{0}'
.
format
(
delay_time
))
compare_log
.
info
(
'{0} [comparison unfinished file wait delay_time start] [entity={1}] [id={2}] [doc_id={3}]'
.
format
(
log_base
,
application_entity
,
application_id
,
doc_wait_file_result
.
id
))
try
:
time
.
sleep
(
int
(
delay_time
))
except
Exception
as
e
:
compare_log
.
info
(
'[sleep error] [error={0}]'
.
format
(
traceback
.
format_exc
()))
compare_log
.
info
(
'{0} [comparison unfinished file wait delay_time end] [entity={1}] [id={2}] [doc_id={3}]'
.
format
(
log_base
,
application_entity
,
application_id
,
doc_wait_file_result
.
id
))
# 根据application_id查找最新的比对信息,如果没有,结束
if
is_ca
:
comparison_class
=
HILComparisonInfo
if
application_entity
==
consts
.
HIL_PREFIX
else
AFCComparisonInfo
...
...
@@ -3925,6 +3914,35 @@ def compare_thread(application_id, application_entity, uniq_seq, ocr_res_id, is_
uniq_seq
,
ocr_res_id
,
is_ca
,
is_cms
))
return
# 查看此订单号下是否有未完成的文件,如果有,等?分钟
try
:
doc_wait_file_class
=
HILDoc
if
application_entity
==
consts
.
HIL_PREFIX
else
AFCDoc
doc_wait_file_result
=
doc_wait_file_class
.
objects
.
filter
(
application_id
=
application_id
,
status
=
1
)
.
first
()
compare_log
.
info
(
'doc_wait_file_result:{0}'
.
format
(
doc_wait_file_result
))
compare_log
.
info
(
'{0} [comparison unfinished file check] [entity={1}] [id={2}] [doc_wait_file_result={3}]'
.
format
(
log_base
,
application_entity
,
application_id
,
doc_wait_file_result
))
except
Exception
as
e
:
doc_wait_file_result
=
None
compare_log
.
info
(
'[get doc_wait_file_result fail] [error={0}]'
.
format
(
traceback
.
format_exc
()))
if
doc_wait_file_result
is
not
None
:
# 实时查询延迟时间
try
:
delay_time_config
=
Configs
.
objects
.
filter
(
id
=
4
)
.
first
()
if
delay_time_config
is
not
None
and
delay_time_config
.
value
is
not
None
and
delay_time_config
.
value
.
isdigit
():
delay_time
=
delay_time_config
.
value
else
:
delay_time
=
0
except
Exception
as
e
:
delay_time
=
0
compare_log
.
info
(
'[get delay_time_config fail] [error={0}]'
.
format
(
traceback
.
format_exc
()))
compare_log
.
info
(
'delay_time:{0}'
.
format
(
delay_time
))
compare_log
.
info
(
'{0} [comparison unfinished file wait delay_time start] [entity={1}] [id={2}] [doc_id={3}]'
.
format
(
log_base
,
application_entity
,
application_id
,
doc_wait_file_result
.
id
))
try
:
time
.
sleep
(
int
(
delay_time
))
except
Exception
as
e
:
compare_log
.
info
(
'[sleep error] [error={0}]'
.
format
(
traceback
.
format_exc
()))
compare_log
.
info
(
'{0} [comparison unfinished file wait delay_time end] [entity={1}] [id={2}] [doc_id={3}]'
.
format
(
log_base
,
application_entity
,
application_id
,
doc_wait_file_result
.
id
))
if
is_ca
:
ca_compare
(
application_id
,
application_entity
,
ocr_res_id
,
last_obj
,
ocr_res_dict
)
else
:
...
...
@@ -3969,4 +3987,5 @@ def compare_thread(application_id, application_entity, uniq_seq, ocr_res_id, is_
compare_log
.
error
(
'{0} [Auto SE] [result save error] [entity={1}] [id={2}] [ocr_res_id={3}] '
'[error={4}]'
.
format
(
log_base
,
application_entity
,
application_id
,
ocr_res_id
,
traceback
.
format_exc
()))
except
Exception
as
e
:
compare_log
.
info
(
'[compare_thread error] [error={0}]'
.
format
(
traceback
.
format_exc
()))
...
...
src/settings/__init__.py
View file @
d6fc968
...
...
@@ -102,7 +102,7 @@ for db_setting in DATABASES.values():
'driver'
:
'ODBC Driver 17 for SQL Server'
,
'extra_params'
:
"odbc_cursortype=2"
}
db_setting
[
'CONN_MAX_AGE'
]
=
60
# set this to False if you want to turn off pyodbc's connection pooling
DATABASE_CONNECTION_POOLING
=
True
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment