Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
f821dc29
authored
2025-07-04 14:06:07 +0800
by
冯轩
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
mod
1 parent
e40ee582
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
45 additions
and
44 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/management/commands/ocr_process.py
View file @
f821dc2
...
...
@@ -1542,50 +1542,50 @@ class Command(BaseCommand, LoggerMixin):
self
.
log_base
,
traceback
.
format_exc
()))
# 获取算法结果
#
ocr_result = wxbs_predict(pdf_handler.pdf_info)
#
page_res = {}
#
for page_num, page_info in ocr_result.get('page_info', {}).items():
#
if isinstance(page_num, str) and page_num.startswith('page_'):
#
page_res[page_num] = {
#
'classify': int(classify_1_str),
#
"is_asp": ocr_result.get('is_asp', False),
#
'page_num': page_num,
#
'page_info': page_info
#
}
#
contract_res = {}
#
for img_path_tmp, page_key in pdf_handler.img_path_pno_list:
#
if classify_1_str == str(consts.HMH_CLASSIFY):
#
img_contract_res = {
#
'code': 1,
#
'data': [
#
{
#
'classify': consts.HMH_CLASSIFY,
#
'data': hmh_predict(pdf_handler.pdf_info)
#
}
#
]
#
}
#
else:
#
if page_key in page_res:
#
img_contract_res = {
#
'code': 1,
#
'data': [
#
{
#
'classify': page_res[page_key].pop('classify', consts.OTHER_CLASSIFY),
#
'data': page_res[page_key]
#
}
#
]
#
}
#
else:
#
img_contract_res = {
#
'code': 1,
#
'data': [
#
{
#
'classify': int(classify_1_str),
#
}
#
]
#
}
#
contract_res[img_path_tmp] = img_contract_res
ocr_result
=
wxbs_predict
(
pdf_handler
.
pdf_info
)
page_res
=
{}
for
page_num
,
page_info
in
ocr_result
.
get
(
'page_info'
,
{})
.
items
():
if
isinstance
(
page_num
,
str
)
and
page_num
.
startswith
(
'page_'
):
page_res
[
page_num
]
=
{
'classify'
:
int
(
classify_1_str
),
"is_asp"
:
ocr_result
.
get
(
'is_asp'
,
False
),
'page_num'
:
page_num
,
'page_info'
:
page_info
}
contract_res
=
{}
for
img_path_tmp
,
page_key
in
pdf_handler
.
img_path_pno_list
:
if
classify_1_str
==
str
(
consts
.
HMH_CLASSIFY
):
img_contract_res
=
{
'code'
:
1
,
'data'
:
[
{
'classify'
:
consts
.
HMH_CLASSIFY
,
'data'
:
hmh_predict
(
pdf_handler
.
pdf_info
)
}
]
}
else
:
if
page_key
in
page_res
:
img_contract_res
=
{
'code'
:
1
,
'data'
:
[
{
'classify'
:
page_res
[
page_key
]
.
pop
(
'classify'
,
consts
.
OTHER_CLASSIFY
),
'data'
:
page_res
[
page_key
]
}
]
}
else
:
img_contract_res
=
{
'code'
:
1
,
'data'
:
[
{
'classify'
:
int
(
classify_1_str
),
}
]
}
contract_res
[
img_path_tmp
]
=
img_contract_res
with
lock
:
res_str
=
'''{"/data/AFC/OCR_Files/60671/img/page_0_img_0.png":{"code":1,"msg":"success","data":[{"classify":12,"confidence":0.9999922513961792,"data":[{"summary":["冯轩","110109199202260310",null,null,"2025-06-30","2025-06-25","2025-06-30"],"cells":[{"start_row":0,"start_column":0,"words":"交易单号"},{"start_row":0,"start_column":1,"words":"交易时间"},{"start_row":0,"start_column":2,"words":"交易类型"},{"start_row":0,"start_column":3,"words":"收/支/其他"},{"start_row":0,"start_column":4,"words":"交易方式"},{"start_row":0,"start_column":5,"words":"金额(元)"},{"start_row":0,"start_column":6,"words":"交易对方"},{"start_row":0,"start_column":7,"words":"商户单号"},{"start_row":1,"start_column":0,"words":"4200002687202506298013
\\
n175397"},{"start_row":1,"start_column":1,"words":"2025-06-29
\\
n13:47:15"},{"start_row":1,"start_column":2,"words":"商户消费"},{"start_row":1,"start_column":3,"words":"支出"},{"start_row":1,"start_column":4,"words":"浦发银行储
\\
n蓄卡(9862)"},{"start_row":1,"start_column":5,"words":"333.00"},{"start_row":1,"start_column":6,"words":"美团平台商
\\
n户"},{"start_row":1,"start_column":7,"words":"20250629134707U
\\
n8927844819024394"},{"start_row":2,"start_column":0,"words":"1000039901017506286326
\\
n541033022"},{"start_row":2,"start_column":1,"words":"2025-06-28
\\
n21:07:37"},{"start_row":2,"start_column":2,"words":"微信红包"},{"start_row":2,"start_column":3,"words":"收入"},{"start_row":2,"start_column":4,"words":"/"},{"start_row":2,"start_column":5,"words":"1.20"},{"start_row":2,"start_column":6,"words":"默"},{"start_row":2,"start_column":7,"words":"1000039901202506
\\
n286326541033022"},{"start_row":3,"start_column":0,"words":"1000039901004506276276
\\
n744394049"},{"start_row":3,"start_column":1,"words":"2025-06-27
\\
n23:58:18"},{"start_row":3,"start_column":2,"words":"微信红包"},{"start_row":3,"start_column":3,"words":"收入"},{"start_row":3,"start_column":4,"words":"/"},{"start_row":3,"start_column":5,"words":"1.36"},{"start_row":3,"start_column":6,"words":"九日"},{"start_row":3,"start_column":7,"words":"1000039901202506
\\
n276276744394049"},{"start_row":4,"start_column":0,"words":"1000039901008506277372
\\
n907681001"},{"start_row":4,"start_column":1,"words":"2025-06-27
\\
n23:57:27"},{"start_row":4,"start_column":2,"words":"微信红包"},{"start_row":4,"start_column":3,"words":"收入"},{"start_row":4,"start_column":4,"words":"/"},{"start_row":4,"start_column":5,"words":"1.68"},{"start_row":4,"start_column":6,"words":"希望"},{"start_row":4,"start_column":7,"words":"1000039901202506
\\
n277372907681001"},{"start_row":5,"start_column":0,"words":"4200002672202506275479
\\
n881478"},{"start_row":5,"start_column":1,"words":"2025-06-27
\\
n19:50:21"},{"start_row":5,"start_column":2,"words":"商户消费"},{"start_row":5,"start_column":3,"words":"支出"},{"start_row":5,"start_column":4,"words":"浦发银行储
\\
n蓄卡(9862)"},{"start_row":5,"start_column":5,"words":"124.00"},{"start_row":5,"start_column":6,"words":"呷哺呷哺(北
\\
n京首联世纪
\\
n广场)"},{"start_row":5,"start_column":7,"words":";8011433A2506271
\\
n95001M000105"}],"verify":{"verify_res":"real","verify_info":[]}}]}]}}'''
...
...
@@ -1808,6 +1808,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
online_log
.
info
(
'{0} [get lock] [img={1}] '
.
format
(
self
.
log_base
,
img_path
))
doc_res_dict
=
res_dict
.
setdefault
(
task_str
,
{})
doc_res_dict
[
img_path
]
=
ocr_1_res
self
.
online_log
.
info
(
'{0} [ocr_1_res={1}] '
.
format
(
self
.
log_base
,
ocr_1_res
))
res_dict
[
task_str
]
=
doc_res_dict
todo_count
=
todo_count_dict
.
get
(
task_str
)
if
todo_count
==
1
:
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment