Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
ad4deaf3
authored
2025-07-02 16:36:10 +0800
by
冯轩
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
init:weixin e-bs
1 parent
8ddb1d4c
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
101 additions
and
1 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/views.py
src/apps/doc/management/commands/ocr_process.py
View file @
ad4deaf
...
...
@@ -1504,6 +1504,103 @@ class Command(BaseCommand, LoggerMixin):
self
.
log_base
,
traceback
.
format_exc
()))
# error_list.append(1)
# return
elif
classify_1_str
==
'12'
:
# weixin bs
self
.
online_log
.
info
(
'{0} [pdf_2_img_2_queue weixin] [task={1}] [times={2}] [pdf_path={3}]'
.
format
(
self
.
log_base
,
task_str
,
times
,
pdf_path
))
try
:
# pdf下载 处理 图片存储 识别
for
times
in
range
(
consts
.
RETRY_TIMES
):
try
:
if
doc
.
application_id
.
startswith
(
consts
.
FIXED_APPLICATION_ID_PREFIX
):
self
.
online_log
.
info
(
'{0} [mo ni xia dan] [task={1}] [times={2}] '
'[pdf_path={3}]'
.
format
(
self
.
log_base
,
task_str
,
times
,
pdf_path
))
pdf_handler
.
e_contract_process
()
else
:
self
.
edms
.
download
(
pdf_path
,
doc
.
metadata_version_id
,
doc
.
document_scheme
,
business_type
)
self
.
online_log
.
info
(
'{0} [edms download success] [task={1}] [times={2}] '
'[pdf_path={3}]'
.
format
(
self
.
e_log_base
,
task_str
,
times
,
pdf_path
))
self
.
online_log
.
info
(
'{0} [pdf to img start] [task={1}] [times={2}]'
.
format
(
self
.
e_log_base
,
task_str
,
times
))
pdf_handler
.
e_contract_process
()
self
.
online_log
.
info
(
'{0} [pdf to img end] [task={1}] [times={2}]'
.
format
(
self
.
e_log_base
,
task_str
,
times
))
except
Exception
as
e
:
self
.
online_log
.
warn
(
'{0} [download or pdf to img failed] [task={1}] [times={2}] '
'[error={3}]'
.
format
(
self
.
e_log_base
,
task_str
,
times
,
traceback
.
format_exc
()))
else
:
break
else
:
raise
Exception
(
'download or pdf to img failed'
)
try
:
doc
.
page_count
=
pdf_handler
.
page_count
doc
.
save
()
except
Exception
as
e
:
self
.
online_log
.
error
(
'{0} [process error (db save)] [error={1}]'
.
format
(
self
.
log_base
,
traceback
.
format_exc
()))
# 获取算法结果
# ocr_result = wxbs_predict(pdf_handler.pdf_info)
# page_res = {}
# for page_num, page_info in ocr_result.get('page_info', {}).items():
# if isinstance(page_num, str) and page_num.startswith('page_'):
# page_res[page_num] = {
# 'classify': int(classify_1_str),
# "is_asp": ocr_result.get('is_asp', False),
# 'page_num': page_num,
# 'page_info': page_info
# }
# contract_res = {}
# for img_path_tmp, page_key in pdf_handler.img_path_pno_list:
# if classify_1_str == str(consts.HMH_CLASSIFY):
# img_contract_res = {
# 'code': 1,
# 'data': [
# {
# 'classify': consts.HMH_CLASSIFY,
# 'data': hmh_predict(pdf_handler.pdf_info)
# }
# ]
# }
# else:
# if page_key in page_res:
# img_contract_res = {
# 'code': 1,
# 'data': [
# {
# 'classify': page_res[page_key].pop('classify', consts.OTHER_CLASSIFY),
# 'data': page_res[page_key]
# }
# ]
# }
# else:
# img_contract_res = {
# 'code': 1,
# 'data': [
# {
# 'classify': int(classify_1_str),
# }
# ]
# }
# contract_res[img_path_tmp] = img_contract_res
with
lock
:
# res_dict[task_str] = contract_res
res_dict
[
task_str
]
=
'{"/data/AFC/OCR_Files/60671/img/page_0_img_0.png":{"code":1,"msg":"success","data":[{"classify":12,"confidence":0.9999922513961792,"data":[{"summary":["冯轩","110109199202260310",null,null,"2025-06-30","2025-06-25","2025-06-30"],"cells":[{"start_row":0,"start_column":0,"words":"交易单号"},{"start_row":0,"start_column":1,"words":"交易时间"},{"start_row":0,"start_column":2,"words":"交易类型"},{"start_row":0,"start_column":3,"words":"收/支/其他"},{"start_row":0,"start_column":4,"words":"交易方式"},{"start_row":0,"start_column":5,"words":"金额(元)"},{"start_row":0,"start_column":6,"words":"交易对方"},{"start_row":0,"start_column":7,"words":"商户单号"},{"start_row":1,"start_column":0,"words":"4200002687202506298013
\n
175397"},{"start_row":1,"start_column":1,"words":"2025-06-29
\n
13:47:15"},{"start_row":1,"start_column":2,"words":"商户消费"},{"start_row":1,"start_column":3,"words":"支出"},{"start_row":1,"start_column":4,"words":"浦发银行储
\n
蓄卡(9862)"},{"start_row":1,"start_column":5,"words":"333.00"},{"start_row":1,"start_column":6,"words":"美团平台商
\n
户"},{"start_row":1,"start_column":7,"words":"20250629134707U
\n
8927844819024394"},{"start_row":2,"start_column":0,"words":"1000039901017506286326
\n
541033022"},{"start_row":2,"start_column":1,"words":"2025-06-28
\n
21:07:37"},{"start_row":2,"start_column":2,"words":"微信红包"},{"start_row":2,"start_column":3,"words":"收入"},{"start_row":2,"start_column":4,"words":"/"},{"start_row":2,"start_column":5,"words":"1.20"},{"start_row":2,"start_column":6,"words":"默"},{"start_row":2,"start_column":7,"words":"1000039901202506
\n
286326541033022"},{"start_row":3,"start_column":0,"words":"1000039901004506276276
\n
744394049"},{"start_row":3,"start_column":1,"words":"2025-06-27
\n
23:58:18"},{"start_row":3,"start_column":2,"words":"微信红包"},{"start_row":3,"start_column":3,"words":"收入"},{"start_row":3,"start_column":4,"words":"/"},{"start_row":3,"start_column":5,"words":"1.36"},{"start_row":3,"start_column":6,"words":"九日"},{"start_row":3,"start_column":7,"words":"1000039901202506
\n
276276744394049"},{"start_row":4,"start_column":0,"words":"1000039901008506277372
\n
907681001"},{"start_row":4,"start_column":1,"words":"2025-06-27
\n
23:57:27"},{"start_row":4,"start_column":2,"words":"微信红包"},{"start_row":4,"start_column":3,"words":"收入"},{"start_row":4,"start_column":4,"words":"/"},{"start_row":4,"start_column":5,"words":"1.68"},{"start_row":4,"start_column":6,"words":"希望"},{"start_row":4,"start_column":7,"words":"1000039901202506
\n
277372907681001"},{"start_row":5,"start_column":0,"words":"4200002672202506275479
\n
881478"},{"start_row":5,"start_column":1,"words":"2025-06-27
\n
19:50:21"},{"start_row":5,"start_column":2,"words":"商户消费"},{"start_row":5,"start_column":3,"words":"支出"},{"start_row":5,"start_column":4,"words":"浦发银行储
\n
蓄卡(9862)"},{"start_row":5,"start_column":5,"words":"124.00"},{"start_row":5,"start_column":6,"words":"呷哺呷哺(北
\n
京首联世纪
\n
广场)"},{"start_row":5,"start_column":7,"words":";8011433A2506271
\n
95001M000105"}],"verify":{"verify_res":"real","verify_info":[]}}]}]}}'
finish_queue
.
put
(
task_str
)
except
Exception
as
e
:
try
:
doc
.
status
=
DocStatus
.
PROCESS_FAILED
.
value
doc
.
page_count
=
pdf_handler
.
page_count
doc
.
save
()
self
.
online_log
.
warn
(
'{0} [process failed (e-contract)] [task={1}] '
'[error={2}]'
.
format
(
self
.
e_log_base
,
task_str
,
traceback
.
format_exc
()))
except
Exception
as
e
:
self
.
online_log
.
error
(
'{0} [process error (db save)] [error={1}]'
.
format
(
self
.
e_log_base
,
traceback
.
format_exc
()))
else
:
# e-contract or e-fsm-contract or e-hmh
try
:
# pdf下载 处理 图片存储 识别
...
...
@@ -1737,7 +1834,7 @@ class Command(BaseCommand, LoggerMixin):
time
.
sleep
(
self
.
sleep_time_task_get
)
continue
else
:
self
.
online_log
.
info
(
'{0} [res_2_wb] [get task] [task={1}]
'
.
format
(
self
.
log_base
,
task_str
))
self
.
online_log
.
info
(
'{0} [res_2_wb] [get task] [task={1}]
[res_dict={2}]'
.
format
(
self
.
log_base
,
task_str
,
res_dict
))
ocr_1_res
=
res_dict
.
pop
(
task_str
,
{})
business_type
,
doc_id_str
=
task_str
.
split
(
consts
.
SPLIT_STR
)
...
...
src/apps/doc/views.py
View file @
ad4deaf
...
...
@@ -683,6 +683,9 @@ class UploadDocView(GenericView, DocHandler):
if
keyword
in
document_name
:
classify_1
=
classify_1_tmp
break
elif
'微信支付交易明细证明'
in
document_name
or
'微信流水'
in
document_name
:
classify_1
=
12
self
.
running_log
.
info
(
'[weixin bs process] [doc_id={0}]'
.
format
(
doc
.
id
))
if
document_name
.
endswith
(
'.zip'
)
or
document_name
.
endswith
(
'.rar'
)
or
document_name
.
endswith
(
'.ZIP'
)
\
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment