Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
f682cf20
authored
2020-10-14 15:53:20 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix amount
1 parent
e6c82ee3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
19 additions
and
5 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/doc_ocr_process.py
src/apps/doc/ocr/wb.py
src/apps/doc/consts.py
View file @
f682cf2
...
...
@@ -41,13 +41,15 @@ TRANS_MAP = {
'C'
:
"0"
,
'c'
:
"0"
,
'('
:
"0"
,
'('
:
"0"
,
'o'
:
"0"
,
'O'
:
"0"
,
'D'
:
"0"
,
'['
:
"1"
,
']'
:
"1"
,
'l'
:
"1"
,
'L'
:
"1"
,
'A'
:
"4"
,
's'
:
"5"
,
'S'
:
"5"
,
...
...
@@ -57,6 +59,7 @@ TRANS_MAP = {
'B'
:
"13"
,
}
# Translation table derived from TRANS_MAP, suitable for str.translate().
TRANS = str.maketrans(TRANS_MAP)

# Characters that amount_format (src/apps/doc/ocr/wb.py) rewrites to '-'
# when one of them is the first character of an amount string.
ERROR_CHARS = {'.', '·', '•'}

# NOTE(review): the purpose of this ratio is not visible in this excerpt;
# confirm against its callers.
CARD_RATIO = 0.9

# Placeholder label; the literal reads "unknown card number".
UNKNOWN_CARD = '未知卡号'
...
...
src/apps/doc/management/commands/doc_ocr_process.py
View file @
f682cf2
...
...
@@ -458,7 +458,6 @@ class Command(BaseCommand, LoggerMixin):
pdf_handler
.
extract_image
()
self
.
cronjob_log
.
info
(
'{0} [pdf to img end] [business_type={1}] [doc_id={2}]'
.
format
(
self
.
log_base
,
business_type
,
doc
.
id
))
write_zip_file
(
img_save_path
,
os
.
path
.
join
(
doc_data_path
,
'{0}_img.zip'
.
format
(
doc
.
id
)))
# 4.获取OCR结果并且构建excel文件
bs_summary
=
{}
...
...
@@ -514,6 +513,8 @@ class Command(BaseCommand, LoggerMixin):
speed_time
=
int
(
end_time
-
start_time
)
self
.
cronjob_log
.
error
(
'{0} [upload failed] [business_type={1}] [doc_id={2}] [speed_time={3}] '
'[err={4}]'
.
format
(
self
.
log_base
,
business_type
,
doc
.
id
,
speed_time
,
e
))
write_zip_file
(
img_save_path
,
os
.
path
.
join
(
doc_data_path
,
'{0}_img.zip'
.
format
(
doc
.
id
)))
else
:
doc
.
status
=
DocStatus
.
COMPLETE
.
value
doc
.
save
()
...
...
@@ -521,5 +522,6 @@ class Command(BaseCommand, LoggerMixin):
speed_time
=
int
(
end_time
-
start_time
)
self
.
cronjob_log
.
info
(
'{0} [process complete] [business_type={1}] [doc_id={2}] '
'[speed_time={3}]'
.
format
(
self
.
log_base
,
business_type
,
doc
.
id
,
speed_time
))
write_zip_file
(
img_save_path
,
os
.
path
.
join
(
doc_data_path
,
'{0}_img.zip'
.
format
(
doc
.
id
)))
self
.
cronjob_log
.
info
(
'{0} [stop safely]'
.
format
(
self
.
log_base
))
...
...
src/apps/doc/ocr/wb.py
View file @
f682cf2
...
...
@@ -207,11 +207,20 @@ class BSWorkbook(Workbook):
def amount_format(amount_str):
    """Normalize an amount string.

    Values that are not ``str``, and the empty string, are returned
    unchanged.  Otherwise the text goes through four steps:

    1. Character substitution via ``consts.TRANS``.
    2. Removal of every ``-`` that is not the first character.
    3. A first character found in ``consts.ERROR_CHARS`` is replaced by ``-``.
    4. Separator repair around the third character from the end (only for
       results of length >= 4): a ``,`` sitting directly before a ``.`` in
       that position is dropped, and a ``,`` in that position becomes ``.``.

    NOTE(review): this presumably cleans up OCR misreads of monetary
    amounts - confirm against the callers.
    """
    if not (isinstance(amount_str, str) and amount_str != ''):
        return amount_str

    # Step 1: apply the substitution table.
    translated = amount_str.translate(consts.TRANS)

    # Steps 2 and 3: only the leading character may carry the sign.
    lead = translated[0]
    tail = translated[1:].replace('-', '')
    if lead in consts.ERROR_CHARS:
        lead = '-'
    cleaned = lead + tail

    # Step 4: nothing to repair when there is no room for "x.dd" / "x,dd".
    if len(cleaned) < 4:
        return cleaned

    if cleaned[-3] == '.' and cleaned[-4] == ',':
        # ",.dd" -> ".dd": drop the stray comma before the decimal point.
        return cleaned[:-4] + cleaned[-3:]
    if cleaned[-3] == ',':
        # ",dd" -> ".dd": the comma stands where the decimal point belongs.
        return cleaned[:-3] + '.' + cleaned[-2:]
    return cleaned
def
build_month_sheet
(
self
,
card
,
month_mapping
,
ms
,
is_reverse
):
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment