Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
5501dd21
authored
2020-11-09 21:17:10 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add wechart process
1 parent
37ca9589
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
21 additions
and
6 deletions
src/apps/doc/consts.py
src/apps/doc/mixins.py
src/apps/doc/ocr/wb.py
src/apps/doc/consts.py
View file @
5501dd2
import
copy
import
re
TIME_NUM
=
600000000
...
...
@@ -909,3 +910,5 @@ PATTERN_LIST = ['收入', '存入', '支出', '支取', '金额', '余额', '发
'摘要/附言'
,
'交易发生额'
,
'交易摘要'
,
'借贷发生额(借:-贷:+)'
,
'借贷发生额(借:-贷:+)'
,
'联机余额'
,
'交易金额(元)'
,
'交易金额(元)'
,
'账户余额(元)'
,
'账户余额(元)'
,
'会计日期'
,
'摘要代码'
,
'摘要信息'
,
'日期'
,
'短摘要'
,
'本次余额'
,
'交易后余额'
,
'交易说明'
,
'帐户余额'
,
'交易日期 记账日期'
]
CN_RE
=
re
.
compile
(
u'[
\u4e00
-
\u9fa5
]'
)
...
...
src/apps/doc/mixins.py
View file @
5501dd2
...
...
@@ -8,13 +8,13 @@ class DocHandler:
@staticmethod
def
get_link
(
doc_id
,
business_type
,
file
=
'pdf'
):
if
file
==
'pdf'
:
return
'/data/{1}/{0}/{
0}.pdf'
.
format
(
doc_id
,
business_type
)
return
'/data/{1}/{0}/{
2}/{0}.pdf'
.
format
(
doc_id
,
business_type
,
consts
.
TMP_DIR_NAME
)
elif
file
==
'img'
:
return
'/data/{1}/{0}/{
0}_img.zip'
.
format
(
doc_id
,
business_type
)
return
'/data/{1}/{0}/{
2}/{0}_img.zip'
.
format
(
doc_id
,
business_type
,
consts
.
TMP_DIR_NAME
)
elif
file
==
'src_excel'
:
return
'/data/{1}/{0}/
src.xlsx'
.
format
(
doc_id
,
business_type
)
return
'/data/{1}/{0}/
{2}/src.xlsx'
.
format
(
doc_id
,
business_type
,
consts
.
TMP_DIR_NAME
)
else
:
return
'/data/{1}/{0}/{
0}.xlsx'
.
format
(
doc_id
,
business_type
)
return
'/data/{1}/{0}/{
2}/{0}.xlsx'
.
format
(
doc_id
,
business_type
,
consts
.
TMP_DIR_NAME
)
def
get_doc_list
(
self
,
doc_queryset
,
business_type
):
for
doc_dict
in
doc_queryset
:
...
...
src/apps/doc/ocr/wb.py
View file @
5501dd2
...
...
@@ -311,7 +311,17 @@ class BSWorkbook(Workbook):
res_str
=
'{0}.{1}'
.
format
(
res_str
[:
pre_idx
],
res_str
[
period_idx
+
1
:])
return
res_str
def
build_month_sheet
(
self
,
ms
,
card
,
month_mapping
,
is_reverse
,
statistics_header_info
,
max_column
):
@staticmethod
def
wechart_func
(
row_value
):
row_value
=
list
(
row_value
)
if
isinstance
(
row_value
[
1
],
str
):
cn_chars
=
re
.
findall
(
consts
.
CN_RE
,
row_value
[
1
])
cn_str
=
''
.
join
(
cn_chars
)
row_value
[
2
]
=
cn_str
+
row_value
[
2
]
row_value
[
1
]
=
re
.
sub
(
consts
.
CN_RE
,
''
,
row_value
[
1
])
return
row_value
def
build_month_sheet
(
self
,
ms
,
card
,
month_mapping
,
is_reverse
,
statistics_header_info
,
max_column
,
classify
):
summary_cell_idx
=
statistics_header_info
.
get
(
consts
.
SUMMARY_KEY
)
date_cell_idx
=
statistics_header_info
.
get
(
consts
.
DATE_KEY
)
amount_cell_idx
=
statistics_header_info
.
get
(
consts
.
AMOUNT_KEY
)
# None or src or append
...
...
@@ -343,6 +353,8 @@ class BSWorkbook(Workbook):
ws
=
self
.
get_sheet_by_name
(
part
[
0
])
for
row_value
in
ws
.
iter_rows
(
min_row
=
part
[
1
],
max_row
=
part
[
2
],
values_only
=
True
):
if
any
(
row_value
):
if
classify
==
consts
.
WECHART_CLASSIFY
:
row_value
=
self
.
wechart_func
(
row_value
)
new_ws
.
append
(
row_value
)
# 3.2.提取信息、高亮
amount_mapping
=
{}
...
...
@@ -506,7 +518,7 @@ class BSWorkbook(Workbook):
for
month_list
in
month_mapping
.
values
():
month_list
.
sort
(
key
=
lambda
x
:
x
[
-
1
],
reverse
=
is_reverse
)
self
.
build_month_sheet
(
ms
,
card
,
month_mapping
,
is_reverse
,
statistics_header_info
,
max_column
)
self
.
build_month_sheet
(
ms
,
card
,
month_mapping
,
is_reverse
,
statistics_header_info
,
max_column
,
classify
)
# 4.删除原表
for
sheet
in
sheets_list
:
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment