Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
266f2739
authored
2020-10-26 18:31:13 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix bug
1 parent
99390121
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
14 additions
and
6 deletions
.gitignore
src/apps/doc/mixins.py
src/apps/doc/ocr/wb.py
.gitignore
View file @
266f273
...
...
@@ -35,3 +35,4 @@ src/*.sh
test*
ocr_test.py
ocr_process.py
\ No newline at end of file
...
...
src/apps/doc/mixins.py
View file @
266f273
...
...
@@ -18,7 +18,7 @@ class DocHandler:
def
get_doc_list
(
self
,
doc_queryset
,
business_type
):
for
doc_dict
in
doc_queryset
:
if
doc_dict
[
'status'
]
!=
DocStatus
.
COMPLETE
.
value
:
if
doc_dict
[
'status'
]
not
in
[
DocStatus
.
COMPLETE
.
value
,
DocStatus
.
UPLOAD_FAILED
.
value
]
:
continue
doc_id
=
doc_dict
.
get
(
'id'
)
doc_dict
[
'pdf_link'
]
=
self
.
get_link
(
doc_id
,
business_type
)
...
...
src/apps/doc/ocr/wb.py
View file @
266f273
...
...
@@ -30,7 +30,7 @@ class BSWorkbook(Workbook):
self
.
MAX_MEAN
=
31
@staticmethod
def
header_collect
(
ws
,
sheet_header_info
,
header_info
):
def
header_collect
(
ws
,
sheet_header_info
,
header_info
,
max_column_list
):
# sheet_header_info = {
# 'sheet_name': {
# 'summary_col': 1,
...
...
@@ -77,6 +77,7 @@ class BSWorkbook(Workbook):
sheet_header_info
.
setdefault
(
ws
.
title
,
{})
.
setdefault
(
consts
.
FIND_COUNT_KEY
,
find_count
)
min_row
=
1
if
find_count
==
0
else
2
sheet_header_info
.
setdefault
(
ws
.
title
,
{})
.
setdefault
(
consts
.
MIN_ROW_KEY
,
min_row
)
max_column_list
.
append
(
ws
.
max_column
)
@staticmethod
def
header_statistics
(
sheet_header_info
,
header_info
,
classify
):
...
...
@@ -135,7 +136,7 @@ class BSWorkbook(Workbook):
else
:
fixed_col
=
consts
.
CLASSIFY_LIST
[
classify
][
1
][
consts
.
CLASSIFY_MAP
[
consts
.
DATE_KEY
]]
if
fixed_col
not
in
find_col_set
and
isinstance
(
fixed_col
,
int
):
date_col
=
fixed_col
date_col
=
fixed_col
-
1
min_row
=
sheet_header_info
.
get
(
sheet
,
{})
.
get
(
consts
.
MIN_ROW_KEY
,
2
)
return
date_col
,
min_row
...
...
@@ -283,7 +284,7 @@ class BSWorkbook(Workbook):
res_str
=
'{0}.{1}'
.
format
(
res_str
[:
period_idx
],
res_str
[
period_idx
+
1
:])
return
res_str
def
build_month_sheet
(
self
,
ms
,
card
,
month_mapping
,
is_reverse
,
statistics_header_info
):
def
build_month_sheet
(
self
,
ms
,
card
,
month_mapping
,
is_reverse
,
statistics_header_info
,
max_column
):
summary_cell_idx
=
statistics_header_info
.
get
(
consts
.
SUMMARY_KEY
)
date_cell_idx
=
statistics_header_info
.
get
(
consts
.
DATE_KEY
)
amount_cell_idx
=
statistics_header_info
.
get
(
consts
.
AMOUNT_KEY
)
# None or src or append
...
...
@@ -292,6 +293,10 @@ class BSWorkbook(Workbook):
outlay_cell_idx
=
statistics_header_info
.
get
(
consts
.
OUTLAY_KEY
)
borrow_cell_idx
=
statistics_header_info
.
get
(
consts
.
BORROW_KEY
)
header
=
list
(
statistics_header_info
.
get
(
consts
.
HEADER_KEY
))
src_header_len
=
len
(
header
)
if
max_column
>
src_header_len
:
for
i
in
range
(
max_column
-
src_header_len
):
header
.
append
(
None
)
add_col
=
[
'核对结果'
]
if
amount_cell_idx
is
None
:
...
...
@@ -431,10 +436,12 @@ class BSWorkbook(Workbook):
classify
=
summary
.
get
(
'classify'
,
0
)
sheet_header_info
=
{}
header_info
=
{}
max_column_list
=
[]
for
sheet
in
summary
.
get
(
'sheet'
,
[]):
ws
=
self
.
get_sheet_by_name
(
sheet
)
self
.
header_collect
(
ws
,
sheet_header_info
,
header_info
)
self
.
header_collect
(
ws
,
sheet_header_info
,
header_info
,
max_column_list
)
statistics_header_info
=
self
.
header_statistics
(
sheet_header_info
,
header_info
,
classify
)
max_column
=
max
(
max_column_list
)
# 1.2.按月份分割 min_row 正文第一行 date_col 日期行
start_date
=
summary
.
get
(
'start_date'
)
...
...
@@ -466,7 +473,7 @@ class BSWorkbook(Workbook):
for
month_list
in
month_mapping
.
values
():
month_list
.
sort
(
key
=
lambda
x
:
x
[
-
1
],
reverse
=
is_reverse
)
self
.
build_month_sheet
(
ms
,
card
,
month_mapping
,
is_reverse
,
statistics_header_info
)
self
.
build_month_sheet
(
ms
,
card
,
month_mapping
,
is_reverse
,
statistics_header_info
,
max_column
)
# 4.删除原表
for
sheet
in
summary
.
get
(
'sheet'
):
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment