Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
3f659eb8
authored
2020-11-07 01:05:25 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add count summary
1 parent
95499726
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
179 additions
and
79 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/models.py
src/apps/doc/ocr/edms.py
src/apps/doc/ocr/wb.py
src/apps/doc/views.py
src/common/tools/mssql_script.py
src/settings/conf/prd.ini
src/settings/conf/sit.ini
src/settings/conf/uat.ini
src/apps/doc/consts.py
View file @
3f659eb
...
...
@@ -858,17 +858,30 @@ FIELD_ORDER_MAP = {
MVC_CLASSIFY_SE
:
(
'转移登记日期'
,
MVC_SE_FIELD_ORDER_3_4
,
MVC_SE_FIELD_ORDER_1_2
)
}
LICENSE_ORDER
=
((
MVI_CLASSIFY
,
(
MVI_PID
,
MVI_CN_NAME
,
MVI_FIELD_ORDER
,
False
,
False
)),
(
IC_CLASSIFY
,
(
IC_PID
,
IC_CN_NAME
,
None
,
True
,
False
)),
(
RP_CLASSIFY
,
(
None
,
RP_CN_NAME
,
None
,
True
,
False
)),
(
BC_CLASSIFY
,
(
BC_PID
,
BC_CN_NAME
,
BC_FIELD_ORDER
,
False
,
False
)),
(
BL_CLASSIFY
,
(
BL_PID
,
BL_CN_NAME
,
BL_FIELD_ORDER
,
False
,
False
)),
(
UCI_CLASSIFY
,
(
UCI_PID
,
UCI_CN_NAME
,
UCI_FIELD_ORDER
,
False
,
False
)),
(
EEP_CLASSIFY
,
(
EEP_PID
,
EEP_CN_NAME
,
EEP_FIELD_ORDER
,
False
,
False
)),
(
DL_CLASSIFY
,
(
DL_PID
,
DL_CN_NAME
,
None
,
True
,
False
)),
(
PP_CLASSIFY
,
(
PP_PID
,
PP_CN_NAME
,
PP_FIELD_ORDER
,
False
,
False
)),
(
MVC_CLASSIFY
,
(
MVC_PID
,
MVC_CN_NAME
,
None
,
True
,
True
)),
(
VAT_CLASSIFY
,
(
VAT_PID
,
VAT_CN_NAME
,
VAT_FIELD_ORDER
,
False
,
False
)))
MODEL_FIELD_BS
=
'bs_count'
MODEL_FIELD_MVI
=
'mvi_count'
MODEL_FIELD_IC
=
'ic_count'
MODEL_FIELD_RP
=
'rp_count'
MODEL_FIELD_BC
=
'bc_count'
MODEL_FIELD_BL
=
'bl_count'
MODEL_FIELD_UCI
=
'uci_count'
MODEL_FIELD_EEP
=
'eep_count'
MODEL_FIELD_DL
=
'dl_count'
MODEL_FIELD_PP
=
'pp_count'
MODEL_FIELD_MVC
=
'mvc_count'
MODEL_FIELD_VAT
=
'vat_count'
LICENSE_ORDER
=
((
MVI_CLASSIFY
,
(
MVI_PID
,
MVI_CN_NAME
,
MVI_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_MVI
)),
(
IC_CLASSIFY
,
(
IC_PID
,
IC_CN_NAME
,
None
,
True
,
False
,
MODEL_FIELD_IC
)),
(
RP_CLASSIFY
,
(
None
,
RP_CN_NAME
,
None
,
True
,
False
,
MODEL_FIELD_RP
)),
(
BC_CLASSIFY
,
(
BC_PID
,
BC_CN_NAME
,
BC_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_BC
)),
(
BL_CLASSIFY
,
(
BL_PID
,
BL_CN_NAME
,
BL_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_BL
)),
(
UCI_CLASSIFY
,
(
UCI_PID
,
UCI_CN_NAME
,
UCI_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_UCI
)),
(
EEP_CLASSIFY
,
(
EEP_PID
,
EEP_CN_NAME
,
EEP_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_EEP
)),
(
DL_CLASSIFY
,
(
DL_PID
,
DL_CN_NAME
,
None
,
True
,
False
,
MODEL_FIELD_DL
)),
(
PP_CLASSIFY
,
(
PP_PID
,
PP_CN_NAME
,
PP_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_PP
)),
(
MVC_CLASSIFY
,
(
MVC_PID
,
MVC_CN_NAME
,
None
,
True
,
True
,
MODEL_FIELD_MVC
)),
(
VAT_CLASSIFY
,
(
VAT_PID
,
VAT_CN_NAME
,
VAT_FIELD_ORDER
,
False
,
False
,
MODEL_FIELD_VAT
)))
LICENSE_CLASSIFY_MAPPING
=
dict
(
LICENSE_ORDER
)
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
3f659eb
...
...
@@ -9,6 +9,7 @@ import difflib
import
requests
from
collections
import
Counter
from
datetime
import
datetime
,
date
from
django.utils
import
timezone
from
django.core.management
import
BaseCommand
from
multiprocessing
import
Process
,
Queue
,
Manager
,
Lock
...
...
@@ -32,7 +33,12 @@ class Command(BaseCommand, LoggerMixin):
# 处理文件开关
self
.
switch
=
True
# 睡眠时间
self
.
sleep_time
=
int
(
conf
.
SLEEP_SECOND
)
self
.
sleep_time_doc_get
=
float
(
conf
.
SLEEP_SECOND_DOC_GET
)
self
.
sleep_time_img_put
=
float
(
conf
.
SLEEP_SECOND_IMG_PUT
)
self
.
sleep_time_img_get
=
float
(
conf
.
SLEEP_SECOND_IMG_GET
)
self
.
sleep_time_task_get
=
float
(
conf
.
SLEEP_SECOND_TASK_GET
)
# 队列长度
self
.
img_queue_size
=
int
(
conf
.
IMG_QUEUE_SIZE
)
# 数据目录
self
.
data_dir
=
conf
.
DATA_DIR
# ocr相关
...
...
@@ -73,7 +79,8 @@ class Command(BaseCommand, LoggerMixin):
self
.
cronjob_log
.
warn
(
'{0} [get_doc_info] [doc status error] [task_str={1}] [is_priority={2}] '
'[doc_status={3}]'
.
format
(
self
.
log_base
,
task_str
,
is_priority
,
doc
.
status
))
return
None
,
None
,
None
doc
.
status
=
DocStatus
.
PROCESSING
.
value
# TODO update_time --> start_time
doc
.
status
=
DocStatus
.
PROCESSING
.
value
doc
.
start_time
=
timezone
.
now
()
doc
.
save
()
self
.
cronjob_log
.
info
(
'{0} [get_doc_info] [success] [task_str={1}] [is_priority={2}]'
.
format
(
self
.
log_base
,
task_str
,
is_priority
))
...
...
@@ -360,7 +367,7 @@ class Command(BaseCommand, LoggerMixin):
doc
,
business_type
,
task_str
=
self
.
get_doc_info
()
# 队列为空时的处理
if
doc
is
None
:
time
.
sleep
(
self
.
sleep_time
)
time
.
sleep
(
self
.
sleep_time
_doc_get
)
continue
try
:
...
...
@@ -368,19 +375,26 @@ class Command(BaseCommand, LoggerMixin):
doc_data_path
=
os
.
path
.
join
(
self
.
data_dir
,
business_type
,
str
(
doc
.
id
))
os
.
makedirs
(
doc_data_path
,
exist_ok
=
True
)
pdf_path
=
os
.
path
.
join
(
doc_data_path
,
'{0}.pdf'
.
format
(
doc
.
id
))
img_save_path
=
os
.
path
.
join
(
doc_data_path
,
'img'
)
self
.
pdf_download
(
doc
,
pdf_path
)
# 3.PDF文件提取图片
self
.
cronjob_log
.
info
(
'{0} [pdf to img start] [task={1}]'
.
format
(
self
.
log_base
,
task_str
))
start_time
=
time
.
time
()
img_save_path
=
os
.
path
.
join
(
doc_data_path
,
'img'
)
pdf_handler
=
PDFHandler
(
pdf_path
,
img_save_path
)
pdf_handler
.
extract_image
()
self
.
cronjob_log
.
info
(
'{0} [pdf to img end] [task={1}]'
.
format
(
self
.
log_base
,
task_str
))
end_time
=
time
.
time
()
speed_time
=
int
(
end_time
-
start_time
)
self
.
cronjob_log
.
info
(
'{0} [pdf to img end] [task={1}] [spend_time={2}]'
.
format
(
self
.
log_base
,
task_str
,
speed_time
))
with
lock
:
todo_count_dict
[
task_str
]
=
len
(
pdf_handler
.
img_path_list
)
for
img_path
in
pdf_handler
.
img_path_list
:
img_queue
.
put
(
img_path
)
# TODO 队列控制
while
img_queue
.
full
():
self
.
cronjob_log
.
info
(
'{0} [pdf_2_img_2_queue] [img queue full]'
.
format
(
self
.
log_base
))
time
.
sleep
(
self
.
sleep_time_img_put
)
img_queue
.
put
(
img_path
)
except
EDMSException
as
e
:
doc
.
status
=
DocStatus
.
PROCESS_FAILED
.
value
doc
.
save
()
...
...
@@ -398,7 +412,7 @@ class Command(BaseCommand, LoggerMixin):
img_path
=
img_queue
.
get
(
block
=
False
)
except
Exception
as
e
:
# self.cronjob_log.info('{0} [img_2_ocr_1] [queue empty]'.format(self.log_base))
time
.
sleep
(
0.5
)
time
.
sleep
(
self
.
sleep_time_img_get
)
continue
else
:
self
.
cronjob_log
.
info
(
'{0} [img_2_ocr_1] [get img] [img_path={1}]'
.
format
(
self
.
log_base
,
img_path
))
...
...
@@ -454,7 +468,7 @@ class Command(BaseCommand, LoggerMixin):
task_str
=
finish_queue
.
get
(
block
=
False
)
except
Exception
as
e
:
# self.cronjob_log.info('{0} [res_2_wb] [queue empty]'.format(self.log_base))
time
.
sleep
(
1
)
time
.
sleep
(
self
.
sleep_time_task_get
)
continue
else
:
self
.
cronjob_log
.
info
(
'{0} [res_2_wb] [get task] [task={1}]'
.
format
(
self
.
log_base
,
task_str
))
...
...
@@ -492,7 +506,7 @@ class Command(BaseCommand, LoggerMixin):
elif
classify
in
consts
.
LICENSE_CLASSIFY_SET_1
:
# 证件1
self
.
license1_process
(
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
)
elif
classify
in
consts
.
LICENSE_CLASSIFY_SET_2
:
# 证件2
pid
,
_
,
_
,
_
,
_
=
consts
.
LICENSE_CLASSIFY_MAPPING
.
get
(
classify
)
pid
,
_
,
_
,
_
,
_
,
_
=
consts
.
LICENSE_CLASSIFY_MAPPING
.
get
(
classify
)
with
open
(
img_path
,
'rb'
)
as
f
:
base64_data
=
base64
.
b64encode
(
f
.
read
())
# 获取解码后的base64值
...
...
@@ -548,8 +562,8 @@ class Command(BaseCommand, LoggerMixin):
with
lock
:
del
res_dict
[
task_str
]
self
.
cronjob_log
.
info
(
'{0} [res_dict record] [res_dict={1}]'
.
format
(
self
.
log_base
,
res_dict
))
#
self.cronjob_log.info('{0} [res_dict record] [res_dict={1}]'.format(
#
self.log_base, res_dict))
self
.
cronjob_log
.
info
(
'{0} [task={1}] [bs_summary={2}] [unknown_summary={3}] '
'[license_summary={4}]'
.
format
(
self
.
log_base
,
task_str
,
bs_summary
,
...
...
@@ -568,7 +582,7 @@ class Command(BaseCommand, LoggerMixin):
excel_path
=
os
.
path
.
join
(
doc_data_path
,
'{0}.xlsx'
.
format
(
doc
.
id
))
img_save_path
=
os
.
path
.
join
(
doc_data_path
,
'img'
)
# wb.save(src_excel_path)
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
)
count_list
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
)
wb
.
save
(
excel_path
)
except
Exception
as
e
:
with
lock
:
...
...
@@ -576,7 +590,7 @@ class Command(BaseCommand, LoggerMixin):
del
res_dict
[
task_str
]
doc
,
_
=
self
.
get_doc_object
(
task_str
)
doc
.
status
=
DocStatus
.
PROCESS_FAILED
.
value
doc
.
save
()
# TODO end_time
doc
.
save
()
self
.
cronjob_log
.
error
(
'{0} [process failed (res to wb)] [task={1}] [err={2}]'
.
format
(
self
.
log_base
,
task_str
,
e
))
else
:
...
...
@@ -595,7 +609,12 @@ class Command(BaseCommand, LoggerMixin):
else
:
raise
EDMSException
(
edms_exc
)
except
Exception
as
e
:
doc
.
status
=
DocStatus
.
UPLOAD_FAILED
.
value
# TODO end_time
doc
.
status
=
DocStatus
.
UPLOAD_FAILED
.
value
doc
.
end_time
=
timezone
.
now
()
doc
.
duration
=
(
doc
.
start_time
-
doc
.
end_time
)
.
seconds
for
field
,
count
in
count_list
:
if
hasattr
(
doc
,
field
):
setattr
(
doc
,
field
,
count
)
doc
.
save
()
self
.
cronjob_log
.
error
(
'{0} [process failed (edms upload)] [task={1}] [err={2}]'
.
format
(
self
.
log_base
,
task_str
,
e
))
...
...
@@ -603,7 +622,12 @@ class Command(BaseCommand, LoggerMixin):
else
:
doc
.
status
=
DocStatus
.
COMPLETE
.
value
doc
.
save
()
# TODO end_time
doc
.
end_time
=
timezone
.
now
()
doc
.
duration
=
(
doc
.
start_time
-
doc
.
end_time
)
.
seconds
for
field
,
count
in
count_list
:
if
hasattr
(
doc
,
field
):
setattr
(
doc
,
field
,
count
)
doc
.
save
()
self
.
cronjob_log
.
info
(
'{0} [process complete] [task={1}]'
.
format
(
self
.
log_base
,
task_str
))
write_zip_file
(
img_save_path
,
os
.
path
.
join
(
doc_data_path
,
'{0}_img.zip'
.
format
(
doc
.
id
)))
...
...
@@ -617,7 +641,7 @@ class Command(BaseCommand, LoggerMixin):
with
Manager
()
as
manager
:
todo_count_dict
=
manager
.
dict
()
res_dict
=
manager
.
dict
()
img_queue
=
Queue
()
img_queue
=
Queue
(
self
.
img_queue_size
)
finish_queue
=
Queue
()
process_list
=
[]
...
...
src/apps/doc/models.py
View file @
3f659eb
...
...
@@ -35,16 +35,31 @@ class HILDoc(models.Model):
retry_step
=
models
.
SmallIntegerField
(
null
=
True
,
verbose_name
=
"重试环节"
)
retry_times
=
models
.
SmallIntegerField
(
default
=
0
,
verbose_name
=
"重试次数"
)
is_retry
=
models
.
BooleanField
(
default
=
False
,
verbose_name
=
"是否需要重试"
)
main_applicant
=
models
.
CharField
(
max_length
=
16
,
verbose_name
=
"主申请人"
)
co_applicant
=
models
.
CharField
(
max_length
=
16
,
verbose_name
=
"共同申请人"
)
guarantor_1
=
models
.
CharField
(
max_length
=
16
,
verbose_name
=
"担保人1"
)
guarantor_2
=
models
.
CharField
(
max_length
=
16
,
verbose_name
=
"担保人2"
)
#
main_applicant = models.CharField(max_length=16, verbose_name="主申请人")
#
co_applicant = models.CharField(max_length=16, verbose_name="共同申请人")
#
guarantor_1 = models.CharField(max_length=16, verbose_name="担保人1")
#
guarantor_2 = models.CharField(max_length=16, verbose_name="担保人2")
document_name
=
models
.
CharField
(
max_length
=
255
,
verbose_name
=
"文件名"
)
document_scheme
=
models
.
CharField
(
max_length
=
64
,
verbose_name
=
"文件方案"
)
data_source
=
models
.
CharField
(
max_length
=
64
,
verbose_name
=
"数据源"
)
upload_finish_time
=
models
.
DateTimeField
(
verbose_name
=
"上传完成时间"
)
# 索引
update_time
=
models
.
DateTimeField
(
auto_now
=
True
,
verbose_name
=
'修改时间'
)
create_time
=
models
.
DateTimeField
(
auto_now_add
=
True
,
verbose_name
=
'创建时间'
)
# 索引
start_time
=
models
.
DateTimeField
(
null
=
True
,
verbose_name
=
'开始时间'
)
# 联合索引
end_time
=
models
.
DateTimeField
(
null
=
True
,
verbose_name
=
'结束时间'
)
# 联合索引
duration
=
models
.
IntegerField
(
null
=
True
,
verbose_name
=
'处理时长'
)
bs_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'银行流水处理数目'
)
mvi_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'机动车销售统一发票处理数目'
)
ic_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'身份证处理数目'
)
rp_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'居住证处理数目'
)
bc_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'银行卡处理数目'
)
bl_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'营业执照处理数目'
)
uci_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'二手车发票处理数目'
)
eep_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'港澳台通行证处理数目'
)
dl_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'行驶证处理数目'
)
pp_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'护照处理数目'
)
mvc_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'机动车登记证书处理数目'
)
vat_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'增值税发票处理数目'
)
class
Meta
:
managed
=
False
...
...
@@ -59,16 +74,31 @@ class AFCDoc(models.Model):
retry_step
=
models
.
SmallIntegerField
(
null
=
True
,
verbose_name
=
"重试环节"
)
retry_times
=
models
.
SmallIntegerField
(
default
=
0
,
verbose_name
=
"重试次数"
)
is_retry
=
models
.
BooleanField
(
default
=
False
,
verbose_name
=
"是否需要重试"
)
main_applicant
=
models
.
CharField
(
max_length
=
16
,
verbose_name
=
"主申请人"
)
co_applicant
=
models
.
CharField
(
max_length
=
16
,
verbose_name
=
"共同申请人"
)
guarantor_1
=
models
.
CharField
(
max_length
=
16
,
verbose_name
=
"担保人1"
)
guarantor_2
=
models
.
CharField
(
max_length
=
16
,
verbose_name
=
"担保人2"
)
#
main_applicant = models.CharField(max_length=16, verbose_name="主申请人")
#
co_applicant = models.CharField(max_length=16, verbose_name="共同申请人")
#
guarantor_1 = models.CharField(max_length=16, verbose_name="担保人1")
#
guarantor_2 = models.CharField(max_length=16, verbose_name="担保人2")
document_name
=
models
.
CharField
(
max_length
=
255
,
verbose_name
=
"文件名"
)
document_scheme
=
models
.
CharField
(
max_length
=
64
,
verbose_name
=
"文件方案"
)
data_source
=
models
.
CharField
(
max_length
=
64
,
verbose_name
=
"数据源"
)
upload_finish_time
=
models
.
DateTimeField
(
verbose_name
=
"上传完成时间"
)
update_time
=
models
.
DateTimeField
(
auto_now
=
True
,
verbose_name
=
'修改时间'
)
create_time
=
models
.
DateTimeField
(
auto_now_add
=
True
,
verbose_name
=
'创建时间'
)
start_time
=
models
.
DateTimeField
(
null
=
True
,
verbose_name
=
'开始时间'
)
end_time
=
models
.
DateTimeField
(
null
=
True
,
verbose_name
=
'结束时间'
)
duration
=
models
.
IntegerField
(
null
=
True
,
verbose_name
=
'处理时长'
)
bs_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'银行流水处理数目'
)
mvi_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'机动车销售统一发票处理数目'
)
ic_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'身份证处理数目'
)
rp_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'居住证处理数目'
)
bc_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'银行卡处理数目'
)
bl_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'营业执照处理数目'
)
uci_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'二手车发票处理数目'
)
eep_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'港澳台通行证处理数目'
)
dl_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'行驶证处理数目'
)
pp_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'护照处理数目'
)
mvc_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'机动车登记证书处理数目'
)
vat_count
=
models
.
IntegerField
(
default
=
0
,
verbose_name
=
'增值税发票处理数目'
)
class
Meta
:
managed
=
False
...
...
src/apps/doc/ocr/edms.py
View file @
3f659eb
...
...
@@ -57,7 +57,7 @@ class EDMS:
r
=
requests
.
get
(
self
.
download_url
,
params
=
params
,
headers
=
headers
,
stream
=
True
)
with
open
(
save_path
,
"wb"
)
as
f
:
# chunk是指定每次写入的大小,每次只写了512byte
for
chunk
in
r
.
iter_content
(
chunk_size
=
512
):
for
chunk
in
r
.
iter_content
(
chunk_size
=
1024
):
if
chunk
:
f
.
write
(
chunk
)
f
.
flush
()
...
...
src/apps/doc/ocr/wb.py
View file @
3f659eb
...
...
@@ -321,7 +321,6 @@ class BSWorkbook(Workbook):
amount_fill_row
=
set
()
for
rows
in
new_ws
.
iter_rows
(
min_row
=
2
):
# TODO 删除空行
summary_cell
=
None
if
summary_cell_idx
is
None
else
rows
[
summary_cell_idx
]
date_cell
=
None
if
date_cell_idx
is
None
else
rows
[
date_cell_idx
]
amount_cell
=
None
if
amount_cell_idx
is
None
else
rows
[
amount_cell_idx
]
...
...
@@ -441,7 +440,8 @@ class BSWorkbook(Workbook):
sheet_header_info
=
{}
header_info
=
{}
max_column_list
=
[]
for
sheet
in
summary
.
get
(
'sheet'
,
[]):
sheets_list
=
summary
.
get
(
'sheet'
,
[])
for
sheet
in
sheets_list
:
ws
=
self
.
get_sheet_by_name
(
sheet
)
self
.
header_collect
(
ws
,
sheet_header_info
,
header_info
,
max_column_list
,
classify
)
statistics_header_info
=
self
.
header_statistics
(
sheet_header_info
,
header_info
,
classify
)
...
...
@@ -454,7 +454,7 @@ class BSWorkbook(Workbook):
date_list
=
[]
# 用于收集各表中日期
month_mapping
=
{}
# 用于创建月份表
reverse_trend_list
=
[]
# 用于判断倒序与正序
for
sheet
in
s
ummary
.
get
(
'sheet'
,
[])
:
for
sheet
in
s
heets_list
:
ws
=
self
.
get_sheet_by_name
(
sheet
)
date_col
,
min_row
=
self
.
get_data_col_min_row
(
sheet
,
sheet_header_info
,
header_info
,
classify
)
self
.
sheet_split
(
ws
,
date_col
,
min_row
,
month_mapping
,
reverse_trend_list
,
date_list
,
date_statistics
)
...
...
@@ -480,14 +480,15 @@ class BSWorkbook(Workbook):
self
.
build_month_sheet
(
ms
,
card
,
month_mapping
,
is_reverse
,
statistics_header_info
,
max_column
)
# 4.删除原表
for
sheet
in
s
ummary
.
get
(
'sheet'
)
:
for
sheet
in
s
heets_list
:
self
.
remove
(
self
.
get_sheet_by_name
(
sheet
))
def
license_rebuild
(
self
,
license_summary
,
document_scheme
):
for
classify
,
(
_
,
name
,
field_order
,
side_diff
,
scheme_diff
)
in
consts
.
LICENSE_ORDER
:
def
license_rebuild
(
self
,
license_summary
,
document_scheme
,
count_list
):
for
classify
,
(
_
,
name
,
field_order
,
side_diff
,
scheme_diff
,
field_str
)
in
consts
.
LICENSE_ORDER
:
license_list
=
license_summary
.
get
(
classify
)
if
not
license_list
:
continue
count
=
0
ws
=
self
.
create_sheet
(
name
)
if
scheme_diff
and
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
1
]:
classify
=
consts
.
MVC_CLASSIFY_SE
...
...
@@ -505,6 +506,8 @@ class BSWorkbook(Workbook):
else
:
ws
.
append
((
write_field
,
field_value
))
ws
.
append
((
None
,
))
count
+=
1
count_list
.
append
((
field_str
,
count
))
def
res_sheet
(
self
,
res_list
):
if
res_list
:
...
...
@@ -519,7 +522,9 @@ class BSWorkbook(Workbook):
self
.
remove
(
self
.
get_sheet_by_name
(
'Sheet'
))
def
rebuild
(
self
,
bs_summary
,
license_summary
,
res_list
,
document_scheme
):
count_list
=
[(
consts
.
MODEL_FIELD_BS
,
len
(
self
.
sheetnames
)
-
1
)]
self
.
bs_rebuild
(
bs_summary
)
self
.
license_rebuild
(
license_summary
,
document_scheme
)
self
.
license_rebuild
(
license_summary
,
document_scheme
,
count_list
)
self
.
res_sheet
(
res_list
)
self
.
remove_base_sheet
()
return
count_list
...
...
src/apps/doc/views.py
View file @
3f659eb
...
...
@@ -128,10 +128,10 @@ class UploadDocView(GenericView, DocHandler):
doc
=
doc_class
.
objects
.
create
(
metadata_version_id
=
document
.
get
(
'metadataVersionId'
),
application_id
=
application_id
,
main_applicant
=
applicant_data
.
get
(
'mainApplicantName'
),
co_applicant
=
applicant_data
.
get
(
'coApplicantName'
),
guarantor_1
=
applicant_data
.
get
(
'guarantor1Name'
),
guarantor_2
=
applicant_data
.
get
(
'guarantor2Name'
),
#
main_applicant=applicant_data.get('mainApplicantName'),
#
co_applicant=applicant_data.get('coApplicantName'),
#
guarantor_1=applicant_data.get('guarantor1Name'),
#
guarantor_2=applicant_data.get('guarantor2Name'),
document_name
=
document
.
get
(
'documentName'
),
document_scheme
=
self
.
fix_scheme
(
document_scheme
),
data_source
=
self
.
fix_data_source
(
data_source
),
...
...
@@ -299,10 +299,10 @@ class DocView(GenericView, DocHandler):
doc
=
doc_class
.
objects
.
create
(
metadata_version_id
=
metadata_version_id
,
application_id
=
application_id
,
main_applicant
=
''
,
co_applicant
=
''
,
guarantor_1
=
''
,
guarantor_2
=
''
,
#
main_applicant='',
#
co_applicant='',
#
guarantor_1='',
#
guarantor_2='',
document_name
=
application_id
,
document_scheme
=
document_scheme
,
data_source
=
data_source
,
...
...
src/common/tools/mssql_script.py
View file @
3f659eb
...
...
@@ -9,13 +9,6 @@ cursor.execute("create database hil")
cursor
.
close
()
cnxn
.
close
()
# retry_step = models.SmallIntegerField(null=True, verbose_name="重试环节")
# retry_times = models.SmallIntegerField(default=0, verbose_name="重试次数")
# is_retry = models.BooleanField(default=False, verbose_name="是否需要重试")
# retry_step tinyint,
# retry_times tinyint default 0 not null,
# is_retry bit default 0 not null,
hil_sql_1
=
"""
create table auth_group
(
...
...
@@ -315,16 +308,26 @@ hil_sql_2 = """
retry_step tinyint,
retry_times tinyint default 0 not null,
is_retry bit default 0 not null,
main_applicant nvarchar(16) not null,
co_applicant nvarchar(16) not null,
guarantor_1 nvarchar(16) not null,
guarantor_2 nvarchar(16) not null,
document_name nvarchar(255) not null,
document_scheme nvarchar(64) not null,
data_source nvarchar(64) not null,
upload_finish_time datetime,
update_time datetime not null,
create_time datetime not null
create_time datetime not null,
start_time datetime,
end_time datetime,
duration smallint,
bs_count smallint default 0 not null,
mvi_count smallint default 0 not null,
ic_count smallint default 0 not null,
rp_count smallint default 0 not null,
bc_count smallint default 0 not null,
bl_count smallint default 0 not null,
uci_count smallint default 0 not null,
eep_count smallint default 0 not null,
dl_count smallint default 0 not null,
pp_count smallint default 0 not null,
mvc_count smallint default 0 not null,
vat_count smallint default 0 not null
);
create index hil_doc_upload_finish_time_index
...
...
@@ -335,6 +338,9 @@ hil_sql_2 = """
create index hil_doc_application_id_status_index
on hil_doc (application_id, status);
create index hil_doc_start_time_end_time_index
on hil_doc (start_time, end_time);
"""
afc_sql
=
"""
...
...
@@ -414,16 +420,26 @@ afc_sql = """
retry_step tinyint,
retry_times tinyint default 0 not null,
is_retry bit default 0 not null,
main_applicant nvarchar(16) not null,
co_applicant nvarchar(16) not null,
guarantor_1 nvarchar(16) not null,
guarantor_2 nvarchar(16) not null,
document_name nvarchar(255) not null,
document_scheme nvarchar(64) not null,
data_source nvarchar(64) not null,
upload_finish_time datetime,
update_time datetime not null,
create_time datetime not null
create_time datetime not null,
start_time datetime,
end_time datetime,
duration smallint,
bs_count smallint default 0 not null,
mvi_count smallint default 0 not null,
ic_count smallint default 0 not null,
rp_count smallint default 0 not null,
bc_count smallint default 0 not null,
bl_count smallint default 0 not null,
uci_count smallint default 0 not null,
eep_count smallint default 0 not null,
dl_count smallint default 0 not null,
pp_count smallint default 0 not null,
mvc_count smallint default 0 not null,
vat_count smallint default 0 not null
);
create index afc_doc_upload_finish_time_index
...
...
@@ -434,6 +450,9 @@ afc_sql = """
create index afc_doc_application_id_status_index
on afc_doc (application_id, status);
create index afc_doc_start_time_end_time_index
on afc_doc (start_time, end_time);
"""
hil_cnxn
=
pyodbc
.
connect
(
'DRIVER={ODBC Driver 17 for SQL Server};SERVER=localhost;DATABASE=hil;UID=SA;PWD=pwd'
,
autocommit
=
True
)
...
...
src/settings/conf/prd.ini
View file @
3f659eb
[settings]
DEBUG
=
False
SLEEP_SECOND
=
5
MAX_SLEEP_SECOND
=
60
SLEEP_SECOND_DOC_GET
=
2
SLEEP_SECOND_IMG_PUT
=
1
SLEEP_SECOND_IMG_GET
=
0.5
SLEEP_SECOND_TASK_GET
=
1
IMG_QUEUE_SIZE
=
500
EDMS_DOWNLOAD_URL
=
https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx
EDMS_UPLOAD_URL
=
https://edms-test.bmw.com/FH/FileHold/DocumentRepository/UploadHandler.ashx
\
EDMS_UPLOAD_URL
=
https://edms-test.bmw.com/FH/FileHold/DocumentRepository/UploadHandler.ashx
DEALER_CODE
=
ocr_situ_group
...
...
src/settings/conf/sit.ini
View file @
3f659eb
[settings]
DEBUG
=
True
SLEEP_SECOND
=
5
MAX_SLEEP_SECOND
=
60
SLEEP_SECOND_DOC_GET
=
10
SLEEP_SECOND_IMG_PUT
=
1
SLEEP_SECOND_IMG_GET
=
0.5
SLEEP_SECOND_TASK_GET
=
2
IMG_QUEUE_SIZE
=
500
EDMS_DOWNLOAD_URL
=
https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx
EDMS_UPLOAD_URL
=
https://edms-test.bmw.com/FH/FileHold/DocumentRepository/UploadHandler.ashx
...
...
src/settings/conf/uat.ini
View file @
3f659eb
[settings]
DEBUG
=
False
SLEEP_SECOND
=
5
MAX_SLEEP_SECOND
=
60
SLEEP_SECOND_DOC_GET
=
2
SLEEP_SECOND_IMG_PUT
=
1
SLEEP_SECOND_IMG_GET
=
0.5
SLEEP_SECOND_TASK_GET
=
1
IMG_QUEUE_SIZE
=
500
EDMS_DOWNLOAD_URL
=
https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx
EDMS_UPLOAD_URL
=
https://edms-test.bmw.com/FH/FileHold/DocumentRepository/UploadHandler.ashx
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment