97994674 by 周伟奇

ocr excel upload eDMS

1 parent 7aa0284c
...@@ -12,13 +12,16 @@ Django==2.1 ...@@ -12,13 +12,16 @@ Django==2.1
12 django-oauth-toolkit==1.3.2 12 django-oauth-toolkit==1.3.2
13 djangorestframework==3.9.0 13 djangorestframework==3.9.0
14 djangorestframework-jwt==1.11.0 14 djangorestframework-jwt==1.11.0
15 et-xmlfile==1.0.1
15 idna==2.9 16 idna==2.9
16 idna-ssl==1.1.0 17 idna-ssl==1.1.0
17 isodate==0.6.0 18 isodate==0.6.0
19 jdcal==1.4.1
18 lxml==4.5.1 20 lxml==4.5.1
19 marshmallow==3.6.1 21 marshmallow==3.6.1
20 multidict==4.7.6 22 multidict==4.7.6
21 oauthlib==3.1.0 23 oauthlib==3.1.0
24 openpyxl==3.0.4
22 pdfminer3k==1.3.4 25 pdfminer3k==1.3.4
23 Pillow==7.1.2 26 Pillow==7.1.2
24 ply==3.11 27 ply==3.11
......
1 PAGE_DEFAULT = 1 1 PAGE_DEFAULT = 1
2 PAGE_SIZE_DEFAULT = 10 2 PAGE_SIZE_DEFAULT = 10
3 3
4 DOC_SCHEME_LIST = ['Acceptance', 'Settlement', 'Contract Management'] 4 FIXED_APPLICATION_ID = '手工单'
5 DATA_SOURCE_LIST = ['POS', 'EAPP', 'Econtract'] 5
6 BUSINESS_TYPE_LIST = ['HIL', 'AFC'] 6 DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACT MANAGEMENT']
7 HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'} 7 DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT']
8
8 HIL_PREFIX = 'HIL' 9 HIL_PREFIX = 'HIL'
9 AFC_PREFIX = 'AFC' 10 AFC_PREFIX = 'AFC'
11 SPLIT_STR = '_'
12 BUSINESS_TYPE_LIST = [HIL_PREFIX, AFC_PREFIX]
13 HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'}
10 14
11 SESSION_PREFIX = 'FHLSID' 15 SESSION_PREFIX = 'FHLSID'
12 CUSTOM_CLIENT = 'CustomClient' 16 CUSTOM_CLIENT = 'CustomClient'
...@@ -15,12 +19,22 @@ FIXED_FILE_SIZE = 0 ...@@ -15,12 +19,22 @@ FIXED_FILE_SIZE = 0
15 DOWNLOAD_ACTION_TYPE = 'Downloaded' 19 DOWNLOAD_ACTION_TYPE = 'Downloaded'
16 20
17 DOC_SCHEMA_ID_FILL = { 21 DOC_SCHEMA_ID_FILL = {
18 'Acceptance': (1, 'DFE-AutoFilingScript'), 22 'ACCEPTANCE': (1, 'DFE-AutoFilingScript'),
19 'Settlement': (20, 'DFE-AutoFilingScript'), 23 'SETTLEMENT': (20, 'DFE-AutoFilingScript'),
20 'Contract Management': (86, 'Schema-Based') 24 'CONTRACT MANAGEMENT': (86, 'Schema-Based')
25 }
26 BUSINESS_TYPE_DICT = {
27 HIL_PREFIX: 'CO00002',
28 AFC_PREFIX: 'CO00001'
21 } 29 }
22 DOC_SCHEMA_TYPE = 'ElectronicRecord' 30 DOC_SCHEMA_TYPE = 'ElectronicRecord'
23 APPLICATION_ID_META_FIELD_id = 1 31 APPLICATION_ID_META_FIELD_id = 1
24 DEALER_CODE_META_FIELD_id = 13 32 DEALER_CODE_META_FIELD_id = 13
25 BUSINESS_TYPE_META_FIELD_id = 93 33 BUSINESS_TYPE_META_FIELD_id = 93
26 DEALER_CODE = 'ocr_situ_group' 34 DEALER_CODE = 'ocr_situ_group'
35
36 AMOUNT_COL_TITLE_SET = {"交易金额", "金额", "收入/支出金额", "发生额"}
37 OVERAGE_COL_TITLE_SET = {"账户余额", "余额"}
38 PROOF_COL_TITLE = '核对结果'
39 PROOF_RES = ('对', '错')
40 META_SHEET_TITLE = '关键信息提取和展示'
......
1 import os
1 import requests 2 import requests
2 from zeep import Client, xsd 3 from zeep import Client, xsd
3 from settings import conf 4 from settings import conf
...@@ -65,9 +66,9 @@ class EDMS: ...@@ -65,9 +66,9 @@ class EDMS:
65 params = {'token': token} 66 params = {'token': token}
66 self.download_handler(params, headers, save_path) 67 self.download_handler(params, headers, save_path)
67 68
68 def create_upload_token(self, headers, file_size): 69 def create_upload_token(self, headers):
69 with self.rc_client.settings(extra_http_headers=headers): 70 with self.rc_client.settings(extra_http_headers=headers):
70 token = self.rc_client.service.CreateUploadToken(fileSize=file_size) 71 token = self.rc_client.service.CreateUploadToken(fileSize=consts.FIXED_FILE_SIZE)
71 return token 72 return token
72 73
73 def upload_handler(self, file_path, params, headers): 74 def upload_handler(self, file_path, params, headers):
...@@ -80,11 +81,19 @@ class EDMS: ...@@ -80,11 +81,19 @@ class EDMS:
80 else: 81 else:
81 raise Exception 82 raise Exception
82 83
83 def get_doc_info(self, token, doc_info): 84 @staticmethod
84 doc_schema_id, auto_filing = consts.DOC_SCHEMA_ID_FILL.get(doc_info.get('document_scheme')) 85 def get_doc_file_name(doc_name):
85 application_id = doc_info.get('application_id') 86 if doc_name.endswith('pdf'):
86 doc_file_name = doc_info.get('doc_file_name') 87 name, _ = os.path.splitext(doc_name)
87 business_type = doc_info.get('business_type') 88 return name
89 return doc_name
90
91 def get_doc_info(self, token, doc, business_type, file_path):
92 business_type = consts.BUSINESS_TYPE_DICT.get(business_type)
93 doc_schema_id, auto_filing = consts.DOC_SCHEMA_ID_FILL.get(doc.document_scheme)
94 application_id = doc.application_id
95 doc_file_name = self.get_doc_file_name(doc.document_name)
96 origin_file_name = os.path.basename(file_path)
88 fields_with_value = [ 97 fields_with_value = [
89 {'FieldId': consts.APPLICATION_ID_META_FIELD_id, 98 {'FieldId': consts.APPLICATION_ID_META_FIELD_id,
90 'FieldValue': xsd.AnyObject(xsd.String(), application_id)}, 99 'FieldValue': xsd.AnyObject(xsd.String(), application_id)},
...@@ -99,20 +108,20 @@ class EDMS: ...@@ -99,20 +108,20 @@ class EDMS:
99 'DocumentName': doc_file_name, 108 'DocumentName': doc_file_name,
100 'FieldsWithValues': fields_with_values, 109 'FieldsWithValues': fields_with_values,
101 'UploadToken': token, 110 'UploadToken': token,
102 'OriginalFileName': doc_file_name, 111 'OriginalFileName': origin_file_name,
103 'SendEmailToMembers': False, 112 'SendEmailToMembers': False,
104 'AutoFilingScriptToUse': auto_filing, 113 'AutoFilingScriptToUse': auto_filing,
105 'DocumentSchemaType': consts.DOC_SCHEMA_TYPE, 114 'DocumentSchemaType': consts.DOC_SCHEMA_TYPE,
106 } 115 }
107 return info 116 return info
108 117
109 def add_doc_info(self, headers, token, doc_info): 118 def add_doc_info(self, headers, token, doc, business_type, file_path):
110 info = self.get_doc_info(token, doc_info) 119 info = self.get_doc_info(token, doc, business_type, file_path)
111 with self.dm_client.settings(extra_http_headers=headers): 120 with self.dm_client.settings(extra_http_headers=headers):
112 metadata_version_id = self.dm_client.service.AddDocumentInfo(info=info) 121 metadata_version_id = self.dm_client.service.AddDocumentInfo(info=info)
113 return metadata_version_id 122 return metadata_version_id
114 123
115 def upload(self, file_path, file_size, doc_info): 124 def upload(self, file_path, doc, business_type):
116 # file_path = '/Users/clay/Postman/files/OCRuploadTest4.txt' 125 # file_path = '/Users/clay/Postman/files/OCRuploadTest4.txt'
117 # file_size = 16 126 # file_size = 16
118 # doc_info = { 127 # doc_info = {
...@@ -122,12 +131,12 @@ class EDMS: ...@@ -122,12 +131,12 @@ class EDMS:
122 # 'business_type': 'CO00001', 131 # 'business_type': 'CO00001',
123 # } 132 # }
124 headers = self.get_headers() 133 headers = self.get_headers()
125 token = self.create_upload_token(headers, file_size) 134 token = self.create_upload_token(headers)
126 headers.update({'Content-Type': 'application/octet-stream'}) 135 headers.update({'Content-Type': 'application/octet-stream'})
127 params = {'token': token} 136 params = {'token': token}
128 self.upload_handler(file_path, params, headers) 137 self.upload_handler(file_path, params, headers)
129 headers.pop('Content-Type') 138 headers.pop('Content-Type')
130 metadata_version_id = self.add_doc_info(headers, token, doc_info) 139 metadata_version_id = self.add_doc_info(headers, token, doc, business_type, file_path)
131 return metadata_version_id 140 return metadata_version_id
132 141
133 142
......
1 import os 1 import os
2 import time 2 import time
3 import fitz 3 import fitz
4 import xlwt
5 import signal 4 import signal
6 import base64 5 import base64
7 import asyncio 6 import asyncio
8 import aiohttp 7 import aiohttp
8 import locale
9 from PIL import Image 9 from PIL import Image
10 from io import BytesIO 10 from io import BytesIO
11 from zeep import Client 11 from openpyxl import Workbook
12 from openpyxl.styles import numbers
13 from openpyxl.utils import get_column_letter
12 14
13 from django.core.management import BaseCommand 15 from django.core.management import BaseCommand
14 from common.mixins import LoggerMixin 16 from common.mixins import LoggerMixin
...@@ -23,7 +25,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -23,7 +25,7 @@ class Command(BaseCommand, LoggerMixin):
23 25
24 def __init__(self): 26 def __init__(self):
25 super().__init__() 27 super().__init__()
26 self.log_base = '[doc process]' 28 self.log_base = '[doc ocr process]'
27 # 处理文件开关 29 # 处理文件开关
28 self.switch = True 30 self.switch = True
29 # 数据目录 31 # 数据目录
...@@ -50,46 +52,54 @@ class Command(BaseCommand, LoggerMixin): ...@@ -50,46 +52,54 @@ class Command(BaseCommand, LoggerMixin):
50 task_str, is_priority = rh.dequeue() 52 task_str, is_priority = rh.dequeue()
51 if task_str is None: 53 if task_str is None:
52 self.cronjob_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base)) 54 self.cronjob_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base))
53 return None, None, None, None 55 return None, None
54 56
55 business_type, doc_id_str = task_str.split('_') 57 business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
56 doc_id = int(doc_id_str) 58 doc_id = int(doc_id_str)
57 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc 59 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc
58 doc_info = doc_class.objects.filter(id=doc_id, status=DocStatus.INIT.value).values( 60 # doc_info = doc_class.objects.filter(id=doc_id, status=DocStatus.INIT.value).values(
59 'id', 'metadata_version_id', 'application_id', 'document_name', 'document_scheme').first() 61 # 'id', 'metadata_version_id', 'application_id', 'document_name', 'document_scheme').first()
60 if doc_info is None: 62 doc = doc_class.objects.filter(id=doc_id).first()
61 self.cronjob_log.warn('{0} [get_doc_info] [doc completed] [task_str={1}] [is_priority={2}]'.format( 63 if doc is None:
64 self.cronjob_log.warn('{0} [get_doc_info] [doc not exist] [task_str={1}] [is_priority={2}]'.format(
62 self.log_base, task_str, is_priority)) 65 self.log_base, task_str, is_priority))
63 return None, None, None, None 66 return None, None
64 doc_class.objects.filter(id=doc_id).update(status=DocStatus.PROCESSING.value) 67 elif doc.status != DocStatus.INIT.value:
65 self.cronjob_log.info('{0} [get_doc_info] [task_str={1}] [is_priority={2}] [doc_info={3}]'.format( 68 self.cronjob_log.warn('{0} [get_doc_info] [doc status error] [task_str={1}] [is_priority={2}] '
66 self.log_base, task_str, is_priority, doc_info)) 69 '[doc_status={3}]'.format(self.log_base, task_str, is_priority, doc.status))
67 return doc_info, doc_class, doc_id, business_type 70 return None, None
68 71 doc.status = DocStatus.PROCESSING.value
69 def pdf_download(self, doc_id, doc_info, business_type): 72 doc.save()
70 if doc_info is None: 73 self.cronjob_log.info('{0} [get_doc_info] [success] [task_str={1}] [is_priority={2}]'.format(
74 self.log_base, task_str, is_priority))
75 return doc, business_type
76
77 def pdf_download(self, doc, business_type):
78 if doc is None:
71 return None, None, None 79 return None, None, None
72 # TODO EDMS下载pdf 80 # TODO EDMS下载pdf
73 81 doc_data_path = os.path.join(self.data_dir, business_type, str(doc.id))
74 doc_data_path = os.path.join(self.data_dir, business_type, str(doc_id)) 82 pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id))
75 pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc_id)) 83 if doc.application_id != consts.FIXED_APPLICATION_ID:
76 excel_path = os.path.join(doc_data_path, '{0}.xls'.format(doc_id)) 84 self.edms.download(pdf_path, doc.metadata_version_id)
77 self.cronjob_log.info('{0} [pdf download success] [business_type={1}] [doc_info={2}] [pdf_path={3}]'.format( 85
78 self.log_base, business_type, doc_info, pdf_path)) 86 excel_path = os.path.join(doc_data_path, '{0}.xls'.format(doc.id))
87 self.cronjob_log.info('{0} [pdf download success] [business_type={1}] [doc_id={2}] [pdf_path={3}]'.format(
88 self.log_base, business_type, doc.id, pdf_path))
79 return doc_data_path, excel_path, pdf_path 89 return doc_data_path, excel_path, pdf_path
80 90
81 @staticmethod 91 @staticmethod
82 def append_sheet(wb, sheets_list, img_name): 92 def append_sheet(wb, sheets_list, img_name):
83 for i, sheet in enumerate(sheets_list): 93 for i, sheet in enumerate(sheets_list):
84 ws = wb.add_sheet('{0}_{1}'.format(img_name, i)) 94 ws = wb.create_sheet('{0}_{1}'.format(img_name, i))
85 cells = sheet.get('cells') 95 cells = sheet.get('cells')
86 for cell in cells: 96 for cell in cells:
87 c1 = cell.get('start_column') 97 c1 = cell.get('start_column')
88 c2 = cell.get('end_column') 98 # c2 = cell.get('end_column')
89 r1 = cell.get('start_row') 99 r1 = cell.get('start_row')
90 r2 = cell.get('end_row') 100 # r2 = cell.get('end_row')
91 label = cell.get('words') 101 label = cell.get('words')
92 ws.write_merge(r1, r2, c1, c2, label=label) 102 ws.cell(row=r1+1, column=c1+1, value=label)
93 103
94 @staticmethod 104 @staticmethod
95 def get_ocr_json(img_path): 105 def get_ocr_json(img_path):
...@@ -112,6 +122,46 @@ class Command(BaseCommand, LoggerMixin): ...@@ -112,6 +122,46 @@ class Command(BaseCommand, LoggerMixin):
112 img_name = os.path.basename(img_path) 122 img_name = os.path.basename(img_path)
113 self.append_sheet(wb, sheets_list, img_name) 123 self.append_sheet(wb, sheets_list, img_name)
114 124
125 def proof(self, ws):
126 # 找到金额、余额列
127 amount_col = overage_col = None
128 for i in ws[1]:
129 if i.value in consts.AMOUNT_COL_TITLE_SET:
130 amount_col = i.column
131 amount_col_letter = get_column_letter(amount_col)
132 elif i.value in consts.OVERAGE_COL_TITLE_SET:
133 overage_col = i.column
134 overage_col_letter = get_column_letter(overage_col)
135 if amount_col is None or overage_col is None:
136 return
137 # 文本转数值
138 for col_tuple in ws.iter_cols(min_row=2, min_col=amount_col, max_col=overage_col):
139 for c in col_tuple:
140 try:
141 c.value = locale.atof(c.value)
142 c.number_format = numbers.FORMAT_NUMBER_00
143 except Exception:
144 continue
145 # 增加核对结果列
146 proof_col_letter = get_column_letter(ws.max_column + 1)
147 for c in ws[proof_col_letter]:
148 if c.row == 1:
149 c.value = consts.PROOF_COL_TITLE
150 elif c.row == 2:
151 continue
152 else:
153 c.value = '=IF({3}{0}=SUM({2}{0},{3}{1}), "{4}", "{5}")'.format(
154 c.row, c.row - 1, amount_col_letter, overage_col_letter, *consts.PROOF_RES)
155
156 def wb_process(self, wb, excel_path):
157 locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8')
158 for ws in wb.worksheets:
159 if ws.title == 'Sheet':
160 ws.title = consts.META_SHEET_TITLE
161 else:
162 self.proof(ws)
163 wb.save(excel_path) # TODO no sheet (res always [])
164
115 @staticmethod 165 @staticmethod
116 def getimage(pix): 166 def getimage(pix):
117 if pix.colorspace.n != 4: 167 if pix.colorspace.n != 4:
...@@ -124,7 +174,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -124,7 +174,7 @@ class Command(BaseCommand, LoggerMixin):
124 s = item[1] # xref of its /SMask 174 s = item[1] # xref of its /SMask
125 is_rgb = True if item[5] == 'DeviceRGB' else False 175 is_rgb = True if item[5] == 'DeviceRGB' else False
126 176
127 # GRAY/RGB # TODO 颜色空间不同处理 177 # RGB
128 if is_rgb: 178 if is_rgb:
129 if s == 0: 179 if s == 0:
130 return doc.extractImage(x) 180 return doc.extractImage(x)
...@@ -158,7 +208,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -158,7 +208,7 @@ class Command(BaseCommand, LoggerMixin):
158 208
159 pix1 = pix2 = None # free temp pixmaps 209 pix1 = pix2 = None # free temp pixmaps
160 210
161 pix = fitz.Pixmap(fitz.csRGB, pix) # CMYK to RGB 211 pix = fitz.Pixmap(fitz.csRGB, pix) # GRAY/CMYK to RGB
162 return self.getimage(pix) 212 return self.getimage(pix)
163 213
164 @staticmethod 214 @staticmethod
...@@ -200,19 +250,20 @@ class Command(BaseCommand, LoggerMixin): ...@@ -200,19 +250,20 @@ class Command(BaseCommand, LoggerMixin):
200 250
201 while self.switch: 251 while self.switch:
202 # 1. 从队列获取文件信息 252 # 1. 从队列获取文件信息
203 doc_info, doc_class, doc_id, business_type = self.get_doc_info() 253 doc, business_type = self.get_doc_info()
254
255 try:
256 # 2. 从EDMS获取PDF文件
257 doc_data_path, excel_path, pdf_path = self.pdf_download(doc, business_type)
204 258
205 # 2. 从EDMS获取PDF文件 259 # 队列为空时的处理
206 doc_data_path, excel_path, pdf_path = self.pdf_download(doc_id, doc_info, business_type) 260 if pdf_path is None:
261 time.sleep(sleep_second)
262 sleep_second = min(max_sleep_second, sleep_second+5)
263 continue
207 264
208 # 队列为空时的处理 265 sleep_second = int(conf.SLEEP_SECOND)
209 if pdf_path is None:
210 time.sleep(sleep_second)
211 sleep_second = min(max_sleep_second, sleep_second+5)
212 continue
213 266
214 sleep_second = int(conf.SLEEP_SECOND)
215 try:
216 # 3.PDF文件提取图片 267 # 3.PDF文件提取图片
217 img_save_path = os.path.join(doc_data_path, 'img') 268 img_save_path = os.path.join(doc_data_path, 'img')
218 os.makedirs(img_save_path, exist_ok=True) 269 os.makedirs(img_save_path, exist_ok=True)
...@@ -233,8 +284,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -233,8 +284,8 @@ class Command(BaseCommand, LoggerMixin):
233 save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number)) 284 save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number))
234 pm.writePNG(save_path) 285 pm.writePNG(save_path)
235 img_path_list.append(save_path) 286 img_path_list.append(save_path)
236 self.cronjob_log.info('{0} [page to img success] [doc_id={1}] [pdf_path={2}] ' 287 self.cronjob_log.info('{0} [page to img success] [pdf_path={1}] [page={2}]'.format(
237 '[page={3}]'.format(self.log_base, doc_id, pdf_path, page.number)) 288 self.log_base, pdf_path, page.number))
238 else: # 提取图片 289 else: # 提取图片
239 for img_index, img_il in enumerate(img_il_list): 290 for img_index, img_il in enumerate(img_il_list):
240 if len(img_il) == 1: # 当只有一张图片时, 简化处理 291 if len(img_il) == 1: # 当只有一张图片时, 简化处理
...@@ -246,8 +297,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -246,8 +297,8 @@ class Command(BaseCommand, LoggerMixin):
246 f.write(img_data) 297 f.write(img_data)
247 img_path_list.append(save_path) 298 img_path_list.append(save_path)
248 self.cronjob_log.info( 299 self.cronjob_log.info(
249 '{0} [extract img success] [doc_id={1}] [pdf_path={2}] [page={3}] ' 300 '{0} [extract img success] [pdf_path={1}] [page={2}] [img_index={3}]'.format(
250 '[img_index={4}]'.format(self.log_base, doc_id, pdf_path, pno, img_index)) 301 self.log_base, pdf_path, pno, img_index))
251 else: # 多张图片,竖向拼接 302 else: # 多张图片,竖向拼接
252 height_sum = 0 303 height_sum = 0
253 im_list = [] 304 im_list = []
...@@ -276,28 +327,41 @@ class Command(BaseCommand, LoggerMixin): ...@@ -276,28 +327,41 @@ class Command(BaseCommand, LoggerMixin):
276 res.save(save_path) 327 res.save(save_path)
277 img_path_list.append(save_path) 328 img_path_list.append(save_path)
278 self.cronjob_log.info( 329 self.cronjob_log.info(
279 '{0} [extract img success] [doc_id={1}] [pdf_path={2}] [page={3}] ' 330 '{0} [extract img success] [pdf_path={1}] [page={2}] [img_index={3}]'.format(
280 '[img_index={4}]'.format(self.log_base, doc_id, pdf_path, pno, img_index)) 331 self.log_base, pdf_path, pno, img_index))
281 self.cronjob_log.info('{0} [pdf to img success] [doc_id={1}]'.format(self.log_base, doc_id)) 332 self.cronjob_log.info('{0} [pdf to img success] [business_type={1}] [doc_id={2}]'.format(
333 self.log_base, business_type, doc.id))
282 334
283 write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc_id))) 335 write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc.id)))
284 336
285 # 4.图片调用算法判断是否为银行流水, 图片调用算法OCR为excel文件 337 # 4.图片调用算法判断是否为银行流水, 图片调用算法OCR为excel文件
286 wb = xlwt.Workbook() 338 wb = Workbook()
287 loop = asyncio.get_event_loop() 339 loop = asyncio.get_event_loop()
288 tasks = [self.img_ocr_excel(wb, img_path) for img_path in img_path_list] 340 tasks = [self.img_ocr_excel(wb, img_path) for img_path in img_path_list]
289 loop.run_until_complete(asyncio.wait(tasks)) 341 loop.run_until_complete(asyncio.wait(tasks))
290 # loop.close() 342 # loop.close()
291 wb.save(excel_path) # TODO no sheet (res always [])
292 # 整合excel文件
293
294 # 5.上传至EDMS
295 343
344 # 整合excel文件
345 # self.wb_process(wb, excel_path)
346 wb.save(excel_path)
296 except Exception as e: 347 except Exception as e:
297 doc_class.objects.filter(id=doc_id).update(status=DocStatus.PROCESS_FAILED.value) 348 doc.status = DocStatus.PROCESS_FAILED.value
298 self.cronjob_log.error('{0} [process failed] [doc_id={1}] [err={2}]'.format(self.log_base, doc_id, e)) 349 doc.save()
350 self.cronjob_log.error('{0} [process failed] [business_type={1}] [doc_id={2}] [err={3}]'.format(
351 self.log_base, business_type, doc.id, e))
299 else: 352 else:
300 doc_class.objects.filter(id=doc_id).update(status=DocStatus.COMPLETE.value) 353 try:
301 self.cronjob_log.info('{0} [doc process complete] [doc_id={1}]'.format(self.log_base, doc_id)) 354 # 5.上传至EDMS
355 self.edms.upload(excel_path, doc, business_type)
356 except Exception as e:
357 doc.status = DocStatus.UPLOAD_FAILED.value
358 doc.save()
359 self.cronjob_log.error('{0} [upload failed] [business_type={1}] [doc_id={2}] [err={3}]'.format(
360 self.log_base, business_type, doc.id, e))
361 else:
362 doc.status = DocStatus.COMPLETE.value
363 doc.save()
364 self.cronjob_log.info('{0} [doc process complete] [business_type={1}] [doc_id={2}]'.format(
365 self.log_base, business_type, doc.id))
302 366
303 self.cronjob_log.info('{0} [stop safely]'.format(self.log_base)) 367 self.cronjob_log.info('{0} [stop safely]'.format(self.log_base))
......
...@@ -86,99 +86,166 @@ class Command(BaseCommand, LoggerMixin): ...@@ -86,99 +86,166 @@ class Command(BaseCommand, LoggerMixin):
86 86
87 @staticmethod 87 @staticmethod
88 def split_il(il): 88 def split_il(il):
89 img_il_list = [] 89 small_img_il_list = []
90 big_img_il_list = []
90 start = 0 91 start = 0
92 index = 0
91 length = len(il) 93 length = len(il)
92 for i in range(length): 94 for i in range(length):
95 if il[i][2] >= 700 and il[i][3] >= 647:
96 if start < i:
97 small_img_il_list.append((il[start: i], index))
98 index += 1
99 else:
100 start += 1
101 big_img_il_list.append((il[i], index))
102 index += 1
103 continue
93 if i == start: 104 if i == start:
94 if i == length - 1: 105 if i == length - 1:
95 img_il_list.append(il[start: length]) 106 small_img_il_list.append((il[start: length], index))
96 continue 107 continue
97 elif i == length - 1: 108 elif i == length - 1:
98 img_il_list.append(il[start: length]) 109 if il[i][2] == il[i - 1][2]:
110 small_img_il_list.append((il[start: length], index))
111 else:
112 small_img_il_list.append((il[start: i], index))
113 small_img_il_list.append((il[i: length], index+1))
99 continue 114 continue
100 if il[i][2] != il[i - 1][2]: 115 if il[i][2] != il[i - 1][2]:
101 img_il_list.append(il[start: i]) 116 small_img_il_list.append((il[start: i], index))
117 index += 1
102 start = i 118 start = i
103 elif il[i][3] != il[i - 1][3]: 119 elif il[i][3] != il[i - 1][3] and il[i][2] < 1200:
104 img_il_list.append(il[start: i + 1]) 120 small_img_il_list.append((il[start: i + 1], index))
121 index += 1
105 start = i + 1 122 start = i + 1
106 return img_il_list 123 return small_img_il_list, big_img_il_list
107 124
108 def handle(self, *args, **kwargs): 125 def handle(self, *args, **kwargs):
109 pdf_dir = '/Users/clay/Desktop/普通打印-部分无线/竖版-无表格-农业银行' 126 pdf_dir = '/Users/clay/Desktop/问题PDF'
110 img_dir = '/Users/clay/Desktop/普通打印-部分无线_img/竖版-无表格-农业银行' 127 img_dir = '/Users/clay/Desktop/问题PDF'
111 os.makedirs(img_dir, exist_ok=True)
112 for d in os.listdir(pdf_dir): 128 for d in os.listdir(pdf_dir):
113 # if d in ['.DS_Store', 'CH-B008486764.pdf', 'CH-B008003736.pdf', 'CH-B008487476.pdf', 'CH-B006763780.pdf', 129 # if d in ['.DS_Store', 'CH-B008003736.pdf', 'CH-B006317088.pdf', 'CH-B008487476.pdf', 'CH-B006337608.pdf',
114 # 'CH-B009000564.pdf', 'CH-B009020488.pdf']: 130 # 'CH-B006391612.pdf', 'CH-B006536124.pdf', 'CH-B006526652.pdf', 'CH-B009003592.pdf']:
115 if d in ['.DS_Store', '1竖版-无表格-农业银行样例.PNG']: 131 # continue
132 # if d != 'CH-B006393152.PDF':
133 # if d != 'CH-B006526652.pdf':
134 if d != 'CH-B008487944.pdf':
116 continue 135 continue
117 pdf_path = os.path.join(pdf_dir, d) 136 pdf_path = os.path.join(pdf_dir, d)
118 # pdf_path = '/Users/clay/Desktop/普通打印part2/工商银行(标准版)/CH-B006754676.pdf'
119 if os.path.isfile(pdf_path): 137 if os.path.isfile(pdf_path):
120 img_save_path = os.path.join(img_dir, d) 138 img_save_path = os.path.join(img_dir, d[:-4])
121 if os.path.exists(img_save_path): 139 # if os.path.exists(img_save_path):
122 continue 140 # continue
123 os.makedirs(img_save_path, exist_ok=True) 141 os.makedirs(img_save_path, exist_ok=True)
124 with fitz.Document(pdf_path) as pdf: 142 with fitz.Document(pdf_path) as pdf:
125 self.cronjob_log.info('{0} [pdf_path={1}] [metadata={2}]'.format( 143 self.cronjob_log.info('{0} [pdf_path={1}] [metadata={2}]'.format(
126 self.log_base, pdf_path, pdf.metadata)) 144 self.log_base, pdf_path, pdf.metadata))
127 # xref_list = [] 145 xref_set = set()
128 for pno in range(pdf.pageCount): 146 for pno in range(pdf.pageCount):
147 print('---------------------------------------')
129 il = pdf.getPageImageList(pno) 148 il = pdf.getPageImageList(pno)
130 il.sort(key=lambda x: x[0]) 149 # (xref, smask, width, height, bpc, colorspace, alt.colorspace, name, filter, invoker)
131 img_il_list = self.split_il(il) 150 print(il)
132 del il 151
133 152 # for img_index, img in enumerate(il):
134 print(img_il_list) 153 # pix = self.recoverpix(pdf, img)
135 if len(img_il_list) > 3: # 单页无规律小图过多时,使用页面转图片 154 # ext, img_data = self.get_img_data(pix)
155 # save_path = os.path.join(img_save_path, 'page_{0}_img_{1}.{2}'.format(
156 # pno, img_index, ext))
157 # with open(save_path, "wb") as f:
158 # f.write(img_data)
159
160 if len(il) == 0:
136 page = pdf.loadPage(pno) 161 page = pdf.loadPage(pno)
137 pm = page.getPixmap(matrix=self.trans, alpha=False) 162 pm = page.getPixmap(matrix=self.trans, alpha=False)
138 save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number)) 163 save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number))
139 pm.writePNG(save_path) 164 pm.writePNG(save_path)
140 # img_path_list.append(save_path) 165 elif len(il) == 1:
141 # self.cronjob_log.info('{0} [page to img success] [doc_id={1}] [pdf_path={2}] ' 166 width = il[0][2]
142 # '[page={3}]'.format(self.log_base, doc_id, pdf_path, page.number)) 167 height = il[0][3]
143 else: # 提取图片 168 colorspace = il[0][5]
144 for img_index, img_il in enumerate(img_il_list): 169 adobe_filter = il[0][-1]
145 if len(img_il) == 1: # 当只有一张图片时, 简化处理 170 if colorspace == '' or adobe_filter in ['', '']:
146 pix = self.recoverpix(pdf, img_il[0]) 171 continue
172 # 小图
173 if width < 500 and height < 500:
174 page = pdf.loadPage(pno)
175 pm = page.getPixmap(matrix=self.trans, alpha=False)
176 save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number))
177 pm.writePNG(save_path)
178 # 大图
179 elif il[0][0] not in xref_set:
180 pix = self.recoverpix(pdf, il[0])
181 ext, img_data = self.get_img_data(pix)
182 save_path = os.path.join(img_save_path, 'page_{0}_img_0.{1}'.format(pno, ext))
183 with open(save_path, "wb") as f:
184 f.write(img_data)
185 xref_set.add(il[0][0])
186 else:
187 il.sort(key=lambda x: x[0])
188 small_img_il_list, big_img_il_list = self.split_il(il)
189 print(small_img_il_list)
190 print(big_img_il_list)
191 print('+++++++++++++++++++++++++++++++++++')
192
193 if len(small_img_il_list) > 2: # 单页无规律小图过多时,使用页面转图片
194 page = pdf.loadPage(pno)
195 pm = page.getPixmap(matrix=self.trans, alpha=False)
196 save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number))
197 pm.writePNG(save_path)
198 else: # 提取图片
199 for img_il, img_index in big_img_il_list:
200 if img_il[0] in xref_set:
201 continue
202 pix = self.recoverpix(pdf, img_il)
147 ext, img_data = self.get_img_data(pix) 203 ext, img_data = self.get_img_data(pix)
148 save_path = os.path.join(img_save_path, 'page_{0}_img_{1}.{2}'.format( 204 save_path = os.path.join(img_save_path, 'page_{0}_img_{1}.{2}'.format(
149 pno, img_index, ext)) 205 pno, img_index, ext))
150 with open(save_path, "wb") as f: 206 with open(save_path, "wb") as f:
151 f.write(img_data) 207 f.write(img_data)
152 # img_path_list.append(save_path) 208 xref_set.add(img_il[0])
153 # self.cronjob_log.info( 209
154 # '{0} [extract img success] [doc_id={1}] [pdf_path={2}] [page={3}] ' 210 for img_il, img_index in small_img_il_list:
155 # '[img_index={4}]'.format(self.log_base, doc_id, pdf_path, pno, img_index)) 211 # 小图
156 else: # 多张图片,竖向拼接 212 if len(img_il) == 1 and img_il[0][2] < 500 and img_il[0][3] < 500:
157 height_sum = 0 213 page = pdf.loadPage(pno)
158 im_list = [] 214 pm = page.getPixmap(matrix=self.trans, alpha=False)
159 width = img_il[0][2] 215 save_path = os.path.join(img_save_path,
160 for img in img_il: 216 'page_{0}_img_0.png'.format(page.number))
161 # xref = img[0] 217 pm.writePNG(save_path)
162 # if xref in xref_list: 218 elif len(img_il) == 1 and img_il[0][0] not in xref_set: # 当只有一张图片时, 简化处理
163 # continue 219 pix = self.recoverpix(pdf, img_il[0])
164 height = img[3]
165 pix = self.recoverpix(pdf, img)
166 ext, img_data = self.get_img_data(pix) 220 ext, img_data = self.get_img_data(pix)
167 221 save_path = os.path.join(img_save_path, 'page_{0}_img_{1}.{2}'.format(
168 # xref_list.append(xref) 222 pno, img_index, ext))
169 223 with open(save_path, "wb") as f:
170 im = Image.open(BytesIO(img_data)) 224 f.write(img_data)
171 im_list.append((height, im, ext)) 225 xref_set.add(img_il[0][0])
172 height_sum += height 226 else: # 多张图片,竖向拼接
173 227 height_sum = 0
174 save_path = os.path.join(img_save_path, 'page_{0}_img_{1}.{2}'.format( 228 im_list = []
175 pno, img_index, im_list[0][2])) 229 width = img_il[0][2]
176 res = Image.new(im_list[0][1].mode, (width, height_sum)) 230 for img in img_il:
177 h_now = 0 231 # xref = img[0]
178 for h, m, _ in im_list: 232 # if xref in xref_list:
179 res.paste(m, box=(0, h_now)) 233 # continue
180 h_now += h 234 height = img[3]
181 res.save(save_path) 235 pix = self.recoverpix(pdf, img)
182 # else: 236 ext, img_data = self.get_img_data(pix)
183 # img_dir_path = os.path.join(img_dir, d) 237
184 # os.makedirs(img_dir_path, exist_ok=True) 238 # xref_list.append(xref)
239
240 im = Image.open(BytesIO(img_data))
241 im_list.append((height, im, ext))
242 height_sum += height
243
244 save_path = os.path.join(img_save_path, 'page_{0}_img_{1}.{2}'.format(
245 pno, img_index, im_list[0][2]))
246 res = Image.new(im_list[0][1].mode, (width, height_sum))
247 h_now = 0
248 for h, m, _ in im_list:
249 res.paste(m, box=(0, h_now))
250 h_now += h
251 res.save(save_path)
......
...@@ -26,7 +26,21 @@ class DocHandler: ...@@ -26,7 +26,21 @@ class DocHandler:
26 26
27 @staticmethod 27 @staticmethod
28 def get_doc_class(business_type): 28 def get_doc_class(business_type):
29 is_hil = business_type in consts.HIL_SET 29 return (HILDoc, consts.HIL_PREFIX) if business_type in consts.HIL_SET else (AFCDoc, consts.AFC_PREFIX)
30 doc_class, prefix = (HILDoc, consts.HIL_PREFIX) if is_hil else (AFCDoc, consts.AFC_PREFIX) 30
31 return doc_class, prefix 31 def fix_scheme(self, scheme):
32 if scheme in consts.DOC_SCHEME_LIST:
33 return scheme
34 elif scheme.upper() in consts.DOC_SCHEME_LIST:
35 return scheme.upper()
36 else:
37 return consts.DOC_SCHEME_LIST[0]
38
39 def fix_data_source(self, data_source):
40 if data_source in consts.DATA_SOURCE_LIST:
41 return data_source
42 elif data_source.upper() in consts.DATA_SOURCE_LIST:
43 return data_source.upper()
44 else:
45 return consts.DATA_SOURCE_LIST[0]
32 46
......
...@@ -7,20 +7,3 @@ class DocStatus(NamedEnum): ...@@ -7,20 +7,3 @@ class DocStatus(NamedEnum):
7 PROCESS_FAILED = (2, '识别失败') 7 PROCESS_FAILED = (2, '识别失败')
8 UPLOAD_FAILED = (3, '同步失败') 8 UPLOAD_FAILED = (3, '同步失败')
9 COMPLETE = (4, '已完成') 9 COMPLETE = (4, '已完成')
10
11
class DocScheme(NamedEnum):
    # Document scheme choices as (ordinal, display name) pairs.
    ACCEPTANCE = (0, 'Acceptance')
    SETTLEMENT = (1, 'Settlement')
    CONTRACT_MANAGEMENT = (2, 'Contract Management')
16
17
class BusinessType(NamedEnum):
    # Business entities as (ordinal, company code) pairs.
    AFC = (0, 'CO00001')
    HIL = (1, 'CO00002')
21
22
class DataSource(NamedEnum):
    # Upload data sources as (ordinal, source name) pairs.
    POS = (0, 'POS')
    EAPP = (1, 'EAPP')
    ECONTRACT = (2, 'Econtract')
......
...@@ -60,7 +60,7 @@ doc_list_args = { ...@@ -60,7 +60,7 @@ doc_list_args = {
60 'status': fields.Int(required=False, 60 'status': fields.Int(required=False,
61 validate=validate.OneOf(DocStatus.get_value_lst())), 61 validate=validate.OneOf(DocStatus.get_value_lst())),
62 'application_id': fields.Str(required=False, validate=validate.Length(max=64)), 62 'application_id': fields.Str(required=False, validate=validate.Length(max=64)),
63 'data_source': fields.Str(required=False, validate=validate.Length(max=64)), 63 'data_source': fields.Str(required=False, validate=validate.OneOf(consts.DATA_SOURCE_LIST)),
64 'business_type': fields.Str(required=True, validate=validate.OneOf(consts.BUSINESS_TYPE_LIST)), 64 'business_type': fields.Str(required=True, validate=validate.OneOf(consts.BUSINESS_TYPE_LIST)),
65 'upload_time_start': fields.Date(required=False), 65 'upload_time_start': fields.Date(required=False),
66 'upload_time_end': fields.Date(required=False), 66 'upload_time_end': fields.Date(required=False),
...@@ -100,6 +100,8 @@ class UploadDocView(GenericView, DocHandler): ...@@ -100,6 +100,8 @@ class UploadDocView(GenericView, DocHandler):
100 document = args.get('document') 100 document = args.get('document')
101 business_type = document.get('businessType') 101 business_type = document.get('businessType')
102 application_id = application_data.get('applicationId') 102 application_id = application_data.get('applicationId')
103 document_scheme = document.get('documentScheme')
104 data_source = document.get('dataSource')
103 try: 105 try:
104 # 1. 上传信息记录 106 # 1. 上传信息记录
105 record = UploadDocRecords.objects.create( 107 record = UploadDocRecords.objects.create(
...@@ -110,9 +112,9 @@ class UploadDocView(GenericView, DocHandler): ...@@ -110,9 +112,9 @@ class UploadDocView(GenericView, DocHandler):
110 guarantor_1=applicant_data.get('guarantor1Name'), 112 guarantor_1=applicant_data.get('guarantor1Name'),
111 guarantor_2=applicant_data.get('guarantor2Name'), 113 guarantor_2=applicant_data.get('guarantor2Name'),
112 document_name=document.get('documentName'), 114 document_name=document.get('documentName'),
113 document_scheme=document.get('documentScheme'), 115 document_scheme=document_scheme,
114 business_type=business_type, 116 business_type=business_type,
115 data_source=document.get('dataSource'), 117 data_source=data_source,
116 upload_finish_time=document.get('uploadFinishTime'), 118 upload_finish_time=document.get('uploadFinishTime'),
117 ) 119 )
118 except IntegrityError as e: 120 except IntegrityError as e:
...@@ -130,17 +132,17 @@ class UploadDocView(GenericView, DocHandler): ...@@ -130,17 +132,17 @@ class UploadDocView(GenericView, DocHandler):
130 guarantor_1=applicant_data.get('guarantor1Name'), 132 guarantor_1=applicant_data.get('guarantor1Name'),
131 guarantor_2=applicant_data.get('guarantor2Name'), 133 guarantor_2=applicant_data.get('guarantor2Name'),
132 document_name=document.get('documentName'), 134 document_name=document.get('documentName'),
133 document_scheme=document.get('documentScheme'), 135 document_scheme=self.fix_scheme(document_scheme),
134 data_source=document.get('dataSource'), 136 data_source=self.fix_data_source(data_source),
135 upload_finish_time=document.get('uploadFinishTime'), 137 upload_finish_time=document.get('uploadFinishTime'),
136 ) 138 )
137 # 3. 选择队列进入 139 # 3. 选择队列进入
138 is_priority = PriorityApplication.objects.filter(application_id=application_id, on_off=True).exists() 140 is_priority = PriorityApplication.objects.filter(application_id=application_id, on_off=True).exists()
139 value = ['{0}_{1}'.format(prefix, doc.id)] 141 tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)]
140 redis_res = rh.enqueue(value, is_priority) 142 enqueue_res = rh.enqueue(tasks, is_priority)
141 self.running_log.info('[doc upload success] [args={0}] [record_id={1}] [prefix={2}] [doc_id={3}] ' 143 self.running_log.info('[doc upload success] [args={0}] [record_id={1}] [business_type={2}] [doc_id={3}] '
142 '[is_priority={4}] [enqueue_res={5}]'.format(args, record.id, prefix, doc.id, 144 '[is_priority={4}] [enqueue_res={5}]'.format(args, record.id, prefix, doc.id,
143 is_priority, redis_res)) 145 is_priority, enqueue_res))
144 return response.ok() 146 return response.ok()
145 147
146 post.openapi_doc = ''' 148 post.openapi_doc = '''
...@@ -174,7 +176,8 @@ class PriorityDocView(GenericView, DocHandler): ...@@ -174,7 +176,8 @@ class PriorityDocView(GenericView, DocHandler):
174 application_id = application_info.get('APPLICATION_ID') 176 application_id = application_info.get('APPLICATION_ID')
175 submit_datetime = application_info.get('SUBMIT_DATETIME') 177 submit_datetime = application_info.get('SUBMIT_DATETIME')
176 entity = application_info.get('ENTITY') 178 entity = application_info.get('ENTITY')
177 submit_datetime = timezone.make_naive(submit_datetime, timezone.get_current_timezone()) 179 if submit_datetime.utcoffset() is not None:
180 submit_datetime = timezone.make_naive(submit_datetime, timezone.get_current_timezone())
178 GCAPRecords.objects.create( 181 GCAPRecords.objects.create(
179 entity=entity, 182 entity=entity,
180 status=application_info.get('STATUS'), 183 status=application_info.get('STATUS'),
...@@ -190,14 +193,14 @@ class PriorityDocView(GenericView, DocHandler): ...@@ -190,14 +193,14 @@ class PriorityDocView(GenericView, DocHandler):
190 doc_class, prefix = self.get_doc_class(entity) 193 doc_class, prefix = self.get_doc_class(entity)
191 doc_ids = doc_class.objects.filter(application_id=application_id, 194 doc_ids = doc_class.objects.filter(application_id=application_id,
192 status=DocStatus.INIT.value).values_list('id', flat=True) 195 status=DocStatus.INIT.value).values_list('id', flat=True)
193 task_str_list = ['{0}_{1}'.format(prefix, doc_id) for doc_id in doc_ids] 196 tasks_list = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc_id) for doc_id in doc_ids]
194 if not task_str_list: 197 if not tasks_list:
195 self.running_log.info( 198 self.running_log.info(
196 '[priority doc success] [args={0}] [task_str_list={1}]'.format(args, task_str_list)) 199 '[priority doc success] [args={0}]'.format(args))
197 else: 200 else:
198 enqueue_res = rh.enqueue(task_str_list, is_priority=True) 201 enqueue_res = rh.enqueue(tasks_list, is_priority=True)
199 self.running_log.info('[priority doc success] [args={0}] [task_str_list={1}] [enqueue_res={2}]'.format( 202 self.running_log.info('[priority doc success] [args={0}] [tasks_list={1}] [enqueue_res={2}]'.format(
200 args, task_str_list, enqueue_res)) 203 args, tasks_list, enqueue_res))
201 return response.ok() 204 return response.ok()
202 205
203 post.openapi_doc = ''' 206 post.openapi_doc = '''
...@@ -268,7 +271,7 @@ class DocView(GenericView, DocHandler): ...@@ -268,7 +271,7 @@ class DocView(GenericView, DocHandler):
268 @use_args(upload_pdf_args, location='files') 271 @use_args(upload_pdf_args, location='files')
269 def post(self, request, args): 272 def post(self, request, args):
270 # 1. 上传信息记录 273 # 1. 上传信息记录
271 const_str = '手工单' 274 const_str = consts.FIXED_APPLICATION_ID
272 metadata_version_id = str(int(time.time())) 275 metadata_version_id = str(int(time.time()))
273 upload_finish_time = timezone.now() 276 upload_finish_time = timezone.now()
274 document_scheme = random.choice(consts.DOC_SCHEME_LIST) 277 document_scheme = random.choice(consts.DOC_SCHEME_LIST)
...@@ -305,8 +308,8 @@ class DocView(GenericView, DocHandler): ...@@ -305,8 +308,8 @@ class DocView(GenericView, DocHandler):
305 ) 308 )
306 # 3. 选择队列进入 309 # 3. 选择队列进入
307 is_priority = False 310 is_priority = False
308 value = ['{0}_{1}'.format(prefix, doc.id)] 311 tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)]
309 redis_res = rh.enqueue(value, is_priority) 312 enqueue_res = rh.enqueue(tasks, is_priority)
310 313
311 pdf_file = args.get('pdf_file') 314 pdf_file = args.get('pdf_file')
312 save_dir_path = os.path.join(conf.DATA_DIR, business_type, str(doc.id)) 315 save_dir_path = os.path.join(conf.DATA_DIR, business_type, str(doc.id))
...@@ -314,7 +317,7 @@ class DocView(GenericView, DocHandler): ...@@ -314,7 +317,7 @@ class DocView(GenericView, DocHandler):
314 os.makedirs(save_dir_path, exist_ok=True) 317 os.makedirs(save_dir_path, exist_ok=True)
315 file_write(pdf_file, save_file_path) 318 file_write(pdf_file, save_file_path)
316 319
317 self.running_log.info('[mock doc upload success] [args={0}] [record_id={1}] [prefix={2}] [doc_id={3}] ' 320 self.running_log.info('[mock doc upload success] [args={0}] [record_id={1}] [business_type={2}] [doc_id={3}] '
318 '[is_priority={4}] [enqueue_res={5}]'.format(args, record.id, prefix, doc.id, 321 '[is_priority={4}] [enqueue_res={5}]'.format(args, record.id, prefix, doc.id,
319 is_priority, redis_res)) 322 is_priority, enqueue_res))
320 return response.ok() 323 return response.ok()
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!