550d86c1 by 周伟奇

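fix bug: wrap the OCR request loop, the workbook-failure cleanup, and the EDMS upload/completion status updates in try/except, and log these cases as "process error (...)"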

1 parent 6934c592
......@@ -71,6 +71,7 @@ class Command(BaseCommand, LoggerMixin):
self.cronjob_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base))
return None, None, None
self.cronjob_log.info('{0} [get_doc_info success] [task={1}] [is_priority={2}]'.format(self.log_base, task_str, is_priority))
doc, business_type = self.get_doc_object(task_str)
if doc is None:
......@@ -421,36 +422,40 @@ class Command(BaseCommand, LoggerMixin):
time.sleep(self.sleep_time_img_get)
continue
else:
self.cronjob_log.info('{0} [img_2_ocr_1] [get img] [img_path={1}]'.format(self.log_base, img_path))
for times in range(consts.RETRY_TIMES):
try:
with open(img_path, 'rb') as f:
base64_data = base64.b64encode(f.read())
# 获取解码后的base64值
file_data = base64_data.decode()
json_data_1 = {
"file": file_data
}
start_time = time.time()
ocr_1_response = requests.post(url, json=json_data_1)
if ocr_1_response.status_code != 200:
raise OCR1Exception('ocr_1 status code: {0}'.format(ocr_1_response.status_code))
except Exception as e:
self.cronjob_log.warn('{0} [ocr_1 failed] [times={1}] [img_path={2}] [error={3}]'.format(
self.log_base, times, img_path, traceback.format_exc()))
try:
self.cronjob_log.info('{0} [img_2_ocr_1] [get img] [img_path={1}]'.format(self.log_base, img_path))
for times in range(consts.RETRY_TIMES):
try:
with open(img_path, 'rb') as f:
base64_data = base64.b64encode(f.read())
# 获取解码后的base64值
file_data = base64_data.decode()
json_data_1 = {
"file": file_data
}
start_time = time.time()
ocr_1_response = requests.post(url, json=json_data_1)
if ocr_1_response.status_code != 200:
raise OCR1Exception('ocr_1 status code: {0}'.format(ocr_1_response.status_code))
except Exception as e:
self.cronjob_log.warn('{0} [ocr_1 failed] [times={1}] [img_path={2}] [error={3}]'.format(
self.log_base, times, img_path, traceback.format_exc()))
else:
ocr_1_res = ocr_1_response.json()
end_time = time.time()
speed_time = int(end_time - start_time)
self.cronjob_log.info('{0} [ocr_1 success] [img={1}] [res={2}] [speed_time={3}]'.format(
self.log_base, img_path, ocr_1_res, speed_time))
break
else:
ocr_1_res = ocr_1_response.json()
end_time = time.time()
speed_time = int(end_time - start_time)
self.cronjob_log.info('{0} [ocr_1 success] [img={1}] [res={2}] [speed_time={3}]'.format(
self.log_base, img_path, ocr_1_res, speed_time))
break
else:
ocr_1_res = {}
self.cronjob_log.warn('{0} [ocr_1 failed] [img_path={1}]'.format(self.log_base, img_path))
# continue
ocr_1_res = {}
self.cronjob_log.warn('{0} [ocr_1 failed] [img_path={1}]'.format(self.log_base, img_path))
# continue
except Exception as e:
self.cronjob_log.error('{0} [process error (ocr fetch)] [img_path={1}] [error={2}]'.format(
self.log_base, img_path, traceback.format_exc()))
try:
del json_data_1
......@@ -470,7 +475,7 @@ class Command(BaseCommand, LoggerMixin):
else:
todo_count_dict[task_str] = todo_count - 1
except Exception as e:
self.cronjob_log.error('{0} [process failed (store ocr res)] [img_path={1}] [error={2}]'.format(
self.cronjob_log.error('{0} [process error (store ocr res)] [img_path={1}] [error={2}]'.format(
self.log_base, img_path, traceback.format_exc()))
def res_2_wb(self, res_dict, finish_queue, lock):
......@@ -482,12 +487,12 @@ class Command(BaseCommand, LoggerMixin):
time.sleep(self.sleep_time_task_get)
continue
else:
self.cronjob_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str))
ocr_1_res = res_dict.get(task_str, {})
# self.cronjob_log.info('{0} [res_2_wb] [get task res] [task={1}]'.format(
# self.log_base, task_str))
try:
self.cronjob_log.info('{0} [res_2_wb] [get task] [task={1}]'.format(self.log_base, task_str))
ocr_1_res = res_dict.get(task_str, {})
# self.cronjob_log.info('{0} [res_2_wb] [get task res] [task={1}]'.format(
# self.log_base, task_str))
# 4.OCR结果并且构建excel文件
bs_summary = {}
license_summary = {}
......@@ -605,14 +610,19 @@ class Command(BaseCommand, LoggerMixin):
count_list = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme)
wb.save(excel_path)
except Exception as e:
with lock:
if task_str in res_dict:
del res_dict[task_str]
doc, business_type = self.get_doc_object(task_str)
doc.status = DocStatus.PROCESS_FAILED.value
doc.save()
self.cronjob_log.error('{0} [process failed (res to wb)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
try:
with lock:
if task_str in res_dict:
del res_dict[task_str]
doc, business_type = self.get_doc_object(task_str)
doc.status = DocStatus.PROCESS_FAILED.value
doc.save()
self.cronjob_log.error('{0} [process failed (res to wb)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
except Exception as e:
self.cronjob_log.error('{0} [process error (wb end)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
try:
doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, str(doc.id))
img_save_path = os.path.join(doc_data_path, 'img')
......@@ -620,7 +630,7 @@ class Command(BaseCommand, LoggerMixin):
pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id))
os.remove(pdf_path)
except Exception as e:
self.cronjob_log.error('{0} [file remove failed] [task={1}] [error={2}]'.format(
self.cronjob_log.error('{0} [process error (file remove 1)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
else:
try:
......@@ -631,7 +641,7 @@ class Command(BaseCommand, LoggerMixin):
# os.remove(pdf_path)
# os.remove(src_excel_path)
except Exception as e:
self.cronjob_log.error('{0} [file remove failed] [task={1}] [error={2}]'.format(
self.cronjob_log.error('{0} [process error (file remove 2)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
try:
# 5.上传至EDMS
......@@ -648,26 +658,33 @@ class Command(BaseCommand, LoggerMixin):
else:
raise EDMSException(edms_exc)
except Exception as e:
doc.status = DocStatus.UPLOAD_FAILED.value
doc.end_time = timezone.now()
doc.duration = min((doc.end_time - doc.start_time).seconds, 32760)
for field, count in count_list:
if hasattr(doc, field):
setattr(doc, field, count)
doc.save()
self.cronjob_log.error('{0} [process failed (edms upload)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
try:
doc.status = DocStatus.UPLOAD_FAILED.value
doc.end_time = timezone.now()
doc.duration = min((doc.end_time - doc.start_time).seconds, 32760)
for field, count in count_list:
if hasattr(doc, field):
setattr(doc, field, count)
doc.save()
self.cronjob_log.error('{0} [process failed (edms upload)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
except Exception as e:
self.cronjob_log.error('{0} [process error (edms upload)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
else:
doc.status = DocStatus.COMPLETE.value
doc.end_time = timezone.now()
doc.duration = min((doc.end_time - doc.start_time).seconds, 32760)
for field, count in count_list:
if hasattr(doc, field):
setattr(doc, field, count)
doc.save()
self.cronjob_log.info('{0} [process complete] [task={1}]'.format(self.log_base, task_str))
# os.remove(excel_path)
try:
doc.status = DocStatus.COMPLETE.value
doc.end_time = timezone.now()
doc.duration = min((doc.end_time - doc.start_time).seconds, 32760)
for field, count in count_list:
if hasattr(doc, field):
setattr(doc, field, count)
doc.save()
self.cronjob_log.info('{0} [process complete] [task={1}]'.format(self.log_base, task_str))
# os.remove(excel_path)
except Exception as e:
self.cronjob_log.error('{0} [process error (completed)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
# TODO 细化文件状态,不同异常状态,归还队列,重试时采取不同的处理
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!