adb37243 by 冯轩

init

1 parent e08e5c00
......@@ -1504,6 +1504,134 @@ class Command(BaseCommand, LoggerMixin):
self.log_base, traceback.format_exc()))
# error_list.append(1)
# return
elif classify_1_str == '29': # e-invoice
try:
max_img_count = 500
for times in range(consts.RETRY_TIMES):
try:
if doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX):
self.online_log.info('{0} [mo ni xia dan] [task={1}] [times={2}] '
'[pdf_path={3}]'.format(self.log_base, task_str,
times, pdf_path))
elif os.path.exists(pdf_path):
self.online_log.info('{0} [pdf from zip file] [task={1}] [times={2}] '
'[pdf_path={3}]'.format(self.log_base, task_str,
times, pdf_path))
else:
# self.edms.download(pdf_path, doc.metadata_version_id)
self.edms.download(pdf_path, doc.metadata_version_id, doc.document_scheme,
business_type)
self.online_log.info('{0} [ecm download success] [task={1}] [times={2}] '
'[pdf_path={3}]'.format(self.log_base, task_str,
times, pdf_path))
# 3.PDF文件提取图片
self.online_log.info('{0} [pdf to img start] [task={1}] [times={2}]'.format(
self.log_base, task_str, times))
start_time = time.time()
pdf_handler.extract_image_for_weixin(max_img_count) #沿用微信流程
end_time = time.time()
speed_time = int(end_time - start_time)
self.online_log.info('{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'.format(
self.log_base, task_str, times, speed_time, pdf_handler.is_new_modify))
except Exception as e:
self.online_log.warn('{0} [download or pdf to img failed] [task={1}] [times={2}] '
'[error={3}]'.format(self.log_base, task_str, times,
traceback.format_exc()))
else:
break
else:
raise Exception('download or pdf to img failed')
if pdf_handler.img_count == 0:
self.online_log.warn('{0} [pdf to img failed (pdf img empty)] [task={1}]'.format(
self.log_base, task_str))
raise Exception('pdf img empty')
elif pdf_handler.img_count >= max_img_count:
self.online_log.info('{0} [too many pdf image] [task={1}] [img_count={2}]'.format(
self.log_base, task_str, pdf_handler.img_count))
try:
report_table = HILOCRReport if business_type == consts.HIL_PREFIX else AFCOCRReport
report_table.objects.create(
case_number=doc.application_id,
request_team=RequestTeam.get_value(doc.document_scheme, 0),
request_trigger=RequestTrigger.get_value(doc.data_source, 0),
input_file=doc.document_name,
transaction_start=doc.start_time,
transaction_end=doc.start_time,
successful_at_this_level=False,
failure_reason=FailureReason.IMG_LIMIT.value,
process_name=ProcessName.ALL.value,
notes='pdf page count: {0}'.format(str(pdf_handler.img_count))
)
except Exception as e:
self.online_log.error('{0} [process error (report db save)] [error={1}]'.format(
self.log_base, traceback.format_exc()))
try:
doc.status = DocStatus.PROCESS_FAILED.value
doc.page_count = pdf_handler.page_count
doc.save()
except Exception as e:
self.online_log.error('{0} [process error (db save)] [error={1}]'.format(
self.log_base, traceback.format_exc()))
else:
try:
if pdf_handler.is_e_pdf:
doc.metadata = pdf_handler.metadata if pdf_handler.metadata is None else \
json.dumps(pdf_handler.metadata)
doc.page_count = pdf_handler.page_count
doc.save()
except Exception as e:
self.online_log.error('{0} [process error (db save)] [error={1}]'.format(
self.log_base, traceback.format_exc()))
with lock:
todo_count_dict[task_str] = pdf_handler.img_count
self.online_log.info('{0} [pdf_2_img_2_queue] [{1}] [is_ebank={2}]'.format(
self.log_base, task_str, pdf_handler.is_ebank
))
for img_idx, img_path in enumerate(pdf_handler.img_path_list):
while img_queue.full():
self.online_log.info('{0} [pdf_2_img_2_queue] [img queue full]'.format(self.log_base))
time.sleep(self.sleep_time_img_put)
if pdf_handler.is_e_weixin_bs:
try:
#self.online_log.info('{0} [pdf_2_img_2_queue] [img_idx={1}] [page_text_list={2}]'.format(self.log_base, img_idx, pdf_handler.page_text_list))
text_list = pdf_handler.page_text_list[img_idx].pop('rebuild_text')
except Exception as e:
text_list = []
else:
text_list = []
img_queue.put((business_type, img_path, text_list))
except Exception as e:
try:
end_time = timezone.now()
report_table = HILOCRReport if business_type == consts.HIL_PREFIX else AFCOCRReport
report_table.objects.create(
case_number=doc.application_id,
request_team=RequestTeam.get_value(doc.document_scheme, 0),
request_trigger=RequestTrigger.get_value(doc.data_source, 0),
input_file=doc.document_name,
transaction_start=doc.start_time,
transaction_end=end_time,
successful_at_this_level=False,
failure_reason=FailureReason.PDF.value,
process_name=ProcessName.ALL.value,
)
except Exception as e:
self.online_log.error('{0} [process error (report db save)] [error={1}]'.format(
self.log_base, traceback.format_exc()))
try:
doc.status = DocStatus.PROCESS_FAILED.value
doc.page_count = pdf_handler.page_count
doc.save()
self.online_log.warn('{0} [process failed (pdf_2_img_2_queue)] [task={1}] '
'[error={2}]'.format(self.log_base, task_str, traceback.format_exc()))
except Exception as e:
self.online_log.error('{0} [process error (db save)] [error={1}]'.format(
self.log_base, traceback.format_exc()))
else: # e-contract or e-fsm-contract or e-hmh
try:
# pdf下载 处理 图片存储 识别
......
......@@ -692,7 +692,10 @@ class UploadDocView(GenericView, DocHandler):
if keyword in document_name:
classify_1 = classify_1_tmp
break
if classify_1 == 0 and (document_name.startswith('dzfp_')):
classify_1 = 29
self.running_log.info('[dzfp process] [doc_id={0}]'.format(doc.id))
if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \
or document_name.endswith('.RAR'):
......@@ -1248,6 +1251,10 @@ class DocView(DocGenericView, DocHandler):
classify_1 = classify_1_tmp
break
if classify_1 == 0 and (document_name.startswith('dzfp_')):
classify_1 = 29
self.running_log.info('[dzfp process] [doc_id={0}]'.format(doc.id))
# tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)]
task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)])
enqueue_res = rh.enqueue([task], is_priority)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!