cd509dca by 冯轩

add:log

1 parent 0cb79d87
No preview for this file type
......@@ -178,6 +178,8 @@ class Command(BaseCommand, LoggerMixin):
# self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path))
def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary):
self.online_log.warn('{0} [bs_process] [ocr_data={1}] [bs_summary={2}] [unknown_summary={3}] [classify={4}] [res_list={5}] [pno={6}] [ino={7}] [part_idx={8}] [income_keywords_dictionary={9}]'.format(
self.log_base, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx, income_keywords_dictionary))
sheets = ocr_data.get('data', [])
if not sheets:
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
......@@ -2053,8 +2055,8 @@ class Command(BaseCommand, LoggerMixin):
try:
# 重构Excel文件
# src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
# wb.save(src_excel_path)
src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
wb.save(src_excel_path)
#need_follow表示在上传edms时文件名是否要添加"关注"两字
count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict, financial_explanation_dict, down_payment_dict)
wb.save(excel_path)
......
......@@ -122,13 +122,19 @@ class BSWorkbook(Workbook, LoggerMixin):
header_col_list = []
for first_row in ws.iter_rows(max_row=1, min_row=1, values_only=True):
sheet_header_info.setdefault(ws.title, {}).setdefault(consts.HEADER_KEY, first_row)
self.online_log.warn('{0} [header_collect_1] [first_row={1}] [sheet_header_info={2}]'.format(
self.log_base, first_row, sheet_header_info))
for idx, header_value in enumerate(first_row):
header_col = self.get_header_col(header_value, classify)
self.online_log.warn('{0} [header_collect_2] [idx={1}] [header_value={2}] [header_col={3}]'.format(
self.log_base, idx, header_value, header_col))
if classify == consts.MS_CLASSIFY and header_col == consts.OVER_KEY and \
header_value == '账户余额现转标志' and not first_row[idx - 1]:
idx -= 1
if header_col is not None:
header_col_list.append((idx, header_col))
self.online_log.warn('{0} [header_collect_3] [header_col_list={1}]'.format(
self.log_base, header_col_list))
find_count = len(header_col_list)
if find_count < 2:
......@@ -136,15 +142,25 @@ class BSWorkbook(Workbook, LoggerMixin):
else:
for idx, header_col in header_col_list:
sheet_header_info.setdefault(ws.title, {}).setdefault(header_col, idx)
self.online_log.warn('{0} [header_collect_4] [sheet_header_info={1}]'.format(
self.log_base, sheet_header_info))
find_col_set = sheet_header_info.setdefault(ws.title, {}).setdefault(consts.FIND_COL_KEY, set())
find_col_set.add(idx)
self.online_log.warn('{0} [header_collect_5] [sheet_header_info={1}]'.format(
self.log_base, sheet_header_info))
col_count = header_info.setdefault(header_col, {}).get(idx)
header_info.setdefault(header_col, {})[idx] = 1 if col_count is None else col_count+1
self.online_log.warn('{0} [header_collect_6] [header_info={1}]'.format(
self.log_base, header_info))
sheet_header_info.setdefault(ws.title, {}).setdefault(consts.FIND_COUNT_KEY, find_count)
self.online_log.warn('{0} [header_collect_7] [sheet_header_info={1}]'.format(
self.log_base, sheet_header_info))
min_row = 1 if find_count == 0 else 2
sheet_header_info.setdefault(ws.title, {}).setdefault(consts.MIN_ROW_KEY, min_row)
max_column_list.append(ws.max_column)
self.online_log.warn('{0} [header_collect_8] [sheet_header_info={1}] [header_info={2}] [max_column_list={3}]'.format(
self.log_base, sheet_header_info, header_info, max_column_list))
@staticmethod
def header_statistics(sheet_header_info, header_info, classify, special_nhzs):
......@@ -194,7 +210,7 @@ class BSWorkbook(Workbook, LoggerMixin):
return statistics_header_info, max_find_count
@staticmethod
def get_data_col_min_row(sheet, sheet_header_info, header_info, classify):
def get_data_col_min_row(self, sheet, sheet_header_info, header_info, classify):
date_col = sheet_header_info.get(sheet, {}).get(consts.DATE_KEY)
if date_col is None:
date_col_dict = header_info.get(consts.DATE_KEY, {})
......@@ -273,6 +289,8 @@ class BSWorkbook(Workbook, LoggerMixin):
return reverse_trend
def sheet_split(self, ws, date_col, min_row, month_mapping, reverse_trend_list, date_list, date_statistics):
self.online_log.warn('{0} [sheet_split] [date_col={1}] [min_row={2}] [month_mapping={3}] [reverse_trend_list={4}] [date_list={5}] [date_statistics={6}]'.format(
self.log_base, date_col, min_row, month_mapping, reverse_trend_list, date_list, date_statistics))
if date_col is None:
# month_info process
month_info = month_mapping.setdefault('xxxx-xx', [])
......@@ -749,7 +767,9 @@ class BSWorkbook(Workbook, LoggerMixin):
reverse_trend_list = [] # 用于判断倒序与正序
for sheet in sheets_list:
ws = self.get_sheet_by_name(sheet)
date_col, min_row = self.get_data_col_min_row(sheet, sheet_header_info, header_info, classify)
date_col, min_row = self.get_data_col_min_row(self, sheet, sheet_header_info, header_info, classify)
self.online_log.warn('{0} [bs_rebuild] [date_col={1}] [min_row={2}]'.format(
self.log_base, date_col, min_row))
self.sheet_split(ws, date_col, min_row, month_mapping, reverse_trend_list, date_list, date_statistics)
if date_statistics is True and len(date_list) > 1:
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!