merge license
Showing
6 changed files
with
79 additions
and
61 deletions
This diff is collapsed.
Click to expand it.
src/apps/doc/exceptions.py
0 → 100644
This diff is collapsed.
Click to expand it.
| ... | @@ -141,32 +141,22 @@ class BSWorkbook(Workbook): | ... | @@ -141,32 +141,22 @@ class BSWorkbook(Workbook): |
| 141 | # month_info process | 141 | # month_info process |
| 142 | month_info = month_mapping.setdefault('xxxx-xx', []) | 142 | month_info = month_mapping.setdefault('xxxx-xx', []) |
| 143 | month_info.append((ws.title, min_row, ws.max_row, 0)) | 143 | month_info.append((ws.title, min_row, ws.max_row, 0)) |
| 144 | elif len(month_list) == 1: | ||
| 145 | # reverse_trend_list process | ||
| 146 | reverse_trend = self.get_reverse_trend(dti.day, idx_list) | ||
| 147 | reverse_trend_list.append(reverse_trend) | ||
| 148 | # month_info process | ||
| 149 | month_info = month_mapping.setdefault(month_list[0], []) | ||
| 150 | day_mean = np.mean(dti.day.dropna()) | ||
| 151 | if len(month_info) == 0: | ||
| 152 | month_info.append((ws.title, min_row, ws.max_row, day_mean)) | ||
| 153 | else: | ||
| 154 | for i, item in enumerate(month_info): | ||
| 155 | if day_mean <= item[-1]: | ||
| 156 | month_info.insert(i, (ws.title, min_row, ws.max_row, day_mean)) | ||
| 157 | break | ||
| 158 | else: | ||
| 159 | month_info.append((ws.title, min_row, ws.max_row, day_mean)) | ||
| 160 | else: | 144 | else: |
| 161 | # reverse_trend_list process | 145 | # reverse_trend_list process |
| 162 | reverse_trend = self.get_reverse_trend(dti.day, idx_list) | 146 | reverse_trend = self.get_reverse_trend(dti.day, idx_list) |
| 163 | reverse_trend_list.append(reverse_trend) | 147 | reverse_trend_list.append(reverse_trend) |
| 164 | # month_info process | 148 | # month_info process |
| 165 | for i, item in enumerate(month_list[:-1]): | 149 | day_idx = dti.day |
| 166 | month_mapping.setdefault(item, []).append( | 150 | idx_list_max_idx = len(idx_list) - 1 |
| 167 | (ws.title, idx_list[i] + min_row, idx_list[i + 1] + min_row - 1, self.MAX_MEAN)) | 151 | for i, item in enumerate(month_list): |
| 168 | month_mapping.setdefault(month_list[-1], []).insert( | 152 | if i == idx_list_max_idx: |
| 169 | 0, (ws.title, idx_list[-1] + min_row, ws.max_row, 0)) | 153 | day_mean = np.mean(day_idx[idx_list[i]:].dropna()) |
| 154 | month_mapping.setdefault(item, []).append( | ||
| 155 | (ws.title, idx_list[i] + min_row, ws.max_row, day_mean)) | ||
| 156 | else: | ||
| 157 | day_mean = np.mean(day_idx[idx_list[i]: idx_list[i + 1]].dropna()) | ||
| 158 | month_mapping.setdefault(item, []).append( | ||
| 159 | (ws.title, idx_list[i] + min_row, idx_list[i + 1] + min_row - 1, day_mean)) | ||
| 170 | 160 | ||
| 171 | def build_metadata_rows(self, confidence, code, print_time, start_date, end_date): | 161 | def build_metadata_rows(self, confidence, code, print_time, start_date, end_date): |
| 172 | if start_date is None or end_date is None: | 162 | if start_date is None or end_date is None: |
| ... | @@ -191,9 +181,9 @@ class BSWorkbook(Workbook): | ... | @@ -191,9 +181,9 @@ class BSWorkbook(Workbook): |
| 191 | def create_meta_sheet(self, card): | 181 | def create_meta_sheet(self, card): |
| 192 | if self.worksheets[0].title == 'Sheet': | 182 | if self.worksheets[0].title == 'Sheet': |
| 193 | ms = self.worksheets[0] | 183 | ms = self.worksheets[0] |
| 194 | ms.title = '{0}({1})'.format(self.meta_sheet_title, card) | 184 | ms.title = '{0}({1})'.format(self.meta_sheet_title, card[-6:]) |
| 195 | else: | 185 | else: |
| 196 | ms = self.create_sheet('{0}({1})'.format(self.meta_sheet_title, card)) | 186 | ms = self.create_sheet('{0}({1})'.format(self.meta_sheet_title, card[-6:])) |
| 197 | return ms | 187 | return ms |
| 198 | 188 | ||
| 199 | def build_meta_sheet(self, card, confidence, code, print_time, start_date, end_date): | 189 | def build_meta_sheet(self, card, confidence, code, print_time, start_date, end_date): |
| ... | @@ -203,6 +193,26 @@ class BSWorkbook(Workbook): | ... | @@ -203,6 +193,26 @@ class BSWorkbook(Workbook): |
| 203 | ms.append(row) | 193 | ms.append(row) |
| 204 | return ms | 194 | return ms |
| 205 | 195 | ||
| 196 | @staticmethod | ||
| 197 | def amount_format(amount_str): | ||
| 198 | if not isinstance(amount_str, str) or amount_str == '': | ||
| 199 | return amount_str | ||
| 200 | # 1.替换 | ||
| 201 | res_str = amount_str.translate(consts.TRANS) | ||
| 202 | # 2.删除多余的- | ||
| 203 | res_str = res_str[0] + res_str[1:].replace('-', '') | ||
| 204 | # 3.首字符处理 | ||
| 205 | if res_str[0] in consts.ERROR_CHARS: | ||
| 206 | res_str = '-{0}'.format(res_str[1:]) | ||
| 207 | # 4.逗号与句号处理 | ||
| 208 | if len(res_str) >= 4: | ||
| 209 | period_idx = len(res_str) - 3 | ||
| 210 | if res_str[period_idx] == '.' and res_str[period_idx - 1] == ',': | ||
| 211 | res_str = '{0}{1}'.format(res_str[:period_idx - 1], res_str[period_idx:]) | ||
| 212 | elif res_str[period_idx] == ',': | ||
| 213 | res_str = '{0}.{1}'.format(res_str[:period_idx], res_str[period_idx + 1:]) | ||
| 214 | return res_str | ||
| 215 | |||
| 206 | def build_month_sheet(self, card, month_mapping, ms, is_reverse): | 216 | def build_month_sheet(self, card, month_mapping, ms, is_reverse): |
| 207 | tmp_ws = self.create_sheet('tmp_ws') | 217 | tmp_ws = self.create_sheet('tmp_ws') |
| 208 | for month in sorted(month_mapping.keys()): | 218 | for month in sorted(month_mapping.keys()): |
| ... | @@ -235,29 +245,25 @@ class BSWorkbook(Workbook): | ... | @@ -235,29 +245,25 @@ class BSWorkbook(Workbook): |
| 235 | # 3.3.余额转数值 | 245 | # 3.3.余额转数值 |
| 236 | over_cell = rows[consts.OVER_IDX] | 246 | over_cell = rows[consts.OVER_IDX] |
| 237 | try: | 247 | try: |
| 238 | if isinstance(over_cell.value, str): | 248 | over_cell.value = locale.atof(self.amount_format(over_cell.value)) |
| 239 | over_cell.value = over_cell.value.translate(consts.TRANS) | ||
| 240 | over_cell.value = locale.atof(over_cell.value) | ||
| 241 | except Exception as e: | 249 | except Exception as e: |
| 242 | continue | 250 | continue |
| 243 | else: | 251 | else: |
| 244 | over_cell.number_format = numbers.FORMAT_NUMBER_COMMA_SEPARATED1 | 252 | over_cell.number_format = numbers.FORMAT_NUMBER_COMMA_SEPARATED1 |
| 245 | 253 | ||
| 246 | # 3.4.余额转数值 | 254 | # 3.4.金额转数值 |
| 247 | try: | 255 | try: |
| 248 | try: | 256 | try: |
| 249 | if isinstance(amount_cell.value, str): # TODO 可在转化数字失败后,再替换 | 257 | amount_cell.value = locale.atof(self.amount_format(amount_cell.value)) |
| 250 | amount_cell.value = amount_cell.value.translate(consts.TRANS) | ||
| 251 | amount_cell.value = locale.atof(amount_cell.value) | ||
| 252 | except Exception as e: | 258 | except Exception as e: |
| 253 | try: | 259 | try: |
| 254 | if isinstance(rows[consts.INCOME_IDX].value, str): | 260 | amount_cell.value = locale.atof(self.amount_format(rows[consts.INCOME_IDX].value)) |
| 255 | rows[consts.OUTLAY_IDX].value = rows[consts.INCOME_IDX].value.translate(consts.TRANS) | 261 | if amount_cell.value == 0: |
| 256 | amount_cell.value = locale.atof(rows[consts.OUTLAY_IDX].value) | 262 | raise |
| 263 | elif amount_cell.value < 0: | ||
| 264 | amount_cell.value = -amount_cell.value | ||
| 257 | except Exception as e: | 265 | except Exception as e: |
| 258 | if isinstance(rows[consts.OUTLAY_IDX].value, str): | 266 | amount_cell.value = locale.atof(self.amount_format(rows[consts.OUTLAY_IDX].value)) |
| 259 | rows[consts.OUTLAY_IDX].value = rows[consts.OUTLAY_IDX].value.translate(consts.TRANS) | ||
| 260 | amount_cell.value = locale.atof(rows[consts.OUTLAY_IDX].value) | ||
| 261 | if amount_cell.value > 0: | 267 | if amount_cell.value > 0: |
| 262 | amount_cell.value = -amount_cell.value | 268 | amount_cell.value = -amount_cell.value |
| 263 | except Exception as e: | 269 | except Exception as e: |
| ... | @@ -313,18 +319,18 @@ class BSWorkbook(Workbook): | ... | @@ -313,18 +319,18 @@ class BSWorkbook(Workbook): |
| 313 | # } | 319 | # } |
| 314 | for card, summary in bs_summary.items(): | 320 | for card, summary in bs_summary.items(): |
| 315 | # 1.原表修剪、排列、按照月份分割 | 321 | # 1.原表修剪、排列、按照月份分割 |
| 316 | start_date = summary['start_date'] | 322 | start_date = summary.get('start_date') |
| 317 | end_date = summary['end_date'] | 323 | end_date = summary.get('end_date') |
| 318 | date_statistics = False | 324 | date_statistics = False |
| 319 | if start_date is None or end_date is None: | 325 | if start_date is None or end_date is None: |
| 320 | date_statistics = True | 326 | date_statistics = True |
| 321 | date_list = [] | 327 | date_list = [] |
| 322 | month_mapping = {} | 328 | month_mapping = {} |
| 323 | reverse_trend_list = [] | 329 | reverse_trend_list = [] |
| 324 | for sheet in summary['sheet']: | 330 | for sheet in summary.get('sheet', []): |
| 325 | ws = self.get_sheet_by_name(sheet) | 331 | ws = self.get_sheet_by_name(sheet) |
| 326 | # 1.1.删除多余列、排列 | 332 | # 1.1.删除多余列、排列 |
| 327 | min_row = self.sheet_prune(ws, summary['classify']) | 333 | min_row = self.sheet_prune(ws, summary.get('classify', 0)) |
| 328 | # 1.2.按月份分割 | 334 | # 1.2.按月份分割 |
| 329 | self.sheet_split(ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics) | 335 | self.sheet_split(ws, month_mapping, reverse_trend_list, min_row, date_list, date_statistics) |
| 330 | 336 | ||
| ... | @@ -334,32 +340,43 @@ class BSWorkbook(Workbook): | ... | @@ -334,32 +340,43 @@ class BSWorkbook(Workbook): |
| 334 | 340 | ||
| 335 | # 2.元信息提取表 | 341 | # 2.元信息提取表 |
| 336 | ms = self.build_meta_sheet(card, | 342 | ms = self.build_meta_sheet(card, |
| 337 | summary['confidence'], | 343 | summary.get('confidence', 1), |
| 338 | summary['code'], | 344 | summary.get('code'), |
| 339 | summary['print_time'], | 345 | summary.get('print_time'), |
| 340 | start_date, | 346 | start_date, |
| 341 | end_date) | 347 | end_date) |
| 342 | 348 | ||
| 343 | # 3.创建月份表、提取/高亮关键行 | 349 | # 3.创建月份表、提取/高亮关键行 |
| 344 | is_reverse = False | 350 | # 倒序处理 |
| 345 | if sum(reverse_trend_list) > 0: # 倒序处理 | 351 | is_reverse = True if sum(reverse_trend_list) > 0 else False |
| 346 | is_reverse = True | 352 | for month_list in month_mapping.values(): |
| 347 | for month_list in month_mapping.values(): | 353 | month_list.sort(key=lambda x: x[-1], reverse=is_reverse) |
| 348 | month_list.sort(key=lambda x: x[-1], reverse=True) | 354 | |
| 349 | self.build_month_sheet(card, month_mapping, ms, is_reverse) | 355 | self.build_month_sheet(card, month_mapping, ms, is_reverse) |
| 350 | 356 | ||
| 351 | # 4.删除原表 | 357 | # 4.删除原表 |
| 352 | for sheet in summary['sheet']: | 358 | for sheet in summary.get('sheet'): |
| 353 | self.remove(self.get_sheet_by_name(sheet)) | 359 | self.remove(self.get_sheet_by_name(sheet)) |
| 354 | 360 | ||
| 355 | def license_rebuild(self, license_summary): | 361 | def license_rebuild(self, license_summary): |
| 356 | for en_key, cn_key in consts.LICENSE_ORDER: | 362 | for classify, (_, name) in consts.LICENSE_ORDER: |
| 357 | ws = self.create_sheet(cn_key) | 363 | res = license_summary.get(classify) |
| 358 | for bl in license_summary.get(en_key, []): | 364 | if res is None: |
| 365 | continue | ||
| 366 | ws = self.create_sheet(name) | ||
| 367 | for bl in res: | ||
| 359 | for bl_field in bl: | 368 | for bl_field in bl: |
| 360 | ws.append(bl_field) | 369 | ws.append(bl_field) |
| 361 | ws.append((None, )) | 370 | ws.append((None, )) |
| 362 | 371 | ||
| 363 | def rebuild(self, bs_summary, license_summary): | 372 | def skip_img_sheet(self, skip_img): |
| 373 | if skip_img: | ||
| 374 | ws = self.create_sheet(consts.SKIP_IMG_SHEET_NAME) | ||
| 375 | ws.append(consts.SKIP_IMG_SHEET_HEADER) | ||
| 376 | for img_tuple in skip_img: | ||
| 377 | ws.append(img_tuple) | ||
| 378 | |||
| 379 | def rebuild(self, bs_summary, license_summary, skip_img): | ||
| 364 | self.bs_rebuild(bs_summary) | 380 | self.bs_rebuild(bs_summary) |
| 365 | # self.license_rebuild(license_summary) | 381 | self.license_rebuild(license_summary) |
| 382 | self.skip_img_sheet(skip_img) | ... | ... |
| ... | @@ -25,7 +25,7 @@ class PDFHandler: | ... | @@ -25,7 +25,7 @@ class PDFHandler: |
| 25 | def __init__(self, path, img_dir_path): | 25 | def __init__(self, path, img_dir_path): |
| 26 | self.path = path | 26 | self.path = path |
| 27 | self.img_dir_path = img_dir_path | 27 | self.img_dir_path = img_dir_path |
| 28 | self.img_info_list = [] | 28 | self.img_path_list = [] |
| 29 | self.xref_set = set() | 29 | self.xref_set = set() |
| 30 | 30 | ||
| 31 | def get_img_save_path(self, pno, img_index=0, ext='png'): | 31 | def get_img_save_path(self, pno, img_index=0, ext='png'): |
| ... | @@ -38,7 +38,7 @@ class PDFHandler: | ... | @@ -38,7 +38,7 @@ class PDFHandler: |
| 38 | pm = page.getPixmap(matrix=trans_2, alpha=False) | 38 | pm = page.getPixmap(matrix=trans_2, alpha=False) |
| 39 | img_save_path = self.get_img_save_path(page.number) | 39 | img_save_path = self.get_img_save_path(page.number) |
| 40 | pm.writePNG(img_save_path) | 40 | pm.writePNG(img_save_path) |
| 41 | self.img_info_list.append((img_save_path, page.number, 0)) | 41 | self.img_path_list.append(img_save_path) |
| 42 | 42 | ||
| 43 | @staticmethod | 43 | @staticmethod |
| 44 | def getimage(pix): | 44 | def getimage(pix): |
| ... | @@ -88,7 +88,7 @@ class PDFHandler: | ... | @@ -88,7 +88,7 @@ class PDFHandler: |
| 88 | with open(img_save_path, "wb") as f: | 88 | with open(img_save_path, "wb") as f: |
| 89 | f.write(img_data) | 89 | f.write(img_data) |
| 90 | self.xref_set.add(xref) | 90 | self.xref_set.add(xref) |
| 91 | self.img_info_list.append((img_save_path, pno, img_index)) | 91 | self.img_path_list.append(img_save_path) |
| 92 | 92 | ||
| 93 | @staticmethod | 93 | @staticmethod |
| 94 | def split_il(il): | 94 | def split_il(il): |
| ... | @@ -179,7 +179,7 @@ class PDFHandler: | ... | @@ -179,7 +179,7 @@ class PDFHandler: |
| 179 | img_save_path = self.get_img_save_path(pno, img_index, im_list[0][2]) | 179 | img_save_path = self.get_img_save_path(pno, img_index, im_list[0][2]) |
| 180 | new_img.save(img_save_path) | 180 | new_img.save(img_save_path) |
| 181 | page_to_png = False | 181 | page_to_png = False |
| 182 | self.img_info_list.append((img_save_path, pno, img_index)) | 182 | self.img_path_list.append(img_save_path) |
| 183 | 183 | ||
| 184 | # 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片 | 184 | # 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片 |
| 185 | if page_to_png: | 185 | if page_to_png: | ... | ... |
-
Please register or sign in to post a comment