fix bug & add skip_img_sheet
Showing 4 changed files with 175 additions and 166 deletions
... | @@ -60,6 +60,8 @@ TRANS_MAP = { | ... | @@ -60,6 +60,8 @@ TRANS_MAP = { |
60 | } | 60 | } |
61 | TRANS = str.maketrans(TRANS_MAP) | 61 | TRANS = str.maketrans(TRANS_MAP) |
62 | ERROR_CHARS = {'.', '·', '•'} | 62 | ERROR_CHARS = {'.', '·', '•'} |
63 | SKIP_IMG_SHEET_NAME = '未处理图片' | ||
64 | SKIP_IMG_SHEET_HEADER = ('页码', '序号') | ||
63 | 65 | ||
64 | CARD_RATIO = 0.9 | 66 | CARD_RATIO = 0.9 |
65 | UNKNOWN_CARD = '未知卡号' | 67 | UNKNOWN_CARD = '未知卡号' | ... | ... |
... | @@ -80,19 +80,20 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -80,19 +80,20 @@ class Command(BaseCommand, LoggerMixin): |
80 | self.log_base, business_type, doc.id, pdf_path)) | 80 | self.log_base, business_type, doc.id, pdf_path)) |
81 | return doc_data_path, excel_path, src_excel_path, pdf_path | 81 | return doc_data_path, excel_path, src_excel_path, pdf_path |
82 | 82 | ||
83 | @staticmethod | 83 | def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, img_path, classify, skip_img): |
84 | def bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify): | ||
85 | sheets = ocr_data.get('data', []) | 84 | sheets = ocr_data.get('data', []) |
86 | if not sheets: | 85 | if not sheets: |
86 | skip_img.append(self.parse_img_path(img_path)) | ||
87 | return | 87 | return |
88 | confidence = ocr_data.get('confidence', 1) | 88 | confidence = ocr_data.get('confidence', 1) |
89 | img_name, _ = os.path.splitext(os.path.basename(img_path)) | 89 | img_name, _ = os.path.splitext(os.path.basename(img_path)) |
90 | for i, sheet in enumerate(sheets): | 90 | for i, sheet in enumerate(sheets): |
91 | sheet_name = '{0}_{1}'.format(img_name, i) | ||
92 | ws = wb.create_sheet(sheet_name) | ||
93 | cells = sheet.get('cells') | 91 | cells = sheet.get('cells') |
94 | if not cells: | 92 | if not cells: |
93 | skip_img.append(self.parse_img_path(img_path)) | ||
95 | continue | 94 | continue |
95 | sheet_name = '{0}_{1}'.format(img_name, i) | ||
96 | ws = wb.create_sheet(sheet_name) | ||
96 | for cell in cells: | 97 | for cell in cells: |
97 | c1 = cell.get('start_column') | 98 | c1 = cell.get('start_column') |
98 | r1 = cell.get('start_row') | 99 | r1 = cell.get('start_row') |
... | @@ -147,9 +148,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -147,9 +148,10 @@ class Command(BaseCommand, LoggerMixin): |
147 | ed_list.append(summary[6]) | 148 | ed_list.append(summary[6]) |
148 | 149 | ||
149 | @staticmethod | 150 | @staticmethod |
150 | def license1_process(ocr_data, license_summary, classify): | 151 | def license1_process(ocr_data, license_summary, classify, skip_img, img_path): |
151 | license_data = ocr_data.get('data', []) | 152 | license_data = ocr_data.get('data', []) |
152 | if not license_data: | 153 | if not license_data: |
154 | skip_img.append(img_path) | ||
153 | return | 155 | return |
154 | for license_dict in license_data: | 156 | for license_dict in license_data: |
155 | res_list = [] | 157 | res_list = [] |
... | @@ -157,8 +159,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -157,8 +159,7 @@ class Command(BaseCommand, LoggerMixin): |
157 | res_list.append((field, value)) | 159 | res_list.append((field, value)) |
158 | license_summary.setdefault(classify, []).append(res_list) | 160 | license_summary.setdefault(classify, []).append(res_list) |
159 | 161 | ||
160 | @staticmethod | 162 | def license2_process(self, ocr_res_2, license_summary, pid, classify, skip_img, img_path): |
161 | def license2_process(ocr_res_2, license_summary, pid, classify): | ||
162 | if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET: | 163 | if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET: |
163 | if pid == consts.BC_PID: | 164 | if pid == consts.BC_PID: |
164 | # 银行卡 | 165 | # 银行卡 |
... | @@ -174,113 +175,16 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -174,113 +175,16 @@ class Command(BaseCommand, LoggerMixin): |
174 | res_list.append( | 175 | res_list.append( |
175 | (field_dict.get('chn_key', ''), field_dict.get('value', ''))) | 176 | (field_dict.get('chn_key', ''), field_dict.get('value', ''))) |
176 | license_summary.setdefault(classify, []).append(res_list) | 177 | license_summary.setdefault(classify, []).append(res_list) |
177 | |||
178 | async def fetch_ocr_result(self, url, json_data): | ||
179 | async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session: | ||
180 | async with session.post(url, json=json_data) as response: | ||
181 | if response.status == 200: | ||
182 | return await response.json() | ||
183 | |||
184 | # async def img_2_ocr_2_wb(self, wb, img_path, summary): | ||
185 | # res = await self.fetch_ocr_result(img_path) | ||
186 | # self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res)) | ||
187 | # sheets_list = res.get('result').get('res') | ||
188 | # img_name = os.path.basename(img_path) | ||
189 | # self.append_sheet(wb, sheets_list, img_name, summary) | ||
190 | |||
191 | async def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary): | ||
192 | with open(img_path, 'rb') as f: | ||
193 | base64_data = base64.b64encode(f.read()) | ||
194 | # 获取解码后的base64值 | ||
195 | file_data = base64_data.decode() | ||
196 | json_data_1 = { | ||
197 | "file": file_data | ||
198 | } | ||
199 | ocr_res_1 = await self.fetch_ocr_result(self.ocr_url_1, json_data_1) | ||
200 | if ocr_res_1 is None: | ||
201 | raise Exception('ocr 1 error, img_path={0}'.format(img_path)) | ||
202 | else: | 178 | else: |
203 | self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format( | 179 | skip_img.append(self.parse_img_path(img_path)) |
204 | self.log_base, img_path, ocr_res_1)) | ||
205 | |||
206 | if ocr_res_1.get('code') == 1: | ||
207 | ocr_data = ocr_res_1.get('data', {}) | ||
208 | classify = ocr_data.get('classify') | ||
209 | if classify is None: | ||
210 | return | ||
211 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 | ||
212 | return | ||
213 | elif classify in consts.LICENSE_CLASSIFY_SET_1: # 证件1 | ||
214 | self.license1_process(ocr_data, license_summary, classify) | ||
215 | elif classify in consts.LICENSE_CLASSIFY_SET_2: # 证件2 | ||
216 | pid, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify) | ||
217 | json_data_2 = { | ||
218 | "pid": str(pid), | ||
219 | "key": conf.OCR_KEY, | ||
220 | "secret": conf.OCR_SECRET, | ||
221 | "file": file_data | ||
222 | } | ||
223 | ocr_res_2 = await self.fetch_ocr_result(self.ocr_url_2, json_data_2) | ||
224 | if ocr_res_2 is None: | ||
225 | raise Exception('ocr 2 error, img_path={0}'.format(img_path)) | ||
226 | else: | ||
227 | # 识别结果 | ||
228 | self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format( | ||
229 | self.log_base, img_path, ocr_res_2)) | ||
230 | self.license2_process(ocr_res_2, license_summary, pid, classify) | ||
231 | else: # 流水处理 | ||
232 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify) | ||
233 | 180 | ||
234 | # def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary): | 181 | # async def fetch_ocr_result(self, url, json_data): |
235 | # # # 流水 | 182 | # async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session: |
236 | # # res = { | 183 | # async with session.post(url, json=json_data) as response: |
237 | # # 'code': 1, | 184 | # if response.status == 200: |
238 | # # 'msg': 'success', | 185 | # return await response.json() |
239 | # # 'data': { | 186 | # |
240 | # # 'classify': 0, | 187 | # async def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary): |
241 | # # 'confidence': 0.999, | ||
242 | # # 'data': [ | ||
243 | # # { | ||
244 | # # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], | ||
245 | # # 'cells': [] | ||
246 | # # }, | ||
247 | # # { | ||
248 | # # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], | ||
249 | # # 'cells': [] | ||
250 | # # } | ||
251 | # # ] | ||
252 | # # } | ||
253 | # # } | ||
254 | # # | ||
255 | # # # 证件-1 | ||
256 | # # res = { | ||
257 | # # 'code': 1, | ||
258 | # # 'msg': 'success', | ||
259 | # # 'data': { | ||
260 | # # 'classify': 0, | ||
261 | # # 'confidence': 0.999, | ||
262 | # # 'data': [ | ||
263 | # # { | ||
264 | # # 'cn_key': 'value', | ||
265 | # # 'cn_key': 'value', | ||
266 | # # }, | ||
267 | # # { | ||
268 | # # 'cn_key': 'value', | ||
269 | # # 'cn_key': 'value', | ||
270 | # # }, | ||
271 | # # ] | ||
272 | # # } | ||
273 | # # } | ||
274 | # # | ||
275 | # # # 证件-2 or 其他类 | ||
276 | # # res = { | ||
277 | # # 'code': 1, | ||
278 | # # 'msg': 'success', | ||
279 | # # 'data': { | ||
280 | # # 'classify': 0, | ||
281 | # # 'confidence': 0.999, | ||
282 | # # } | ||
283 | # # } | ||
284 | # with open(img_path, 'rb') as f: | 188 | # with open(img_path, 'rb') as f: |
285 | # base64_data = base64.b64encode(f.read()) | 189 | # base64_data = base64.b64encode(f.read()) |
286 | # # 获取解码后的base64值 | 190 | # # 获取解码后的base64值 |
... | @@ -288,9 +192,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -288,9 +192,10 @@ class Command(BaseCommand, LoggerMixin): |
288 | # json_data_1 = { | 192 | # json_data_1 = { |
289 | # "file": file_data | 193 | # "file": file_data |
290 | # } | 194 | # } |
291 | # response_1 = requests.post(self.ocr_url_1, json=json_data_1) | 195 | # ocr_res_1 = await self.fetch_ocr_result(self.ocr_url_1, json_data_1) |
292 | # if response_1.status_code == 200: | 196 | # if ocr_res_1 is None: |
293 | # ocr_res_1 = response_1.json() | 197 | # raise Exception('ocr 1 error, img_path={0}'.format(img_path)) |
198 | # else: | ||
294 | # self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format( | 199 | # self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format( |
295 | # self.log_base, img_path, ocr_res_1)) | 200 | # self.log_base, img_path, ocr_res_1)) |
296 | # | 201 | # |
... | @@ -311,21 +216,119 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -311,21 +216,119 @@ class Command(BaseCommand, LoggerMixin): |
311 | # "secret": conf.OCR_SECRET, | 216 | # "secret": conf.OCR_SECRET, |
312 | # "file": file_data | 217 | # "file": file_data |
313 | # } | 218 | # } |
314 | # response_2 = requests.post(self.ocr_url_2, data=json_data_2) | 219 | # ocr_res_2 = await self.fetch_ocr_result(self.ocr_url_2, json_data_2) |
315 | # if response_2.status_code == 200: | 220 | # if ocr_res_2 is None: |
221 | # raise Exception('ocr 2 error, img_path={0}'.format(img_path)) | ||
222 | # else: | ||
316 | # # 识别结果 | 223 | # # 识别结果 |
317 | # ocr_res_2 = response_2.json() | ||
318 | # self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format( | 224 | # self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format( |
319 | # self.log_base, img_path, ocr_res_2)) | 225 | # self.log_base, img_path, ocr_res_2)) |
320 | # self.license2_process(ocr_res_2, license_summary, pid, classify) | 226 | # self.license2_process(ocr_res_2, license_summary, pid, classify) |
321 | # else: | ||
322 | # raise Exception('ocr 2 error, img_path={0}'.format(img_path)) | ||
323 | # else: # 流水处理 | 227 | # else: # 流水处理 |
324 | # self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify) | 228 | # self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify) |
325 | # else: | 229 | |
326 | # pass | 230 | def img_2_ocr_2_wb(self, wb, img_path, bs_summary, unknown_summary, license_summary, skip_img): |
327 | # else: | 231 | # # 流水 |
328 | # raise Exception('ocr 1 error, img_path={0}'.format(img_path)) | 232 | # res = { |
233 | # 'code': 1, | ||
234 | # 'msg': 'success', | ||
235 | # 'data': { | ||
236 | # 'classify': 0, | ||
237 | # 'confidence': 0.999, | ||
238 | # 'data': [ | ||
239 | # { | ||
240 | # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], | ||
241 | # 'cells': [] | ||
242 | # }, | ||
243 | # { | ||
244 | # 'summary': ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'], | ||
245 | # 'cells': [] | ||
246 | # } | ||
247 | # ] | ||
248 | # } | ||
249 | # } | ||
250 | # | ||
251 | # # 证件-1 | ||
252 | # res = { | ||
253 | # 'code': 1, | ||
254 | # 'msg': 'success', | ||
255 | # 'data': { | ||
256 | # 'classify': 0, | ||
257 | # 'confidence': 0.999, | ||
258 | # 'data': [ | ||
259 | # { | ||
260 | # 'cn_key': 'value', | ||
261 | # 'cn_key': 'value', | ||
262 | # }, | ||
263 | # { | ||
264 | # 'cn_key': 'value', | ||
265 | # 'cn_key': 'value', | ||
266 | # }, | ||
267 | # ] | ||
268 | # } | ||
269 | # } | ||
270 | # | ||
271 | # # 证件-2 or 其他类 | ||
272 | # res = { | ||
273 | # 'code': 1, | ||
274 | # 'msg': 'success', | ||
275 | # 'data': { | ||
276 | # 'classify': 0, | ||
277 | # 'confidence': 0.999, | ||
278 | # } | ||
279 | # } | ||
280 | with open(img_path, 'rb') as f: | ||
281 | base64_data = base64.b64encode(f.read()) | ||
282 | # 获取解码后的base64值 | ||
283 | file_data = base64_data.decode() | ||
284 | json_data_1 = { | ||
285 | "file": file_data | ||
286 | } | ||
287 | response_1 = requests.post(self.ocr_url_1, json=json_data_1) | ||
288 | if response_1.status_code == 200: | ||
289 | ocr_res_1 = response_1.json() | ||
290 | self.cronjob_log.info('{0} [ocr_1 result] [img={1}] [res={2}]'.format( | ||
291 | self.log_base, img_path, ocr_res_1)) | ||
292 | |||
293 | if ocr_res_1.get('code') == 1: | ||
294 | ocr_data = ocr_res_1.get('data', {}) | ||
295 | classify = ocr_data.get('classify') | ||
296 | if classify is None: | ||
297 | skip_img.append(self.parse_img_path(img_path)) | ||
298 | return | ||
299 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 | ||
300 | skip_img.append(self.parse_img_path(img_path)) | ||
301 | return | ||
302 | elif classify in consts.LICENSE_CLASSIFY_SET_1: # 证件1 | ||
303 | self.license1_process(ocr_data, license_summary, classify, skip_img, img_path) | ||
304 | elif classify in consts.LICENSE_CLASSIFY_SET_2: # 证件2 | ||
305 | pid, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify) | ||
306 | json_data_2 = { | ||
307 | "pid": str(pid), | ||
308 | "key": conf.OCR_KEY, | ||
309 | "secret": conf.OCR_SECRET, | ||
310 | "file": file_data | ||
311 | } | ||
312 | response_2 = requests.post(self.ocr_url_2, data=json_data_2) | ||
313 | if response_2.status_code == 200: | ||
314 | # 识别结果 | ||
315 | ocr_res_2 = response_2.json() | ||
316 | self.cronjob_log.info('{0} [ocr_2 result] [img={1}] [res={2}]'.format( | ||
317 | self.log_base, img_path, ocr_res_2)) | ||
318 | self.license2_process(ocr_res_2, license_summary, pid, classify, skip_img, img_path) | ||
319 | else: | ||
320 | raise Exception('ocr 2 error, img_path={0}'.format(img_path)) | ||
321 | else: # 流水处理 | ||
322 | self.bs_process(wb, ocr_data, bs_summary, unknown_summary, img_path, classify) | ||
323 | else: | ||
324 | skip_img.append(self.parse_img_path(img_path)) | ||
325 | else: | ||
326 | raise Exception('ocr 1 error, img_path={0}'.format(img_path)) | ||
327 | |||
328 | @staticmethod | ||
329 | def parse_img_path(img_path): | ||
330 | img_name, _ = os.path.splitext(os.path.basename(img_path)) | ||
331 | return img_name[5], img_name[11] | ||
329 | 332 | ||
330 | @staticmethod | 333 | @staticmethod |
331 | def get_most(value_list): | 334 | def get_most(value_list): |
... | @@ -425,8 +428,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -425,8 +428,10 @@ class Command(BaseCommand, LoggerMixin): |
425 | merged_bs_summary[card] = summary | 428 | merged_bs_summary[card] = summary |
426 | else: | 429 | else: |
427 | # 1卡号 | 430 | # 1卡号 |
431 | one_card = False | ||
428 | if len(bs_summary) == 1: | 432 | if len(bs_summary) == 1: |
429 | merged_bs_summary = self.prune_bs_summary(bs_summary) | 433 | merged_bs_summary = self.prune_bs_summary(bs_summary) |
434 | one_card = True | ||
430 | # 多卡号 | 435 | # 多卡号 |
431 | else: | 436 | else: |
432 | merged_bs_summary = self.merge_card(bs_summary) | 437 | merged_bs_summary = self.merge_card(bs_summary) |
... | @@ -435,7 +440,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -435,7 +440,7 @@ class Command(BaseCommand, LoggerMixin): |
435 | merge_role = [] | 440 | merge_role = [] |
436 | classify_summary = unknown_summary.get(card_summary['classify'], {}) | 441 | classify_summary = unknown_summary.get(card_summary['classify'], {}) |
437 | for role, summary in classify_summary.items(): | 442 | for role, summary in classify_summary.items(): |
438 | if role in card_summary['role_set']: | 443 | if one_card or role in card_summary['role_set']: |
439 | merge_role.append(role) | 444 | merge_role.append(role) |
440 | card_summary['sheet'].extend(summary['sheet']) | 445 | card_summary['sheet'].extend(summary['sheet']) |
441 | card_summary['code'].extend(summary['code']) | 446 | card_summary['code'].extend(summary['code']) |
... | @@ -503,6 +508,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -503,6 +508,7 @@ class Command(BaseCommand, LoggerMixin): |
503 | bs_summary = {} | 508 | bs_summary = {} |
504 | license_summary = {} | 509 | license_summary = {} |
505 | unknown_summary = {} | 510 | unknown_summary = {} |
511 | skip_img = [] | ||
506 | interest_keyword = Keywords.objects.filter( | 512 | interest_keyword = Keywords.objects.filter( |
507 | type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True) | 513 | type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True) |
508 | salary_keyword = Keywords.objects.filter( | 514 | salary_keyword = Keywords.objects.filter( |
... | @@ -515,27 +521,29 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -515,27 +521,29 @@ class Command(BaseCommand, LoggerMixin): |
515 | # wb = Workbook() | 521 | # wb = Workbook() |
516 | 522 | ||
517 | # 4.1 获取OCR结果 | 523 | # 4.1 获取OCR结果 |
518 | loop = asyncio.get_event_loop() | 524 | # loop = asyncio.get_event_loop() |
519 | tasks = [self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary) | 525 | # tasks = [self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary) |
520 | for img_path in pdf_handler.img_path_list] | 526 | # for img_path in pdf_handler.img_path_list] |
521 | loop.run_until_complete(asyncio.wait(tasks)) | 527 | # loop.run_until_complete(asyncio.wait(tasks)) |
522 | # loop.close() | 528 | # loop.close() |
523 | 529 | ||
524 | # for img_path in pdf_handler.img_path_list: | 530 | for img_path in pdf_handler.img_path_list: |
525 | # self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary) | 531 | self.img_2_ocr_2_wb(wb, img_path, bs_summary, unknown_summary, license_summary, skip_img) |
526 | 532 | ||
527 | self.cronjob_log.info('{0} [bs_summary={1}] [unknown_summary={2}] [license_summary={3}]'.format( | 533 | self.cronjob_log.info('{0} [business_type={1}] [doc_id={2}] [bs_summary={3}] [unknown_summary={4}] ' |
528 | self.log_base, bs_summary, unknown_summary, license_summary)) | 534 | '[license_summary={5}]'.format(self.log_base, business_type, doc.id, bs_summary, |
535 | unknown_summary, license_summary)) | ||
529 | 536 | ||
530 | merged_bs_summary = self.rebuild_bs_summary(bs_summary, unknown_summary) | 537 | merged_bs_summary = self.rebuild_bs_summary(bs_summary, unknown_summary) |
531 | 538 | ||
532 | self.cronjob_log.info('{0} [merged_bs_summary={1}] [unknown_summary={2}]'.format( | 539 | self.cronjob_log.info('{0} [business_type={1}] [doc_id={2}] [merged_bs_summary={3}] ' |
533 | self.log_base, merged_bs_summary, unknown_summary)) | 540 | '[unknown_summary={4}]'.format(self.log_base, business_type, doc.id, |
541 | merged_bs_summary, unknown_summary)) | ||
534 | del unknown_summary | 542 | del unknown_summary |
535 | 543 | ||
536 | # 4.2 重构Excel文件 | 544 | # 4.2 重构Excel文件 |
537 | wb.save(src_excel_path) | 545 | wb.save(src_excel_path) |
538 | wb.rebuild(merged_bs_summary, license_summary) | 546 | wb.rebuild(merged_bs_summary, license_summary, skip_img) |
539 | wb.save(excel_path) | 547 | wb.save(excel_path) |
540 | except Exception as e: | 548 | except Exception as e: |
541 | doc.status = DocStatus.PROCESS_FAILED.value | 549 | doc.status = DocStatus.PROCESS_FAILED.value | ... | ... |
... | @@ -141,32 +141,22 @@ class BSWorkbook(Workbook): | ... | @@ -141,32 +141,22 @@ class BSWorkbook(Workbook): |
141 | # month_info process | 141 | # month_info process |
142 | month_info = month_mapping.setdefault('xxxx-xx', []) | 142 | month_info = month_mapping.setdefault('xxxx-xx', []) |
143 | month_info.append((ws.title, min_row, ws.max_row, 0)) | 143 | month_info.append((ws.title, min_row, ws.max_row, 0)) |
144 | elif len(month_list) == 1: | ||
145 | # reverse_trend_list process | ||
146 | reverse_trend = self.get_reverse_trend(dti.day, idx_list) | ||
147 | reverse_trend_list.append(reverse_trend) | ||
148 | # month_info process | ||
149 | month_info = month_mapping.setdefault(month_list[0], []) | ||
150 | day_mean = np.mean(dti.day.dropna()) | ||
151 | if len(month_info) == 0: | ||
152 | month_info.append((ws.title, min_row, ws.max_row, day_mean)) | ||
153 | else: | ||
154 | for i, item in enumerate(month_info): | ||
155 | if day_mean <= item[-1]: | ||
156 | month_info.insert(i, (ws.title, min_row, ws.max_row, day_mean)) | ||
157 | break | ||
158 | else: | ||
159 | month_info.append((ws.title, min_row, ws.max_row, day_mean)) | ||
160 | else: | 144 | else: |
161 | # reverse_trend_list process | 145 | # reverse_trend_list process |
162 | reverse_trend = self.get_reverse_trend(dti.day, idx_list) | 146 | reverse_trend = self.get_reverse_trend(dti.day, idx_list) |
163 | reverse_trend_list.append(reverse_trend) | 147 | reverse_trend_list.append(reverse_trend) |
164 | # month_info process | 148 | # month_info process |
165 | for i, item in enumerate(month_list[:-1]): | 149 | day_idx = dti.day |
166 | month_mapping.setdefault(item, []).append( | 150 | idx_list_max_idx = len(idx_list) - 1 |
167 | (ws.title, idx_list[i] + min_row, idx_list[i + 1] + min_row - 1, self.MAX_MEAN)) | 151 | for i, item in enumerate(month_list): |
168 | month_mapping.setdefault(month_list[-1], []).insert( | 152 | if i == idx_list_max_idx: |
169 | 0, (ws.title, idx_list[-1] + min_row, ws.max_row, 0)) | 153 | day_mean = np.mean(day_idx[idx_list[i]:].dropna()) |
154 | month_mapping.setdefault(item, []).append( | ||
155 | (ws.title, idx_list[i] + min_row, ws.max_row, day_mean)) | ||
156 | else: | ||
157 | day_mean = np.mean(day_idx[idx_list[i]: idx_list[i + 1]].dropna()) | ||
158 | month_mapping.setdefault(item, []).append( | ||
159 | (ws.title, idx_list[i] + min_row, idx_list[i + 1] + min_row - 1, day_mean)) | ||
170 | 160 | ||
171 | def build_metadata_rows(self, confidence, code, print_time, start_date, end_date): | 161 | def build_metadata_rows(self, confidence, code, print_time, start_date, end_date): |
172 | if start_date is None or end_date is None: | 162 | if start_date is None or end_date is None: |
... | @@ -259,7 +249,7 @@ class BSWorkbook(Workbook): | ... | @@ -259,7 +249,7 @@ class BSWorkbook(Workbook): |
259 | except Exception as e: | 249 | except Exception as e: |
260 | continue | 250 | continue |
261 | else: | 251 | else: |
262 | over_cell.number_format = numbers.FORMAT_NUMBER_COMMA_SEPARATED1 | 252 | over_cell.number_format = numbers.FORMAT_GENERAL |
263 | 253 | ||
264 | # 3.4.金额转数值 | 254 | # 3.4.金额转数值 |
265 | try: | 255 | try: |
... | @@ -281,7 +271,7 @@ class BSWorkbook(Workbook): | ... | @@ -281,7 +271,7 @@ class BSWorkbook(Workbook): |
281 | else: | 271 | else: |
282 | if rows[consts.BORROW_IDX].value in consts.BORROW_OUTLAY_SET: | 272 | if rows[consts.BORROW_IDX].value in consts.BORROW_OUTLAY_SET: |
283 | amount_cell.value = -amount_cell.value | 273 | amount_cell.value = -amount_cell.value |
284 | amount_cell.number_format = numbers.FORMAT_NUMBER_COMMA_SEPARATED1 | 274 | amount_cell.number_format = numbers.FORMAT_GENERAL |
285 | same_amount_mapping = amount_mapping.get(date_cell.value, {}) | 275 | same_amount_mapping = amount_mapping.get(date_cell.value, {}) |
286 | fill_rows = same_amount_mapping.get(-amount_cell.value) | 276 | fill_rows = same_amount_mapping.get(-amount_cell.value) |
287 | if fill_rows: | 277 | if fill_rows: |
... | @@ -357,11 +347,11 @@ class BSWorkbook(Workbook): | ... | @@ -357,11 +347,11 @@ class BSWorkbook(Workbook): |
357 | end_date) | 347 | end_date) |
358 | 348 | ||
359 | # 3.创建月份表、提取/高亮关键行 | 349 | # 3.创建月份表、提取/高亮关键行 |
360 | is_reverse = False | 350 | # 倒序处理 |
361 | if sum(reverse_trend_list) > 0: # 倒序处理 | 351 | is_reverse = True if sum(reverse_trend_list) > 0 else False |
362 | is_reverse = True | 352 | for month_list in month_mapping.values(): |
363 | for month_list in month_mapping.values(): | 353 | month_list.sort(key=lambda x: x[-1], reverse=is_reverse) |
364 | month_list.sort(key=lambda x: x[-1], reverse=True) | 354 | |
365 | self.build_month_sheet(card, month_mapping, ms, is_reverse) | 355 | self.build_month_sheet(card, month_mapping, ms, is_reverse) |
366 | 356 | ||
367 | # 4.删除原表 | 357 | # 4.删除原表 |
... | @@ -379,6 +369,14 @@ class BSWorkbook(Workbook): | ... | @@ -379,6 +369,14 @@ class BSWorkbook(Workbook): |
379 | ws.append(bl_field) | 369 | ws.append(bl_field) |
380 | ws.append((None, )) | 370 | ws.append((None, )) |
381 | 371 | ||
382 | def rebuild(self, bs_summary, license_summary): | 372 | def skip_img_sheet(self, skip_img): |
373 | if skip_img: | ||
374 | ws = self.create_sheet(consts.SKIP_IMG_SHEET_NAME) | ||
375 | ws.append(consts.SKIP_IMG_SHEET_HEADER) | ||
376 | for img_tuple in skip_img: | ||
377 | ws.append(img_tuple) | ||
378 | |||
379 | def rebuild(self, bs_summary, license_summary, skip_img): | ||
383 | self.bs_rebuild(bs_summary) | 380 | self.bs_rebuild(bs_summary) |
384 | self.license_rebuild(license_summary) | 381 | self.license_rebuild(license_summary) |
382 | self.skip_img_sheet(skip_img) | ... | ... |
-
Please register or sign in to post a comment