793920a0 by 周伟奇

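update wb build: add a 'src_excel' link (src.xlsx) to DocHandler, add KeywordsType.ALI_WECHART, and have PDFHandler record (image path, page number, image index) tuples in img_info_list instead of bare paths in img_path_list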

1 parent f3d6e429
...@@ -11,6 +11,8 @@ class DocHandler: ...@@ -11,6 +11,8 @@ class DocHandler:
11 return '/data/{1}/{0}/{0}.pdf'.format(doc_id, business_type) 11 return '/data/{1}/{0}/{0}.pdf'.format(doc_id, business_type)
12 elif file == 'img': 12 elif file == 'img':
13 return '/data/{1}/{0}/{0}_img.zip'.format(doc_id, business_type) 13 return '/data/{1}/{0}/{0}_img.zip'.format(doc_id, business_type)
14 elif file == 'src_excel':
15 return '/data/{1}/{0}/src.xlsx'.format(doc_id, business_type)
14 else: 16 else:
15 return '/data/{1}/{0}/{0}.xlsx'.format(doc_id, business_type) 17 return '/data/{1}/{0}/{0}.xlsx'.format(doc_id, business_type)
16 18
...@@ -22,6 +24,7 @@ class DocHandler: ...@@ -22,6 +24,7 @@ class DocHandler:
22 doc_dict['pdf_link'] = self.get_link(doc_id, business_type) 24 doc_dict['pdf_link'] = self.get_link(doc_id, business_type)
23 doc_dict['img_link'] = self.get_link(doc_id, business_type, file='img') 25 doc_dict['img_link'] = self.get_link(doc_id, business_type, file='img')
24 doc_dict['excel_link'] = self.get_link(doc_id, business_type, file='excel') 26 doc_dict['excel_link'] = self.get_link(doc_id, business_type, file='excel')
27 doc_dict['src_excel_link'] = self.get_link(doc_id, business_type, file='src_excel')
25 return list(doc_queryset) 28 return list(doc_queryset)
26 29
27 @staticmethod 30 @staticmethod
......
...@@ -13,3 +13,4 @@ class KeywordsType(NamedEnum): ...@@ -13,3 +13,4 @@ class KeywordsType(NamedEnum):
13 INTEREST = (0, "利息") 13 INTEREST = (0, "利息")
14 SALARY = (1, '薪资') 14 SALARY = (1, '薪资')
15 LOAN = (2, '贷款') 15 LOAN = (2, '贷款')
16 ALI_WECHART = (3, '微信/支付宝')
......
...@@ -25,7 +25,7 @@ class PDFHandler: ...@@ -25,7 +25,7 @@ class PDFHandler:
25 def __init__(self, path, img_dir_path): 25 def __init__(self, path, img_dir_path):
26 self.path = path 26 self.path = path
27 self.img_dir_path = img_dir_path 27 self.img_dir_path = img_dir_path
28 self.img_path_list = [] 28 self.img_info_list = []
29 self.xref_set = set() 29 self.xref_set = set()
30 30
31 def get_img_save_path(self, pno, img_index=0, ext='png'): 31 def get_img_save_path(self, pno, img_index=0, ext='png'):
...@@ -38,7 +38,7 @@ class PDFHandler: ...@@ -38,7 +38,7 @@ class PDFHandler:
38 pm = page.getPixmap(matrix=trans_2, alpha=False) 38 pm = page.getPixmap(matrix=trans_2, alpha=False)
39 img_save_path = self.get_img_save_path(page.number) 39 img_save_path = self.get_img_save_path(page.number)
40 pm.writePNG(img_save_path) 40 pm.writePNG(img_save_path)
41 self.img_path_list.append(img_save_path) 41 self.img_info_list.append((img_save_path, page.number, 0))
42 42
43 @staticmethod 43 @staticmethod
44 def getimage(pix): 44 def getimage(pix):
...@@ -88,7 +88,7 @@ class PDFHandler: ...@@ -88,7 +88,7 @@ class PDFHandler:
88 with open(img_save_path, "wb") as f: 88 with open(img_save_path, "wb") as f:
89 f.write(img_data) 89 f.write(img_data)
90 self.xref_set.add(xref) 90 self.xref_set.add(xref)
91 self.img_path_list.append(img_save_path) 91 self.img_info_list.append((img_save_path, pno, img_index))
92 92
93 @staticmethod 93 @staticmethod
94 def split_il(il): 94 def split_il(il):
...@@ -179,7 +179,7 @@ class PDFHandler: ...@@ -179,7 +179,7 @@ class PDFHandler:
179 img_save_path = self.get_img_save_path(pno, img_index, im_list[0][2]) 179 img_save_path = self.get_img_save_path(pno, img_index, im_list[0][2])
180 new_img.save(img_save_path) 180 new_img.save(img_save_path)
181 page_to_png = False 181 page_to_png = False
182 self.img_path_list.append(img_save_path) 182 self.img_info_list.append((img_save_path, pno, img_index))
183 183
184 # 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片 184 # 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片
185 if page_to_png: 185 if page_to_png:
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!