0b2cf2d1 by 周伟奇

Fix zfb emoji handling: skip text spans that cannot be printed, and guard get_pwd_list against errors

1 parent 090b26b2
...@@ -82,9 +82,12 @@ class PDFHandler: ...@@ -82,9 +82,12 @@ class PDFHandler:
82 82
@staticmethod
def get_pwd_list(doc_name, pwd_list):
    """Build the list of candidate passwords for a document.

    Every non-overlapping run of six digits found in ``doc_name`` is
    treated as a candidate and placed first, followed by the entries of
    ``pwd_list``.

    Args:
        doc_name: Document name to scan for 6-digit sequences.
        pwd_list: Additional candidate passwords; may be ``None`` or empty.

    Returns:
        A new list with the candidates from ``doc_name`` followed by the
        entries of ``pwd_list``. If ``doc_name`` cannot be scanned (for
        example it is ``None``) or ``pwd_list`` is not iterable,
        ``pwd_list`` is returned unchanged as a best-effort fallback.
    """
    try:
        candidates = re.findall(r'\d{6}', doc_name)
        # A missing/empty pwd_list must not discard the candidates that
        # were already extracted from the document name.
        candidates.extend(pwd_list or ())
    except TypeError:
        # Only the expected failure (wrong argument types) is tolerated;
        # anything else is a real bug and should surface.
        return pwd_list
    return candidates
88 91
89 def get_suffix(self, file_name): 92 def get_suffix(self, file_name):
90 if file_name is None: 93 if file_name is None:
...@@ -355,9 +358,17 @@ class PDFHandler: ...@@ -355,9 +358,17 @@ class PDFHandler:
355 for line in block.get('lines'): 358 for line in block.get('lines'):
356 for span in line.get('spans'): 359 for span in line.get('spans'):
357 char = span.get('text') 360 char = span.get('text')
358 bbox = span.get('bbox') 361
359 if char.strip() == '': 362 if char.strip() == '':
360 continue 363 continue
364
365 # 特殊emoji跳过
366 try:
367 print(char)
368 except Exception as e:
369 continue
370
371 bbox = span.get('bbox')
361 if pno == 0 and self.title_is_ebank(char): 372 if pno == 0 and self.title_is_ebank(char):
362 in_ebank_set = True 373 in_ebank_set = True
363 text_list.append((bbox, char)) 374 text_list.append((bbox, char))
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!