cb4acc51 by 周伟奇

ebank part 2

1 parent d24fcf2c
......@@ -39,6 +39,14 @@ class PDFHandler:
self.page_text_list = []
self.pdf_info = {}
self.img_path_pno_list = []
self.ebank_title_list = [
'微信支付交易明细证明',
'支付宝收支明细证明',
'招商银行交易流水',
'中国工商银行借记账户历史明细',
'中国建设银行个人活期账户全部交易明细',
'平安银行个人账户交易明细清单',
]
def get_suffix(self, file_name):
if file_name is None:
......@@ -260,20 +268,27 @@ class PDFHandler:
page = pdf.loadPage(pno)
self.page_to_png(page)
def title_is_ebank(self, char):
    """Report whether a text span matches a known e-bank statement title.

    A span matches when it is a substring of one of the titles in
    ``self.ebank_title_list`` or when one of those titles is a substring
    of the span.

    :param char: text of a single span (a string).
    :return: ``True`` on the first match, ``False`` when nothing matches.
    """
    # Containment is tested in both directions for every known title.
    return any(
        char in known_title or known_title in char
        for known_title in self.ebank_title_list
    )
def check_ebank(self, pdf):
# page_text_list = []
page_text_list = []
text_item_sum = 0
in_ebank_set = False
for pno in range(pdf.pageCount):
page = pdf.loadPage(pno)
# if page.rotation is None:
# rotation = 0
# elif isinstance(page.rotation, int):
# divisor, remainder = divmod(page.rotation, 90)
# if remainder != 0:
# return
# rotation = divmod(divisor, 4)[1]
# else:
# return
if page.rotation is None:
rotation = 0
elif isinstance(page.rotation, int):
divisor, remainder = divmod(page.rotation, 90)
if remainder != 0:
return
rotation = divmod(divisor, 4)[1]
else:
return
textpage = page.getTextPage()
text = textpage.extractDICT()
text_list = []
......@@ -284,22 +299,24 @@ class PDFHandler:
bbox = span.get('bbox')
if char.strip() == '':
continue
if pno == 0 and self.title_is_ebank(char):
in_ebank_set = True
text_list.append((bbox, char))
text_item_sum += len(text_list)
if text_item_sum < (pno + 1) * 5:
return
# else:
# page_text_list.append(
# {
# 'width': text.get('width'),
# 'height': text.get('height'),
# 'rotation': rotation,
# 'text': text_list
# }
# )
# self.is_ebank = True
else:
page_text_list.append(
{
'width': text.get('width'),
'height': text.get('height'),
'rotation': rotation,
'text': text_list
}
)
self.is_ebank = in_ebank_set
self.is_e_pdf = True
# self.page_text_list = page_text_list
self.page_text_list = page_text_list
def e_contract_process(self):
os.makedirs(self.img_dir_path, exist_ok=True)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!