a220590e by 周伟奇

update ocr url

1 parent 10a4a80f
......@@ -28,16 +28,8 @@ class Command(BaseCommand, LoggerMixin):
self.switch = True
# 数据目录
self.data_dir = conf.DATA_DIR
# pdf页面转图片
self.zoom_x = 2.0
self.zoom_y = 2.0
self.trans = fitz.Matrix(self.zoom_x, self.zoom_y).preRotate(0) # zoom factor 2 in each dimension
# ocr相关
self.ocr_url = conf.OCR_URL
self.ocr_header = {
'X-Auth-Token': conf.OCR_TOKEN,
'Content-Type': 'application/json'
}
# EDMS web_service_api
self.edms = EDMS(conf.EDMS_USER, conf.EDMS_PWD)
# 优雅退出信号:15
......@@ -103,12 +95,6 @@ class Command(BaseCommand, LoggerMixin):
words = cell.get('words')
ws.cell(row=r1+1, column=c1+1, value=words)
@staticmethod
def get_ocr_json(img_path):
    """Build the JSON payload for an OCR request from an image file.

    Reads the file at ``img_path`` in binary mode and returns a dict with a
    single key, ``imgBase64``, whose value is the file content encoded as
    base64 and decoded to a UTF-8 string.
    """
    with open(img_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_text = base64.b64encode(raw_bytes).decode('utf-8')
    return {'imgBase64': encoded_text}
# async def fetch_ocr_result(self, img_path):
# async with aiohttp.ClientSession(
# headers=self.ocr_header, connector=aiohttp.TCPConnector(ssl=False)
......@@ -125,15 +111,20 @@ class Command(BaseCommand, LoggerMixin):
# self.append_sheet(wb, sheets_list, img_name, role_summary)
# Send the image at img_path to the OCR service at self.ocr_url and return the
# response body decoded from JSON.
# NOTE(review): this span looks like a flattened diff -- the JSON/base64 request
# and the multipart request below appear to be the before/after of one change.
# Confirm which lines are live before editing.
def fetch_ocr_result(self, img_path):
# JSON-body variant: base64 payload from get_ocr_json, posted with self.ocr_header.
json_data = self.get_ocr_json(img_path)
response = requests.post(self.ocr_url, json=json_data, headers=self.ocr_header)
# payload = {'name': 'page_0_img_0_0'}
# Multipart variant: the image file is uploaded under the form field 'img'.
# NOTE(review): the handle returned by open() is never closed in this block;
# consider wrapping the request in a `with open(...)` statement.
files = [
('img', open(img_path, 'rb'))
]
response = requests.request("POST", self.ocr_url, files=files)
# The body is parsed as JSON without checking the HTTP status first.
return response.json()
# Fetch the OCR result for one image and, when the response reports success and
# carries a non-empty sheet list, pass that list to append_sheet together with
# the image's file name.
# NOTE(review): this span looks like a flattened diff -- the two success checks
# below (string '1' reading result -> res, integer 1 reading data) appear to be
# the before/after of a response-format change. Confirm which one is live.
def img_ocr_excel(self, wb, img_path, role_summary):
res = self.fetch_ocr_result(img_path)
# Log the raw OCR response alongside the image path.
self.cronjob_log.info('{0} [fetch ocr result success] [img={1}] [res={2}]'.format(self.log_base, img_path, res))
# Response shape A: 'code' is the string '1', sheets live under result -> res.
if res.get('code') == '1':
sheets_list = res.get('result').get('res')
# Response shape B: 'code' is the integer 1, sheets live under 'data'.
if res.get('code') == 1:
sheets_list = res.get('data')
# Nothing to write for this image when the sheet list is empty or missing.
if not sheets_list:
return
# Only the base file name (no directory part) is handed to append_sheet.
img_name = os.path.basename(img_path)
self.append_sheet(wb, sheets_list, img_name, role_summary)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!