ocr process

周伟奇
Showing 4 changed files with 29 additions and 4 deletions
requirements/base.txt
src/apps/doc/management/commands/doc_process.py
src/apps/doc/mixins.py
src/common/tools/file_tools.py
--- a/requirements/base.txt
View file @a02a957
+++ b/requirements/base.txt
View file @a02a957
+aiohttp==3.6.2
+async-timeout==3.0.1
+attrs==19.3.0
 certifi==2016.2.28
+chardet==3.0.4
 Django==2.1
 # django-mysqlpool @ https://github.com/smartfile/django-mysqlpool/archive/master.zip
 djangorestframework==3.9.0
 djangorestframework-jwt==1.11.0
+idna==2.9
+idna-ssl==1.1.0
 marshmallow==3.6.1
+multidict==4.7.6
 pdfminer3k==1.3.4
 Pillow==7.1.2
 ply==3.11
@@ -17,4 +24,7 @@ redis==3.4.1
 # situlogger @ http://gitlab.situdata.com/zhouweiqi/situlogger/repository/archive.tar.gz?ref=master
 six==1.14.0
 SQLAlchemy==0.9.10
+typing-extensions==3.7.4.2
 webargs==6.1.0
+xlwt==1.3.0
+yarl==1.4.2
--- a/src/apps/doc/management/commands/doc_process.py
View file @a02a957
+++ b/src/apps/doc/management/commands/doc_process.py
View file @a02a957
--- a/src/apps/doc/mixins.py
View file @a02a957
+++ b/src/apps/doc/mixins.py
View file @a02a957
@@ -7,13 +7,12 @@ class DocHandler:

    @staticmethod
    def get_link(doc_id, file='pdf'):
-        data_path = os.path.join(conf.DATA_DIR, str(doc_id))
        if file == 'pdf':
-            return os.path.join(data_path, '{0}.pdf'.format(str(doc_id)))
+            return '/data/{0}/{0}.pdf'.format(doc_id)
        elif file == 'img':
-            return os.path.join(data_path, '{0}_img.zip'.format(str(doc_id)))
+            return '/data/{0}/{0}_img.zip'.format(doc_id)
        else:
-            return os.path.join(data_path, '{0}.xlsx'.format(str(doc_id)))
+            return '/data/{0}/{0}.xls'.format(doc_id)

    def get_doc_list(self, doc_queryset):
        for doc_dict in doc_queryset:
--- a/src/common/tools/file_tools.py
View file @a02a957
+++ b/src/common/tools/file_tools.py
View file @a02a957
+import os
+from zipfile import ZipFile
+
+
 def file_write(file, file_path):
    with open(file_path, 'wb+') as f:
        for chunk in file.chunks():
            f.write(chunk)
+
+
+def write_zip_file(dir_name, zipfile_path):
+    """Zip all files under dir_name into zipfile_path, keeping relative paths."""
+    if not os.path.isdir(dir_name):
+        return  # nothing to archive; no zip file is created
+    with ZipFile(zipfile_path, 'w') as z:
+        for root, _dirs, files in os.walk(dir_name):
+            for single_file in files:
+                src_file_path = os.path.join(root, single_file)
+                # relpath drops only the dir_name prefix; replace() cut every occurrence
+                z.write(src_file_path, os.path.relpath(src_file_path, dir_name))