doc process part 1
Showing
10 changed files
with
195 additions
and
183 deletions
... | @@ -4,12 +4,17 @@ Django==2.1 | ... | @@ -4,12 +4,17 @@ Django==2.1 |
4 | djangorestframework==3.9.0 | 4 | djangorestframework==3.9.0 |
5 | djangorestframework-jwt==1.11.0 | 5 | djangorestframework-jwt==1.11.0 |
6 | marshmallow==3.6.1 | 6 | marshmallow==3.6.1 |
7 | pdfminer3k==1.3.4 | ||
8 | Pillow==7.1.2 | ||
9 | ply==3.11 | ||
7 | PyJWT==1.7.1 | 10 | PyJWT==1.7.1 |
8 | PyMuPDF==1.17.0 | 11 | PyMuPDF==1.17.0 |
9 | PyMySQL==0.9.3 | 12 | PyMySQL==0.9.3 |
10 | pytz==2020.1 | 13 | pytz==2020.1 |
11 | # simple-config @ http://gitlab.situdata.com/zhouweiqi/simple_config/repository/archive.tar.gz?ref=master | 14 | PyYAML==5.3.1 |
12 | # situlogger @ http://gitlab.situdata.com/zhouweiqi/situlogger/repository/archive.tar.gz?ref=master | 15 | redis==3.4.1 |
16 | simple-config @ http://gitlab.situdata.com/zhouweiqi/simple_config/repository/archive.tar.gz?ref=master | ||
17 | situlogger @ http://gitlab.situdata.com/zhouweiqi/situlogger/repository/archive.tar.gz?ref=master | ||
13 | six==1.14.0 | 18 | six==1.14.0 |
14 | SQLAlchemy==0.9.10 | 19 | SQLAlchemy==0.9.10 |
15 | webargs==6.1.0 | 20 | webargs==6.1.0 | ... | ... |
1 | import time | 1 | import time |
2 | import os | ||
2 | import signal | 3 | import signal |
4 | import fitz | ||
5 | from PIL import Image | ||
6 | from io import BytesIO | ||
3 | 7 | ||
4 | from django.core.management import BaseCommand | 8 | from django.core.management import BaseCommand |
9 | from common.mixins import LoggerMixin | ||
10 | from common.redis_cache import redis_handler as rh | ||
11 | from apps.doc.models import UploadDocRecords | ||
12 | from settings import conf | ||
5 | 13 | ||
6 | 14 | ||
7 | class Command(BaseCommand): | 15 | class Command(BaseCommand, LoggerMixin): |
8 | 16 | ||
def __init__(self):
    """Initialise the command: log prefix, run switch, data dir, render matrix, SIGTERM hook."""
    super().__init__()
    # Prefix put in front of every log message emitted by this command.
    self.log_base = '[doc process]'
    # File-processing switch; the handle() loop runs while this is True.
    self.switch = True
    # Data directory
    self.data_dir = conf.DATA_DIR
    # PDF page -> image rendering parameters
    self.zoom_x = 2.0
    self.zoom_y = 2.0
    self.trans = fitz.Matrix(self.zoom_x, self.zoom_y).preRotate(0)  # zoom factor 2 in each dimension
    # Graceful-exit signal: 15 (SIGTERM)
    signal.signal(signal.SIGTERM, self.signal_handler)
15 | 30 | ||
def signal_handler(self, sig, frame):
    """Signal callback registered for SIGTERM; `sig` and `frame` are unused."""
    self.switch = False  # stop processing files
18 | 33 | ||
def get_task_info(self):  # TODO: priority queue & status modify
    """Pop the next task id from the Redis queue and load its record.

    Returns:
        The dict produced by ``.values('id', 'metadata_version_id',
        'document_name').first()`` for the dequeued id, or ``None`` when
        the queue is empty or no record matches the id.
    """
    task_id = rh.dequeue()
    if task_id is None:
        self.cronjob_log.info('{0} [get_task_info] [queue empty]'.format(self.log_base))
        return None
    task_info = UploadDocRecords.objects.filter(id=task_id).values(
        'id', 'metadata_version_id', 'document_name').first()
    if task_info is None:
        self.cronjob_log.warn('{0} [get_task_info] [task not found] [task_id={1}]'.format(self.log_base, task_id))
        # Stop here: without this return the success line below was also
        # logged (with task_info=None) for a task that does not exist.
        return None
    self.cronjob_log.info('{0} [get_task_info success] [task_info={1}]'.format(self.log_base, task_info))
    return task_info
21 | 45 | ||
def pdf_download(self, task_info):
    """Return the local path of the PDF for `task_info`, or None when there is no task.

    The EDMS download is not implemented yet; a fixed placeholder path is
    returned and logged for every non-None task.
    """
    pdf_path = None
    if task_info is not None:
        # TODO: download the pdf from EDMS
        pdf_path = '/Users/clay/Desktop/biz/biz_logic/data/2/横版-表格-工商银行CH-B008802400.pdf'
        message = '{0} [pdf download success] [task_info={1}] [pdf_path={2}]'.format(
            self.log_base, task_info, pdf_path)
        self.cronjob_log.info(message)
    return pdf_path
54 | |||
@staticmethod
def getimage(pix):
    """Return `pix` unchanged unless its colorspace has 4 components.

    A pixmap whose ``colorspace.n`` equals 4 is converted to a new
    ``fitz.csRGB`` pixmap; any other pixmap is handed back as-is.
    """
    needs_rgb = pix.colorspace.n == 4
    return fitz.Pixmap(fitz.csRGB, pix) if needs_rgb else pix
61 | |||
def recoverpix(self, doc, item):
    """Rebuild one PDF image, applying its soft mask when present.

    Args:
        doc: the open fitz document.
        item: one image-list entry; index 0 is the image xref, index 1 the
            xref of its /SMask (0 when absent), index 5 the colorspace name.

    Returns:
        The raw ``doc.extractImage`` result for a DeviceRGB image without
        a mask; otherwise a pixmap passed through ``self.getimage``.
    """
    img_xref = item[0]   # xref of PDF image
    mask_xref = item[1]  # xref of its /SMask
    is_rgb = item[5] == 'DeviceRGB'

    # RGB image without a soft mask: hand back the raw embedded image.
    if is_rgb and mask_xref == 0:
        return doc.extractImage(img_xref)

    base = fitz.Pixmap(doc, img_xref)

    if mask_xref == 0:
        # GRAY/CMYK without a mask: work on a plain copy.
        merged = fitz.Pixmap(base)
    else:
        # Reconstruct the alpha channel from the /SMask entry.
        mask = fitz.Pixmap(doc, mask_xref)

        # Sanity check: same rectangle, neither has alpha, mask is 1-component.
        compatible = (base.irect == mask.irect
                      and base.alpha == mask.alpha == 0
                      and mask.n == 1)
        if not compatible:
            return self.getimage(base)

        merged = fitz.Pixmap(base)      # copy of the image pixmap
        merged.setAlpha(mask.samples)   # treat the mask samples as alpha values

    if not is_rgb:
        merged = fitz.Pixmap(fitz.csRGB, merged)  # GRAY/CMYK to RGB
    return self.getimage(merged)
103 | |||
@staticmethod
def get_img_data(pix):
    """Return ``(ext, img_data)`` for a raw-image dict or a pixmap.

    Args:
        pix: either a mapping with "ext" and "image" keys (a raw image),
            or a pixmap object exposing ``getPNGData()``.

    Returns:
        Tuple of the file extension and the encoded image bytes; pixmaps
        are always encoded as PNG.
    """
    # isinstance (not an exact type check) so dict subclasses are also
    # treated as raw images instead of being sent to getPNGData().
    if isinstance(pix, dict):  # we got a raw image
        return pix["ext"], pix["image"]
    # we got a pixmap
    return 'png', pix.getPNGData()
113 | |||
@staticmethod
def split_il(il):
    """Split an image list into runs of consecutive images.

    Each entry is indexed as ``entry[2]`` (width) and ``entry[3]``
    (height), as the caller uses them. Walking the list in order:

    * a width change closes the current run before the entry;
    * a height change (same width) closes the run after the entry, so a
      differently sized last strip stays with its run;
    * the entry that opens a run is never compared with its predecessor.

    The rules apply to the final entry as well, so a last image with a
    different width gets its own run instead of being merged into the
    previous one.

    Returns:
        List of slices of `il`; empty when `il` is empty.
    """
    img_il_list = []
    start = 0
    length = len(il)
    for i in range(1, length):
        if i == start:
            # First entry of a fresh run: nothing to compare against yet.
            continue
        prev, cur = il[i - 1], il[i]
        if cur[2] != prev[2]:
            img_il_list.append(il[start:i])
            start = i
        elif cur[3] != prev[3]:
            img_il_list.append(il[start:i + 1])
            start = i + 1
    if start < length:
        # Whatever is left forms the last run.
        img_il_list.append(il[start:length])
    return img_il_list
24 | 134 | ||
25 | def handle(self, *args, **kwargs): | 135 | def handle(self, *args, **kwargs): |
26 | while self.switch: | 136 | while self.switch: |
... | @@ -28,8 +138,65 @@ class Command(BaseCommand): | ... | @@ -28,8 +138,65 @@ class Command(BaseCommand): |
28 | task_info = self.get_task_info() | 138 | task_info = self.get_task_info() |
29 | # 从EDMS获取PDF文件 | 139 | # 从EDMS获取PDF文件 |
30 | pdf_path = self.pdf_download(task_info) | 140 | pdf_path = self.pdf_download(task_info) |
141 | # 队列为空时的处理 | ||
142 | if pdf_path is None: | ||
143 | time.sleep(10) | ||
144 | continue | ||
31 | # PDF文件提取图片 | 145 | # PDF文件提取图片 |
146 | img_save_path = os.path.join(os.path.dirname(pdf_path), 'img') | ||
147 | os.makedirs(img_save_path, exist_ok=True) | ||
148 | with fitz.Document(pdf_path) as pdf: | ||
149 | self.cronjob_log.info('{0} [pdf_path={1}] [pdf_metadata={2}]'.format( | ||
150 | self.log_base, pdf_path, pdf.metadata)) | ||
151 | # xref_list = [] # TODO 图片去重 | ||
152 | for pno in range(pdf.pageCount): | ||
153 | il = pdf.getPageImageList(pno) | ||
154 | il.sort(key=lambda x: x[0]) | ||
155 | img_il_list = self.split_il(il) | ||
156 | del il | ||
157 | |||
158 | if len(img_il_list) > 3: # 单页无规律小图过多时,使用页面转图片 | ||
159 | page = pdf.loadPage(pno) | ||
160 | pm = page.getPixmap(matrix=self.trans, alpha=False) | ||
161 | save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number)) | ||
162 | # pm.writePNG(save_path) | ||
163 | pm.writeImage(save_path) | ||
164 | else: # 提取图片 | ||
165 | for img_count, img_il in enumerate(img_il_list): | ||
166 | if len(img_il) == 1: # 当只有一张图片时, 简化处理 | ||
167 | pix = self.recoverpix(pdf, img_il[0]) | ||
168 | ext, img_data = self.get_img_data(pix) | ||
169 | save_path = os.path.join(img_save_path, 'page_{0}_img_{1}.{2}'.format( | ||
170 | pno, img_count, ext)) | ||
171 | with open(save_path, "wb") as f: | ||
172 | f.write(img_data) | ||
173 | else: # 多张图片,竖向拼接 | ||
174 | height_sum = 0 | ||
175 | im_list = [] | ||
176 | width = img_il[0][2] | ||
177 | for img in img_il: | ||
178 | # xref = img[0] | ||
179 | # if xref in xref_list: | ||
180 | # continue | ||
181 | height = img[3] | ||
182 | pix = self.recoverpix(pdf, img) | ||
183 | ext, img_data = self.get_img_data(pix) | ||
184 | |||
185 | # xref_list.append(xref) | ||
186 | |||
187 | im = Image.open(BytesIO(img_data)) | ||
188 | im_list.append((height, im, ext)) | ||
189 | height_sum += height | ||
190 | |||
191 | save_path = os.path.join(img_save_path, 'page_{0}_img_{1}.{2}'.format( | ||
192 | pno, img_count, im_list[0][2])) | ||
193 | res = Image.new(im_list[0][1].mode, (width, height_sum)) | ||
194 | h_now = 0 | ||
195 | for h, m, _ in im_list: | ||
196 | res.paste(m, box=(0, h_now)) | ||
197 | h_now += h | ||
198 | res.save(save_path) | ||
199 | |||
32 | # 图片调用算法判断是否为银行流水 | 200 | # 图片调用算法判断是否为银行流水 |
33 | # 图片调用算法OCR为excel文件 | 201 | # 图片调用算法OCR为excel文件 |
34 | # 整合excel文件上传至EDMS | 202 | # 整合excel文件上传至EDMS |
35 | pass | ... | ... |
... | @@ -4,7 +4,7 @@ from django.db import models | ... | @@ -4,7 +4,7 @@ from django.db import models |
4 | 4 | ||
5 | 5 | ||
6 | # 上传文件记录表/任务表 | 6 | # 上传文件记录表/任务表 |
7 | class UploadDocRecords(models.Model): | 7 | class UploadDocRecords(models.Model): # TODO add status |
8 | id = models.AutoField(primary_key=True, verbose_name="id") | 8 | id = models.AutoField(primary_key=True, verbose_name="id") |
9 | metadata_version_id = models.CharField(max_length=64, verbose_name="元数据版本id") | 9 | metadata_version_id = models.CharField(max_length=64, verbose_name="元数据版本id") |
10 | application_id = models.CharField(max_length=64, verbose_name="申请id") | 10 | application_id = models.CharField(max_length=64, verbose_name="申请id") | ... | ... |
... | @@ -5,6 +5,7 @@ from webargs.djangoparser import use_args, parser | ... | @@ -5,6 +5,7 @@ from webargs.djangoparser import use_args, parser |
5 | from common.mixins import GenericView | 5 | from common.mixins import GenericView |
6 | from common import response | 6 | from common import response |
7 | from .models import UploadDocRecords | 7 | from .models import UploadDocRecords |
8 | from common.redis_cache import redis_handler as rh | ||
8 | 9 | ||
9 | # Create your views here. | 10 | # Create your views here. |
10 | 11 | ||
... | @@ -51,7 +52,7 @@ class DocView(GenericView): | ... | @@ -51,7 +52,7 @@ class DocView(GenericView): |
51 | applicant_data = args.get('applicantData') | 52 | applicant_data = args.get('applicantData') |
52 | document = args.get('document') | 53 | document = args.get('document') |
53 | try: | 54 | try: |
54 | UploadDocRecords.objects.create( | 55 | task = UploadDocRecords.objects.create( |
55 | metadata_version_id=document.get('metadataVersionId'), | 56 | metadata_version_id=document.get('metadataVersionId'), |
56 | application_id=application_data.get('applicationId'), | 57 | application_id=application_data.get('applicationId'), |
57 | main_applicant=applicant_data.get('mainApplicantName'), | 58 | main_applicant=applicant_data.get('mainApplicantName'), |
... | @@ -68,6 +69,8 @@ class DocView(GenericView): | ... | @@ -68,6 +69,8 @@ class DocView(GenericView): |
68 | self.running_log.info('[doc upload fail] [args={0}] [err={1}]'.format(args, e)) | 69 | self.running_log.info('[doc upload fail] [args={0}] [err={1}]'.format(args, e)) |
69 | self.invalid_params(msg='metadataVersionId repeat') | 70 | self.invalid_params(msg='metadataVersionId repeat') |
70 | else: | 71 | else: |
72 | # TODO 查询加入优先队列 or 普通队列 | ||
73 | rh.enqueue(task.id) | ||
71 | self.running_log.info('[doc upload success] [args={0}]'.format(args)) | 74 | self.running_log.info('[doc upload success] [args={0}]'.format(args)) |
72 | return response.ok() | 75 | return response.ok() |
73 | 76 | ... | ... |
... | @@ -106,7 +106,7 @@ class Redis: | ... | @@ -106,7 +106,7 @@ class Redis: |
106 | 106 | ||
def zremrangebyrank(self, name, start, end):
    """Queue a ZRANGE and a ZREMRANGEBYRANK on `name` for ranks start..end and execute both.

    Returns whatever ``pipe.execute()`` yields for the two queued commands.
    NOTE(review): presumably a list of [zrange result, removed count] - confirm
    against the client library in use.
    """
    with self.client.pipeline() as pipe:
        pipe.zrange(name, start, end)  # TODO: possible inconsistency
        pipe.zremrangebyrank(name, start, end)
        item = pipe.execute()
    return item
... | @@ -32,71 +32,12 @@ class RedisHandler: | ... | @@ -32,71 +32,12 @@ class RedisHandler: |
32 | self.redis = redis | 32 | self.redis = redis |
33 | self.time_expires = datetime.timedelta(hours=24) | 33 | self.time_expires = datetime.timedelta(hours=24) |
34 | self.time_format = '%a %b %d %H:%M:%S %Y' | 34 | self.time_format = '%a %b %d %H:%M:%S %Y' |
35 | self.prefix = 'automl' | 35 | self.prefix = 'bwm_ocr' |
36 | self.training_time_key = '{0}:training_time'.format(self.prefix) | ||
37 | self.queue_key = '{0}:queue'.format(self.prefix) | 36 | self.queue_key = '{0}:queue'.format(self.prefix) |
38 | self.prefix_training = '{0}:training'.format(self.prefix) | ||
39 | self.prefix_models = '{0}:models'.format(self.prefix) | ||
40 | self.prefix_img_info = '{0}:img_info'.format(self.prefix) | ||
41 | 37 | ||
42 | def get_training_model_key(self, user_id, model_type): | 38 | def enqueue(self, task_id): |
43 | return '{0}:{1}:{2}'.format(self.prefix_training, user_id, model_type) | ||
44 | |||
45 | def get_models_list_key(self, user_id, model_type): | ||
46 | return '{0}:{1}:{2}'.format(self.prefix_models, user_id, model_type) | ||
47 | |||
48 | def set_training_model(self, user_id, model_type, model_id, status): | ||
49 | # True | ||
50 | key = self.get_training_model_key(user_id, model_type) | ||
51 | mapping = { | ||
52 | 'model_id': model_id, | ||
53 | 'model_status': status | ||
54 | } | ||
55 | return self.redis.hmset(key, mapping) | ||
56 | |||
57 | def get_training_model(self, user_id, model_type): | ||
58 | # {} | ||
59 | # {'id': '1', 'status': '1'} | ||
60 | key = self.get_training_model_key(user_id, model_type) | ||
61 | res = self.redis.hgetall(key) | ||
62 | dict_str_to_int(res) | ||
63 | return res | ||
64 | |||
65 | def set_models_list(self, user_id, model_type, models_list): | ||
66 | key = self.get_models_list_key(user_id, model_type) | ||
67 | value = json.dumps(models_list, cls=DateTimeJSONEncoder) | ||
68 | return self.redis.set(key, value, expires=self.time_expires) | ||
69 | |||
70 | def get_models_list(self, user_id, model_type): | ||
71 | # list or None | ||
72 | key = self.get_models_list_key(user_id, model_type) | ||
73 | res_str = self.redis.get(key) | ||
74 | res = None if res_str is None else json.loads(res_str) | ||
75 | return res | ||
76 | |||
77 | def del_models_list(self, user_id, model_type): | ||
78 | # None | ||
79 | key = self.get_models_list_key(user_id, model_type) | ||
80 | return self.redis.delete(key) | ||
81 | |||
82 | def set_training_finish_time(self, finish_time): | ||
83 | # True | ||
84 | finish_time_str = datetime.datetime.strftime(finish_time, self.time_format) | ||
85 | return self.redis.set(self.training_time_key, finish_time_str) | ||
86 | |||
87 | def get_training_finish_time(self): | ||
88 | # datetime.datetime or None | ||
89 | res = self.redis.get(self.training_time_key) | ||
90 | finish_time = None if res is None else datetime.datetime.strptime(res, self.time_format) | ||
91 | return finish_time | ||
92 | |||
93 | def del_training_finish_time(self): | ||
94 | # None | ||
95 | return self.redis.delete(self.training_time_key) | ||
96 | |||
97 | def enqueue(self, model_id): | ||
98 | # 1 | 39 | # 1 |
99 | mapping = {model_id: time.time()} | 40 | mapping = {task_id: time.time()} |
100 | return self.redis.zadd(self.queue_key, mapping) | 41 | return self.redis.zadd(self.queue_key, mapping) |
101 | 42 | ||
102 | def dequeue(self): | 43 | def dequeue(self): |
... | @@ -106,110 +47,3 @@ class RedisHandler: | ... | @@ -106,110 +47,3 @@ class RedisHandler: |
106 | pop_item = int(pop_item_list[0]) if pop_item_list else None | 47 | pop_item = int(pop_item_list[0]) if pop_item_list else None |
107 | return pop_item | 48 | return pop_item |
108 | 49 | ||
109 | def get_queue_end(self): | ||
110 | # model_id:int or None | ||
111 | res_list = self.redis.zrange(self.queue_key, -1, -1) | ||
112 | end_id = int(res_list[0]) if res_list else None | ||
113 | return end_id | ||
114 | |||
115 | def get_queue_rank(self, model_id): | ||
116 | # rank:int or None | ||
117 | rank = self.redis.zrank(self.queue_key, model_id) | ||
118 | if rank is None: | ||
119 | return 0 | ||
120 | return rank + 1 | ||
121 | |||
122 | def set_img_info(self, user_id, model_id, count_sum, count_marked): | ||
123 | # True | ||
124 | key = '{0}:{1}:{2}'.format(self.prefix_img_info, user_id, model_id) | ||
125 | mapping = { | ||
126 | 'count_sum': count_sum, | ||
127 | 'count_marked': count_marked | ||
128 | } | ||
129 | return self.redis.hmset(key, mapping) | ||
130 | |||
131 | def get_img_info(self, user_id, model_id): | ||
132 | # {} | ||
133 | # {'count_sum': '70', 'count_marked': '0'} | ||
134 | key = '{0}:{1}:{2}'.format(self.prefix_img_info, user_id, model_id) | ||
135 | res = self.redis.hgetall(key) | ||
136 | dict_str_to_int(res) | ||
137 | return res | ||
138 | |||
139 | def update_img_info(self, user_id, model_id, del_img=False): | ||
140 | # res_count:int | ||
141 | key = '{0}:{1}:{2}'.format(self.prefix_img_info, user_id, model_id) | ||
142 | if del_img: | ||
143 | return self.redis.hincrby(key, 'count_sum', amount=-1) | ||
144 | else: | ||
145 | return self.redis.hincrby(key, 'count_marked') | ||
146 | |||
147 | def del_img_info(self, user_id, model_id): | ||
148 | # None | ||
149 | key = '{0}:{1}:{2}'.format(self.prefix_img_info, user_id, model_id) | ||
150 | return self.redis.delete(key) | ||
151 | |||
152 | def pipe_trained(self, user_id, model_type, model_id, status, success=True): | ||
153 | # redis.set_training_model(user_id, model_type, model_id, model_status) | ||
154 | # redis.del_training_finish_time() | ||
155 | # redis.del_models_list(user_id, model_type) | ||
156 | |||
157 | # redis.set_training_model(user_id, model_type, model_id, model_status) | ||
158 | # redis.del_training_finish_time() | ||
159 | |||
160 | training_model_key = self.get_training_model_key(user_id, model_type) | ||
161 | models_list_key = self.get_models_list_key(user_id, model_type) | ||
162 | mapping = { | ||
163 | 'model_id': model_id, | ||
164 | 'model_status': status | ||
165 | } | ||
166 | |||
167 | with self.redis.client.pipeline() as pipe: | ||
168 | pipe.hmset(training_model_key, mapping) | ||
169 | pipe.delete(self.training_time_key) | ||
170 | if success is True: | ||
171 | pipe.delete(models_list_key) | ||
172 | item = pipe.execute() | ||
173 | return item | ||
174 | |||
175 | def pipe_training(self, user_id, model_type, model_id, status, finish_time): | ||
176 | # redis.dequeue() | ||
177 | # redis.set_training_model(user_id, model_type, model_id, model_status) | ||
178 | # redis.set_training_finish_time(proleptic_finish_time) | ||
179 | |||
180 | training_model_key = self.get_training_model_key(user_id, model_type) | ||
181 | mapping = { | ||
182 | 'model_id': model_id, | ||
183 | 'model_status': status | ||
184 | } | ||
185 | finish_time_str = datetime.datetime.strftime(finish_time, self.time_format) | ||
186 | |||
187 | with self.redis.client.pipeline() as pipe: | ||
188 | pipe.zremrangebyrank(self.queue_key, 0, 0) | ||
189 | pipe.hmset(training_model_key, mapping) | ||
190 | pipe.set(self.training_time_key, finish_time_str) | ||
191 | item = pipe.execute() | ||
192 | return item | ||
193 | |||
194 | def pipe_enqueue(self, model_id, user_id, model_type, status, section=True): | ||
195 | # redis.enqueue(model_id) | ||
196 | # redis.set_training_model(user_id, model_type, | ||
197 | # model_id, ModelStatus.DATA_PRETREATMENT_DONE.value) | ||
198 | # if model_type == ModelType.SECTION.value: | ||
199 | # redis.del_img_info(user_id, model_id) | ||
200 | |||
201 | queue_mapping = {model_id: time.time()} | ||
202 | training_model_key = self.get_training_model_key(user_id, model_type) | ||
203 | mapping = { | ||
204 | 'model_id': model_id, | ||
205 | 'model_status': status | ||
206 | } | ||
207 | img_info_key = '{0}:{1}:{2}'.format(self.prefix_img_info, user_id, model_id) | ||
208 | |||
209 | with self.redis.client.pipeline() as pipe: | ||
210 | pipe.zadd(self.queue_key, queue_mapping) | ||
211 | pipe.hmset(training_model_key, mapping) | ||
212 | if section is True: | ||
213 | pipe.delete(img_info_key) | ||
214 | item = pipe.execute() | ||
215 | return item | ... | ... |
1 | import fitz | 1 | import fitz |
2 | import os | 2 | import os |
3 | from PIL import Image, ImageCms | 3 | from PIL import Image |
4 | from io import BytesIO | 4 | from io import BytesIO |
5 | 5 | ||
6 | 6 | ||
... | @@ -126,7 +126,8 @@ class PdfHandler: | ... | @@ -126,7 +126,8 @@ class PdfHandler: |
126 | fout.close() | 126 | fout.close() |
127 | xreflist.append(xref) | 127 | xreflist.append(xref) |
128 | 128 | ||
129 | def split_il(self, il): | 129 | @staticmethod |
130 | def split_il(il): | ||
130 | img_il_list = [] | 131 | img_il_list = [] |
131 | start = 0 | 132 | start = 0 |
132 | length = len(il) | 133 | length = len(il) | ... | ... |
... | @@ -4,6 +4,7 @@ import os | ... | @@ -4,6 +4,7 @@ import os |
4 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | 4 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
5 | COMMON_CONF_DIR = os.path.dirname(os.path.abspath(__file__)) | 5 | COMMON_CONF_DIR = os.path.dirname(os.path.abspath(__file__)) |
6 | SECRET_CONF_DIR = os.path.join(os.path.dirname(BASE_DIR), 'conf') | 6 | SECRET_CONF_DIR = os.path.join(os.path.dirname(BASE_DIR), 'conf') |
7 | DATA_DIR = os.path.join(os.path.dirname(BASE_DIR), 'data') | ||
7 | SECRET_CONF_FILE = os.path.join(SECRET_CONF_DIR, 'secret.ini') | 8 | SECRET_CONF_FILE = os.path.join(SECRET_CONF_DIR, 'secret.ini') |
8 | LOGGING_CONFIG_FILE = os.path.join(COMMON_CONF_DIR, 'logging.conf') | 9 | LOGGING_CONFIG_FILE = os.path.join(COMMON_CONF_DIR, 'logging.conf') |
9 | 10 | ... | ... |
-
Please register or sign in to post a comment