metaVersionId unique & redis tool
Showing 7 changed files with 376 additions and 20 deletions
... | @@ -16,11 +16,19 @@ class Command(BaseCommand): | ... | @@ -16,11 +16,19 @@ class Command(BaseCommand): |
16 | def signal_handler(self, sig, frame): | 16 | def signal_handler(self, sig, frame): |
17 | self.switch = False # 停止处理文件 | 17 | self.switch = False # 停止处理文件 |
18 | 18 | ||
19 | def get_task_info(self): | ||
20 | pass | ||
21 | |||
22 | def pdf_download(self, task_info): | ||
23 | pass | ||
24 | |||
19 | def handle(self, *args, **kwargs): | 25 | def handle(self, *args, **kwargs): |
20 | while self.switch: | 26 | while self.switch: |
21 | # 从队列获取文件信息 | 27 | # 从队列获取文件信息 |
28 | task_info = self.get_task_info() | ||
22 | # 从EDMS获取PDF文件 | 29 | # 从EDMS获取PDF文件 |
23 | # PDF文件分页转化为图片 | 30 | pdf_path = self.pdf_download(task_info) |
31 | # PDF文件提取图片 | ||
24 | # 图片调用算法判断是否为银行流水 | 32 | # 图片调用算法判断是否为银行流水 |
25 | # 图片调用算法OCR为excel文件 | 33 | # 图片调用算法OCR为excel文件 |
26 | # 整合excel文件上传至EDMS | 34 | # 整合excel文件上传至EDMS | ... | ... |
... | @@ -3,18 +3,19 @@ from django.db import models | ... | @@ -3,18 +3,19 @@ from django.db import models |
3 | # Create your models here. | 3 | # Create your models here. |
4 | 4 | ||
5 | 5 | ||
6 | # 上传文件记录表/任务表 | ||
6 | class UploadDocRecords(models.Model): | 7 | class UploadDocRecords(models.Model): |
7 | id = models.AutoField(primary_key=True, verbose_name="id") | 8 | id = models.AutoField(primary_key=True, verbose_name="id") |
9 | metadata_version_id = models.CharField(max_length=64, verbose_name="元数据版本id") | ||
8 | application_id = models.CharField(max_length=64, verbose_name="申请id") | 10 | application_id = models.CharField(max_length=64, verbose_name="申请id") |
9 | main_applicant = models.CharField(max_length=16, verbose_name="主申请人") | 11 | main_applicant = models.CharField(max_length=16, verbose_name="主申请人") |
10 | co_applicant = models.CharField(max_length=16, verbose_name="共同申请人") | 12 | co_applicant = models.CharField(max_length=16, verbose_name="共同申请人") |
11 | guarantor_1 = models.CharField(max_length=16, verbose_name="担保人1") | 13 | guarantor_1 = models.CharField(max_length=16, verbose_name="担保人1") |
12 | guarantor_2 = models.CharField(max_length=16, verbose_name="担保人2") | 14 | guarantor_2 = models.CharField(max_length=16, verbose_name="担保人2") |
13 | document_name = models.CharField(max_length=255, verbose_name="文件名") | 15 | document_name = models.CharField(max_length=255, verbose_name="文件名") |
14 | document_scheme = models.CharField(max_length=64, verbose_name="文件格式") # TODO 确认verbose_name | 16 | document_scheme = models.CharField(max_length=64, verbose_name="文件格式") |
15 | business_type = models.CharField(max_length=64, verbose_name="业务类型") | 17 | business_type = models.CharField(max_length=64, verbose_name="业务类型") |
16 | data_source = models.CharField(max_length=64, verbose_name="数据源") | 18 | data_source = models.CharField(max_length=64, verbose_name="数据源") |
17 | metadata_version_id = models.CharField(max_length=64, verbose_name="元数据版本id") | ||
18 | upload_finish_time = models.DateTimeField(verbose_name="上传完成时间") | 19 | upload_finish_time = models.DateTimeField(verbose_name="上传完成时间") |
19 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') | 20 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') |
20 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') | 21 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') | ... | ... |
1 | from django.shortcuts import render | 1 | from django.shortcuts import render |
2 | from django.db.utils import IntegrityError | ||
2 | from webargs import fields, validate | 3 | from webargs import fields, validate |
3 | from webargs.djangoparser import use_args, parser | 4 | from webargs.djangoparser import use_args, parser |
4 | from common.mixins import GenericView | 5 | from common.mixins import GenericView |
... | @@ -49,21 +50,26 @@ class DocView(GenericView): | ... | @@ -49,21 +50,26 @@ class DocView(GenericView): |
49 | application_data = args.get('applicationData') | 50 | application_data = args.get('applicationData') |
50 | applicant_data = args.get('applicantData') | 51 | applicant_data = args.get('applicantData') |
51 | document = args.get('document') | 52 | document = args.get('document') |
52 | UploadDocRecords.objects.create( | 53 | try: |
53 | application_id=application_data.get('applicationId'), | 54 | UploadDocRecords.objects.create( |
54 | main_applicant=applicant_data.get('mainApplicantName'), | 55 | metadata_version_id=document.get('metadataVersionId'), |
55 | co_applicant=applicant_data.get('coApplicantName'), | 56 | application_id=application_data.get('applicationId'), |
56 | guarantor_1=applicant_data.get('guarantor1Name'), | 57 | main_applicant=applicant_data.get('mainApplicantName'), |
57 | guarantor_2=applicant_data.get('guarantor2Name'), | 58 | co_applicant=applicant_data.get('coApplicantName'), |
58 | document_name=document.get('documentName'), | 59 | guarantor_1=applicant_data.get('guarantor1Name'), |
59 | document_scheme=document.get('documentScheme'), | 60 | guarantor_2=applicant_data.get('guarantor2Name'), |
60 | business_type=document.get('businessType'), | 61 | document_name=document.get('documentName'), |
61 | data_source=document.get('dataSource'), | 62 | document_scheme=document.get('documentScheme'), |
62 | metadata_version_id=document.get('metadataVersionId'), | 63 | business_type=document.get('businessType'), |
63 | upload_finish_time=document.get('uploadFinishTime'), | 64 | data_source=document.get('dataSource'), |
64 | ) | 65 | upload_finish_time=document.get('uploadFinishTime'), |
65 | self.running_log.info('[doc upload success] [args={0}]'.format(args)) | 66 | ) |
66 | return response.ok() | 67 | except IntegrityError as e: |
68 | self.running_log.info('[doc upload fail] [args={0}] [err={1}]'.format(args, e)) | ||
69 | self.invalid_params(msg='metadataVersionId repeat') | ||
70 | else: | ||
71 | self.running_log.info('[doc upload success] [args={0}]'.format(args)) | ||
72 | return response.ok() | ||
67 | 73 | ||
68 | post.openapi_doc = ''' | 74 | post.openapi_doc = ''' |
69 | tags: [doc] | 75 | tags: [doc] | ... | ... |
... | @@ -54,8 +54,8 @@ def exception_handler(exc, context): | ... | @@ -54,8 +54,8 @@ def exception_handler(exc, context): |
54 | return APIResponse(meta_status, msg=str(exc)) | 54 | return APIResponse(meta_status, msg=str(exc)) |
55 | 55 | ||
56 | elif isinstance(exc, Exception) and hasattr(exc, 'API_META_STATUS'): | 56 | elif isinstance(exc, Exception) and hasattr(exc, 'API_META_STATUS'): |
57 | msg = exc.API_META_STATUS.verbose_name | 57 | # msg = exc.API_META_STATUS.verbose_name |
58 | return APIResponse(exc.API_META_STATUS.value, msg=msg) | 58 | return APIResponse(exc.API_META_STATUS.value, msg=str(exc)) |
59 | 59 | ||
60 | error_logger.exception('[system error]') | 60 | error_logger.exception('[system error]') |
61 | return APIResponse(MetaStatus.INTERNAL_ERROR.value, | 61 | return APIResponse(MetaStatus.INTERNAL_ERROR.value, | ... | ... |
src/common/redis_cache/__init__.py
0 → 100644
src/common/redis_cache/base.py
0 → 100644
1 | from typing import NamedTuple | ||
2 | from urllib.parse import parse_qsl, unquote, urlparse | ||
3 | from redis import StrictRedis, ConnectionPool | ||
4 | |||
5 | try: | ||
6 | from collections.abc import Mapping | ||
7 | except ImportError: | ||
8 | from collections import Mapping | ||
9 | |||
10 | |||
# Components of a parsed connection URL (e.g. redis://:pass@host:6379/0).
url_parts = NamedTuple('url_parts', [
    ('scheme', str),
    ('hostname', str),
    ('port', int),
    ('username', str),
    ('password', str),
    ('path', str),
    ('query', Mapping),
])


def url_to_parts(url):
    # type: (str) -> url_parts
    """Parse *url* into a :class:`url_parts` tuple of components.

    The scheme is stripped and the remainder is re-parsed with HTTP URL
    semantics so host, port, credentials and query are extracted uniformly.
    Empty components become ``None``, percent-escapes are unquoted, and the
    leading ``/`` of the path is removed (for ``redis://`` URLs the path is
    the database number).
    """
    scheme = urlparse(url).scheme
    # Drop "<scheme>://" — assumes the URL contains the "://" separator.
    schemeless = url[len(scheme) + 3:]
    # Re-parse the remainder with HTTP URL semantics.
    parts = urlparse('http://' + schemeless)
    path = parts.path or ''
    path = path[1:] if path and path[0] == '/' else path
    return url_parts(
        scheme,
        unquote(parts.hostname or '') or None,
        parts.port,
        unquote(parts.username or '') or None,
        unquote(parts.password or '') or None,
        unquote(path or '') or None,
        dict(parse_qsl(parts.query)),
    )
40 | |||
41 | |||
class Redis:
    """Thin wrapper around :class:`redis.StrictRedis` configured from a URL.

    Parses *url* (e.g. ``redis://:password@host:6379/0``) with
    :func:`url_to_parts` and creates a pooled client. Responses are decoded
    to ``str`` (``decode_responses=True``).
    """

    def __init__(self, url, connection_pool=None, max_connections=None, socket_timeout=120,
                 retry_on_timeout=None, socket_connect_timeout=None):
        # connection_pool: optional ConnectionPool *class* override.
        self._ConnectionPool = connection_pool
        scheme, host, port, _, password, path, query = url_to_parts(url)
        self.conn_params = {
            'host': host,
            'port': port,
            # The URL path carries the database number; default to db 0 when
            # the URL has no path (int(None) would raise TypeError).
            'db': int(path) if path else 0,
            'password': password,
            'max_connections': max_connections,
            'socket_timeout': socket_timeout and float(socket_timeout),
            'retry_on_timeout': retry_on_timeout or False,
            'socket_connect_timeout':
                socket_connect_timeout and float(socket_connect_timeout),
            'decode_responses': True
        }

        self.client = StrictRedis(
            connection_pool=self._get_pool(**self.conn_params),
        )

    @property
    def ConnectionPool(self):
        # Lazily fall back to the default redis ConnectionPool class.
        if self._ConnectionPool is None:
            self._ConnectionPool = ConnectionPool
        return self._ConnectionPool

    def _get_pool(self, **params):
        return self.ConnectionPool(**params)

    def get(self, key):
        return self.client.get(key)

    def mget(self, keys):
        return self.client.mget(keys)

    def set(self, key, value, expires=None):
        # With *expires* (seconds or timedelta) use SETEX, otherwise plain SET.
        if expires:
            return self.client.setex(key, expires, value)
        else:
            return self.client.set(key, value)

    def delete(self, key):
        self.client.delete(key)

    def incr(self, key):
        return self.client.incr(key)

    def expire(self, key, value):
        return self.client.expire(key, value)

    def hmset(self, name, mapping):
        # NOTE: HMSET is deprecated in newer redis-py in favor of hset(mapping=...).
        return self.client.hmset(name, mapping)

    def hgetall(self, name):
        return self.client.hgetall(name)

    def hincrby(self, name, key, amount=1):
        return self.client.hincrby(name, key, amount)

    def zadd(self, name, mapping):
        return self.client.zadd(name, mapping)

    def zremrangebyrank(self, name, start, end):
        """Atomically read then remove the [start, end] rank range.

        Returns the pipeline results: ``[zrange_result, removed_count]``.
        """
        with self.client.pipeline() as pipe:
            pipe.zrange(name, start, end)
            pipe.zremrangebyrank(name, start, end)
            item = pipe.execute()
        return item

    def zrank(self, name, value):
        return self.client.zrank(name, value)

    def zrange(self, name, start, end):
        return self.client.zrange(name, start, end)
src/common/redis_cache/handler.py
0 → 100644
1 | import json | ||
2 | import time | ||
3 | import datetime | ||
4 | |||
5 | |||
class DateTimeJSONEncoder(json.JSONEncoder):
    """JSON encoder that also serializes ``datetime.datetime`` values.

    Datetimes are emitted in the ECMA-262 "Date Time String Format":
    ISO 8601, truncated to millisecond precision, with a UTC offset of
    ``+00:00`` rendered as ``Z``.
    """

    def default(self, o):
        if not isinstance(o, datetime.datetime):
            # Anything we don't recognise goes to the base class (raises TypeError).
            return super().default(o)
        encoded = o.isoformat()
        if o.microsecond:
            # Keep only the first three fractional digits (milliseconds).
            encoded = encoded[:23] + encoded[26:]
        if encoded.endswith('+00:00'):
            encoded = encoded[:-6] + 'Z'
        return encoded
22 | |||
23 | |||
def dict_str_to_int(res):
    """Convert every value of *res* to ``int``, in place (returns ``None``)."""
    for key in res:
        res[key] = int(res[key])
27 | |||
28 | |||
class RedisHandler:
    """Key layout and operations for the ``automl`` Redis namespace.

    Wraps a :class:`Redis`-like client and centralizes every key format used
    by the app: per-user training state (hash), cached model lists (JSON
    string with 24h TTL), the global training queue (sorted set scored by
    enqueue time), the predicted training finish time, and per-model image
    counters — plus the pipelined multi-key state transitions.
    """

    def __init__(self, redis):
        # redis: object exposing get/set/delete/hmset/hgetall/hincrby/zadd/...
        # (see common.redis_cache.base.Redis) and a .client with pipeline().
        self.redis = redis
        self.time_expires = datetime.timedelta(hours=24)
        self.time_format = '%a %b %d %H:%M:%S %Y'
        self.prefix = 'automl'
        self.training_time_key = '{0}:training_time'.format(self.prefix)
        self.queue_key = '{0}:queue'.format(self.prefix)
        self.prefix_training = '{0}:training'.format(self.prefix)
        self.prefix_models = '{0}:models'.format(self.prefix)
        self.prefix_img_info = '{0}:img_info'.format(self.prefix)

    def get_training_model_key(self, user_id, model_type):
        return '{0}:{1}:{2}'.format(self.prefix_training, user_id, model_type)

    def get_models_list_key(self, user_id, model_type):
        return '{0}:{1}:{2}'.format(self.prefix_models, user_id, model_type)

    def get_img_info_key(self, user_id, model_id):
        # Single source of truth for the img_info key layout (previously
        # duplicated inline in four methods and in pipe_enqueue).
        return '{0}:{1}:{2}'.format(self.prefix_img_info, user_id, model_id)

    def set_training_model(self, user_id, model_type, model_id, status):
        """Store the in-training model id/status hash. Returns True."""
        key = self.get_training_model_key(user_id, model_type)
        mapping = {
            'model_id': model_id,
            'model_status': status
        }
        return self.redis.hmset(key, mapping)

    def get_training_model(self, user_id, model_type):
        """Return {} or {'model_id': int, 'model_status': int}."""
        key = self.get_training_model_key(user_id, model_type)
        res = self.redis.hgetall(key)
        # Redis returns hash values as strings; normalize to int in place.
        dict_str_to_int(res)
        return res

    def set_models_list(self, user_id, model_type, models_list):
        """Cache *models_list* as JSON with a 24h TTL. Returns True."""
        key = self.get_models_list_key(user_id, model_type)
        value = json.dumps(models_list, cls=DateTimeJSONEncoder)
        return self.redis.set(key, value, expires=self.time_expires)

    def get_models_list(self, user_id, model_type):
        """Return the cached list, or None on a cache miss."""
        key = self.get_models_list_key(user_id, model_type)
        res_str = self.redis.get(key)
        res = None if res_str is None else json.loads(res_str)
        return res

    def del_models_list(self, user_id, model_type):
        # Returns None.
        key = self.get_models_list_key(user_id, model_type)
        return self.redis.delete(key)

    def set_training_finish_time(self, finish_time):
        """Store the predicted finish time (formatted string). Returns True."""
        finish_time_str = datetime.datetime.strftime(finish_time, self.time_format)
        return self.redis.set(self.training_time_key, finish_time_str)

    def get_training_finish_time(self):
        """Return the stored finish time as datetime, or None if unset."""
        res = self.redis.get(self.training_time_key)
        finish_time = None if res is None else datetime.datetime.strptime(res, self.time_format)
        return finish_time

    def del_training_finish_time(self):
        # Returns None.
        return self.redis.delete(self.training_time_key)

    def enqueue(self, model_id):
        """Append *model_id* to the queue, scored by current time. Returns 1."""
        mapping = {model_id: time.time()}
        return self.redis.zadd(self.queue_key, mapping)

    def dequeue(self):
        """Pop and return the head model_id as int, or None if queue empty."""
        # zremrangebyrank returns [zrange_result, removed_count].
        res_list = self.redis.zremrangebyrank(self.queue_key, 0, 0)
        pop_item_list = res_list[0]
        pop_item = int(pop_item_list[0]) if pop_item_list else None
        return pop_item

    def get_queue_end(self):
        """Return the tail model_id as int, or None if queue empty."""
        res_list = self.redis.zrange(self.queue_key, -1, -1)
        end_id = int(res_list[0]) if res_list else None
        return end_id

    def get_queue_rank(self, model_id):
        """Return the 1-based queue position, or 0 if not queued."""
        rank = self.redis.zrank(self.queue_key, model_id)
        if rank is None:
            return 0
        return rank + 1

    def set_img_info(self, user_id, model_id, count_sum, count_marked):
        """Store total/marked image counters for a model. Returns True."""
        key = self.get_img_info_key(user_id, model_id)
        mapping = {
            'count_sum': count_sum,
            'count_marked': count_marked
        }
        return self.redis.hmset(key, mapping)

    def get_img_info(self, user_id, model_id):
        """Return {} or {'count_sum': int, 'count_marked': int}."""
        key = self.get_img_info_key(user_id, model_id)
        res = self.redis.hgetall(key)
        dict_str_to_int(res)
        return res

    def update_img_info(self, user_id, model_id, del_img=False):
        """Adjust counters: deleting an image decrements the total,
        otherwise increments the marked count. Returns the new count."""
        key = self.get_img_info_key(user_id, model_id)
        if del_img:
            return self.redis.hincrby(key, 'count_sum', amount=-1)
        else:
            return self.redis.hincrby(key, 'count_marked')

    def del_img_info(self, user_id, model_id):
        # Returns None.
        key = self.get_img_info_key(user_id, model_id)
        return self.redis.delete(key)

    def pipe_trained(self, user_id, model_type, model_id, status, success=True):
        """Atomically record a finished training run.

        Equivalent to set_training_model + del_training_finish_time, plus
        del_models_list when *success* is True, in one pipeline.
        Returns the pipeline results.
        """
        training_model_key = self.get_training_model_key(user_id, model_type)
        models_list_key = self.get_models_list_key(user_id, model_type)
        mapping = {
            'model_id': model_id,
            'model_status': status
        }

        with self.redis.client.pipeline() as pipe:
            pipe.hmset(training_model_key, mapping)
            pipe.delete(self.training_time_key)
            if success is True:
                # Invalidate the cached model list only on success.
                pipe.delete(models_list_key)
            item = pipe.execute()
        return item

    def pipe_training(self, user_id, model_type, model_id, status, finish_time):
        """Atomically start a training run.

        Equivalent to dequeue + set_training_model +
        set_training_finish_time in one pipeline. Returns the pipeline
        results.
        """
        training_model_key = self.get_training_model_key(user_id, model_type)
        mapping = {
            'model_id': model_id,
            'model_status': status
        }
        finish_time_str = datetime.datetime.strftime(finish_time, self.time_format)

        with self.redis.client.pipeline() as pipe:
            pipe.zremrangebyrank(self.queue_key, 0, 0)
            pipe.hmset(training_model_key, mapping)
            pipe.set(self.training_time_key, finish_time_str)
            item = pipe.execute()
        return item

    def pipe_enqueue(self, model_id, user_id, model_type, status, section=True):
        """Atomically enqueue a model for training.

        Equivalent to enqueue + set_training_model, plus del_img_info when
        *section* is True (section-type models), in one pipeline. Returns
        the pipeline results.
        """
        queue_mapping = {model_id: time.time()}
        training_model_key = self.get_training_model_key(user_id, model_type)
        mapping = {
            'model_id': model_id,
            'model_status': status
        }
        img_info_key = self.get_img_info_key(user_id, model_id)

        with self.redis.client.pipeline() as pipe:
            pipe.zadd(self.queue_key, queue_mapping)
            pipe.hmset(training_model_key, mapping)
            if section is True:
                pipe.delete(img_info_key)
            item = pipe.execute()
        return item
-
Please register or sign in to post a comment