a4a63da9 by 周伟奇

metaVersionId unique & redis tool

1 parent 11b44dec
...@@ -16,11 +16,19 @@ class Command(BaseCommand): ...@@ -16,11 +16,19 @@ class Command(BaseCommand):
16 def signal_handler(self, sig, frame): 16 def signal_handler(self, sig, frame):
17 self.switch = False # 停止处理文件 17 self.switch = False # 停止处理文件
18 18
19 def get_task_info(self):
20 pass
21
22 def pdf_download(self, task_info):
23 pass
24
19 def handle(self, *args, **kwargs): 25 def handle(self, *args, **kwargs):
20 while self.switch: 26 while self.switch:
21 # 从队列获取文件信息 27 # 从队列获取文件信息
28 task_info = self.get_task_info()
22 # 从EDMS获取PDF文件 29 # 从EDMS获取PDF文件
23 # PDF文件分页转化为图片 30 pdf_path = self.pdf_download(task_info)
31 # PDF文件提取图片
24 # 图片调用算法判断是否为银行流水 32 # 图片调用算法判断是否为银行流水
25 # 图片调用算法OCR为excel文件 33 # 图片调用算法OCR为excel文件
26 # 整合excel文件上传至EDMS 34 # 整合excel文件上传至EDMS
......
...@@ -3,18 +3,19 @@ from django.db import models ...@@ -3,18 +3,19 @@ from django.db import models
3 # Create your models here. 3 # Create your models here.
4 4
5 5
6 # 上传文件记录表/任务表
6 class UploadDocRecords(models.Model): 7 class UploadDocRecords(models.Model):
7 id = models.AutoField(primary_key=True, verbose_name="id") 8 id = models.AutoField(primary_key=True, verbose_name="id")
9 metadata_version_id = models.CharField(max_length=64, verbose_name="元数据版本id")
8 application_id = models.CharField(max_length=64, verbose_name="申请id") 10 application_id = models.CharField(max_length=64, verbose_name="申请id")
9 main_applicant = models.CharField(max_length=16, verbose_name="主申请人") 11 main_applicant = models.CharField(max_length=16, verbose_name="主申请人")
10 co_applicant = models.CharField(max_length=16, verbose_name="共同申请人") 12 co_applicant = models.CharField(max_length=16, verbose_name="共同申请人")
11 guarantor_1 = models.CharField(max_length=16, verbose_name="担保人1") 13 guarantor_1 = models.CharField(max_length=16, verbose_name="担保人1")
12 guarantor_2 = models.CharField(max_length=16, verbose_name="担保人2") 14 guarantor_2 = models.CharField(max_length=16, verbose_name="担保人2")
13 document_name = models.CharField(max_length=255, verbose_name="文件名") 15 document_name = models.CharField(max_length=255, verbose_name="文件名")
14 document_scheme = models.CharField(max_length=64, verbose_name="文件格式") # TODO 确认verbose_name 16 document_scheme = models.CharField(max_length=64, verbose_name="文件格式")
15 business_type = models.CharField(max_length=64, verbose_name="业务类型") 17 business_type = models.CharField(max_length=64, verbose_name="业务类型")
16 data_source = models.CharField(max_length=64, verbose_name="数据源") 18 data_source = models.CharField(max_length=64, verbose_name="数据源")
17 metadata_version_id = models.CharField(max_length=64, verbose_name="元数据版本id")
18 upload_finish_time = models.DateTimeField(verbose_name="上传完成时间") 19 upload_finish_time = models.DateTimeField(verbose_name="上传完成时间")
19 update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') 20 update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')
20 create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') 21 create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间')
......
1 from django.shortcuts import render 1 from django.shortcuts import render
2 from django.db.utils import IntegrityError
2 from webargs import fields, validate 3 from webargs import fields, validate
3 from webargs.djangoparser import use_args, parser 4 from webargs.djangoparser import use_args, parser
4 from common.mixins import GenericView 5 from common.mixins import GenericView
...@@ -49,21 +50,26 @@ class DocView(GenericView): ...@@ -49,21 +50,26 @@ class DocView(GenericView):
49 application_data = args.get('applicationData') 50 application_data = args.get('applicationData')
50 applicant_data = args.get('applicantData') 51 applicant_data = args.get('applicantData')
51 document = args.get('document') 52 document = args.get('document')
52 UploadDocRecords.objects.create( 53 try:
53 application_id=application_data.get('applicationId'), 54 UploadDocRecords.objects.create(
54 main_applicant=applicant_data.get('mainApplicantName'), 55 metadata_version_id=document.get('metadataVersionId'),
55 co_applicant=applicant_data.get('coApplicantName'), 56 application_id=application_data.get('applicationId'),
56 guarantor_1=applicant_data.get('guarantor1Name'), 57 main_applicant=applicant_data.get('mainApplicantName'),
57 guarantor_2=applicant_data.get('guarantor2Name'), 58 co_applicant=applicant_data.get('coApplicantName'),
58 document_name=document.get('documentName'), 59 guarantor_1=applicant_data.get('guarantor1Name'),
59 document_scheme=document.get('documentScheme'), 60 guarantor_2=applicant_data.get('guarantor2Name'),
60 business_type=document.get('businessType'), 61 document_name=document.get('documentName'),
61 data_source=document.get('dataSource'), 62 document_scheme=document.get('documentScheme'),
62 metadata_version_id=document.get('metadataVersionId'), 63 business_type=document.get('businessType'),
63 upload_finish_time=document.get('uploadFinishTime'), 64 data_source=document.get('dataSource'),
64 ) 65 upload_finish_time=document.get('uploadFinishTime'),
65 self.running_log.info('[doc upload success] [args={0}]'.format(args)) 66 )
66 return response.ok() 67 except IntegrityError as e:
68 self.running_log.info('[doc upload fail] [args={0}] [err={1}]'.format(args, e))
69 self.invalid_params(msg='metadataVersionId repeat')
70 else:
71 self.running_log.info('[doc upload success] [args={0}]'.format(args))
72 return response.ok()
67 73
68 post.openapi_doc = ''' 74 post.openapi_doc = '''
69 tags: [doc] 75 tags: [doc]
......
...@@ -54,8 +54,8 @@ def exception_handler(exc, context): ...@@ -54,8 +54,8 @@ def exception_handler(exc, context):
54 return APIResponse(meta_status, msg=str(exc)) 54 return APIResponse(meta_status, msg=str(exc))
55 55
56 elif isinstance(exc, Exception) and hasattr(exc, 'API_META_STATUS'): 56 elif isinstance(exc, Exception) and hasattr(exc, 'API_META_STATUS'):
57 msg = exc.API_META_STATUS.verbose_name 57 # msg = exc.API_META_STATUS.verbose_name
58 return APIResponse(exc.API_META_STATUS.value, msg=msg) 58 return APIResponse(exc.API_META_STATUS.value, msg=str(exc))
59 59
60 error_logger.exception('[system error]') 60 error_logger.exception('[system error]')
61 return APIResponse(MetaStatus.INTERNAL_ERROR.value, 61 return APIResponse(MetaStatus.INTERNAL_ERROR.value,
......
1 from .base import Redis
2 from .handler import RedisHandler
3 from settings import conf
4
5 redis_url = conf.REDIS_URL
6
7 # redis = Redis(redis_url)
8 # redis_handler = RedisHandler(redis)
1 from typing import NamedTuple
2 from urllib.parse import parse_qsl, unquote, urlparse
3 from redis import StrictRedis, ConnectionPool
4
5 try:
6 from collections.abc import Mapping
7 except ImportError:
8 from collections import Mapping
9
10
# Components of a parsed connection URL.
# NOTE: hostname, port, username, password and path are None when the URL
# omits that part, even though the nominal field types below are str/int.
url_parts = NamedTuple('url_parts', [
    ('scheme', str),
    ('hostname', str),
    ('port', int),
    ('username', str),
    ('password', str),
    ('path', str),
    ('query', Mapping),
])


def url_to_parts(url):
    # type: (str) -> url_parts
    """Parse ``url`` into a :class:`url_parts` tuple of components.

    The scheme is taken from the URL itself; the remainder is re-parsed with
    HTTP semantics so that host, port, credentials, path and query string are
    split the same way for any scheme.

    :param url: connection URL of the form ``scheme://[user:pass@]host[:port][/path][?query]``.
    :returns: ``url_parts`` with percent-escapes decoded; the leading ``/`` is
        stripped from the path, empty components become ``None`` and the
        query string becomes a plain ``dict``.
    :raises ValueError: if the scheme is not followed by ``://`` (the rest of
        the URL could otherwise only be sliced at a guessed offset), or if
        the port is not a valid number (raised by ``urlparse``).
    """
    scheme = urlparse(url).scheme
    remainder_start = len(scheme) + 3
    if url[len(scheme):remainder_start] != '://':
        raise ValueError(
            "URL must have the form 'scheme://...': {0!r}".format(url))

    # parse with HTTP URL semantics
    parts = urlparse('http://' + url[remainder_start:])
    path = parts.path or ''
    if path.startswith('/'):
        path = path[1:]
    return url_parts(
        scheme,
        unquote(parts.hostname or '') or None,
        parts.port,
        unquote(parts.username or '') or None,
        unquote(parts.password or '') or None,
        unquote(path or '') or None,
        dict(parse_qsl(parts.query)),
    )
40
41
class Redis:
    """Thin wrapper around a pooled ``StrictRedis`` client built from a URL.

    Each public method forwards to the same-named client command and returns
    the client's result, except ``set`` (which chooses between ``set`` and
    ``setex``) and ``zremrangebyrank`` (which also reads the removed range).
    """

    def __init__(self, url, connection_pool=None, max_connections=None, socket_timeout=120,
                 retry_on_timeout=None, socket_connect_timeout=None):
        """Create the client.

        :param url: connection URL; host, port, password and the database
            number (the path segment) are read from it.
        :param connection_pool: optional pool class to use instead of the
            default ``ConnectionPool``.
        :param max_connections: forwarded to the pool.
        :param socket_timeout: seconds, converted to float when truthy.
        :param retry_on_timeout: forwarded to the pool, ``False`` when unset.
        :param socket_connect_timeout: seconds, converted to float when truthy.
        :raises ValueError: if the URL's path segment is not an integer.
        """
        self._ConnectionPool = connection_pool
        parts = url_to_parts(url)
        self.conn_params = {
            'host': parts.hostname,
            'port': parts.port,
            'db': self._parse_db(parts.path),
            'password': parts.password,
            'max_connections': max_connections,
            'socket_timeout': socket_timeout and float(socket_timeout),
            'retry_on_timeout': retry_on_timeout or False,
            'socket_connect_timeout':
                socket_connect_timeout and float(socket_connect_timeout),
            'decode_responses': True
        }

        self.client = StrictRedis(
            connection_pool=self._get_pool(**self.conn_params),
        )

    @staticmethod
    def _parse_db(path):
        """Return the database number encoded in the URL path.

        A URL without a path segment yields ``None`` from the parser; that
        case selects database 0 instead of failing in ``int(None)``.
        """
        return int(path) if path else 0

    @property
    def ConnectionPool(self):
        """Pool class in use; falls back to ``ConnectionPool`` on first access."""
        if self._ConnectionPool is None:
            self._ConnectionPool = ConnectionPool
        return self._ConnectionPool

    def _get_pool(self, **params):
        """Instantiate the pool class with the given connection parameters."""
        return self.ConnectionPool(**params)

    def get(self, key):
        return self.client.get(key)

    def mget(self, keys):
        return self.client.mget(keys)

    def set(self, key, value, expires=None):
        """Store ``value``; use ``setex`` when a truthy ``expires`` is given."""
        if expires:
            return self.client.setex(key, expires, value)
        return self.client.set(key, value)

    def delete(self, key):
        """Delete ``key`` and return the client's result, like the other wrappers."""
        return self.client.delete(key)

    def incr(self, key):
        return self.client.incr(key)

    def expire(self, key, value):
        return self.client.expire(key, value)

    def hmset(self, name, mapping):
        # NOTE(review): newer redis-py releases are understood to deprecate
        # hmset in favour of hset(name, mapping=...) - confirm against the
        # pinned client version before switching.
        return self.client.hmset(name, mapping)

    def hgetall(self, name):
        return self.client.hgetall(name)

    def hincrby(self, name, key, amount=1):
        return self.client.hincrby(name, key, amount)

    def zadd(self, name, mapping):
        return self.client.zadd(name, mapping)

    def zremrangebyrank(self, name, start, end):
        """Read and then remove the members ranked ``start``..``end``.

        Both commands are queued on one pipeline. Returns the pipeline's
        result list: the ``zrange`` result first, then the
        ``zremrangebyrank`` result.
        """
        with self.client.pipeline() as pipe:
            pipe.zrange(name, start, end)
            pipe.zremrangebyrank(name, start, end)
            return pipe.execute()

    def zrank(self, name, value):
        return self.client.zrank(name, value)

    def zrange(self, name, start, end):
        return self.client.zrange(name, start, end)
1 import json
2 import time
3 import datetime
4
5
class DateTimeJSONEncoder(json.JSONEncoder):
    """JSONEncoder subclass that also serialises ``datetime.datetime`` values.

    Datetimes are written as ISO-8601 strings with the fractional seconds
    cut to three digits and a ``+00:00`` offset shortened to ``Z``. Every
    other unsupported type is passed to the base class.
    """

    def default(self, o):
        # Anything that is not a datetime goes to the base implementation.
        if not isinstance(o, datetime.datetime):
            return super().default(o)

        # See "Date Time String Format" in the ECMA-262 specification.
        text = o.isoformat()
        if o.microsecond:
            # Keep the first three fractional digits, drop the other three.
            text = text[:23] + text[26:]
        if text.endswith('+00:00'):
            text = text[:-6] + 'Z'
        return text
22
23
def dict_str_to_int(res):
    """Convert every value of ``res`` to ``int`` in place.

    The mapping is mutated and nothing is returned. ``int()`` raises if a
    value cannot be converted.
    """
    for field in res:
        # Rebinding an existing key does not resize the dict, so this is
        # safe while iterating over it.
        res[field] = int(res[field])
27
28
class RedisHandler:
    """Helpers for the ``automl:*`` keys: training state, cached model lists,
    the training queue and per-model image counters.

    ``redis`` is expected to expose the wrapper methods used below (``get``,
    ``set``, ``delete``, ``hmset``, ``hgetall``, ``hincrby``, ``zadd``,
    ``zrank``, ``zrange``, ``zremrangebyrank``) plus a ``client`` attribute
    that provides ``pipeline()``.
    """

    def __init__(self, redis):
        self.redis = redis
        self.time_expires = datetime.timedelta(hours=24)
        self.time_format = '%a %b %d %H:%M:%S %Y'
        self.prefix = 'automl'
        self.training_time_key = '{0}:training_time'.format(self.prefix)
        self.queue_key = '{0}:queue'.format(self.prefix)
        self.prefix_training = '{0}:training'.format(self.prefix)
        self.prefix_models = '{0}:models'.format(self.prefix)
        self.prefix_img_info = '{0}:img_info'.format(self.prefix)

    # ------------------------------------------------------------------
    # key / payload builders
    # ------------------------------------------------------------------

    def get_training_model_key(self, user_id, model_type):
        return '{0}:{1}:{2}'.format(self.prefix_training, user_id, model_type)

    def get_models_list_key(self, user_id, model_type):
        return '{0}:{1}:{2}'.format(self.prefix_models, user_id, model_type)

    def get_img_info_key(self, user_id, model_id):
        """Key of the image-counter hash, built like the other key helpers."""
        return '{0}:{1}:{2}'.format(self.prefix_img_info, user_id, model_id)

    @staticmethod
    def _model_mapping(model_id, status):
        """Hash payload stored under a training-model key."""
        return {
            'model_id': model_id,
            'model_status': status
        }

    # ------------------------------------------------------------------
    # training model
    # ------------------------------------------------------------------

    def set_training_model(self, user_id, model_type, model_id, status):
        """Store the model id and status; returns the ``hmset`` result."""
        key = self.get_training_model_key(user_id, model_type)
        return self.redis.hmset(key, self._model_mapping(model_id, status))

    def get_training_model(self, user_id, model_type):
        """Return ``{}`` or ``{'model_id': int, 'model_status': int}``."""
        key = self.get_training_model_key(user_id, model_type)
        res = self.redis.hgetall(key)
        dict_str_to_int(res)
        return res

    # ------------------------------------------------------------------
    # cached model lists
    # ------------------------------------------------------------------

    def set_models_list(self, user_id, model_type, models_list):
        """Cache ``models_list`` as JSON, expiring after ``time_expires``."""
        key = self.get_models_list_key(user_id, model_type)
        value = json.dumps(models_list, cls=DateTimeJSONEncoder)
        return self.redis.set(key, value, expires=self.time_expires)

    def get_models_list(self, user_id, model_type):
        """Return the decoded JSON value, or ``None`` when the key is absent."""
        key = self.get_models_list_key(user_id, model_type)
        res_str = self.redis.get(key)
        return None if res_str is None else json.loads(res_str)

    def del_models_list(self, user_id, model_type):
        """Delete the cached list; returns whatever ``redis.delete`` returns."""
        key = self.get_models_list_key(user_id, model_type)
        return self.redis.delete(key)

    # ------------------------------------------------------------------
    # training finish time
    # ------------------------------------------------------------------

    def set_training_finish_time(self, finish_time):
        """Store ``finish_time`` formatted with ``time_format``."""
        finish_time_str = datetime.datetime.strftime(finish_time, self.time_format)
        return self.redis.set(self.training_time_key, finish_time_str)

    def get_training_finish_time(self):
        """Return the stored time as ``datetime.datetime``, or ``None``."""
        res = self.redis.get(self.training_time_key)
        if res is None:
            return None
        return datetime.datetime.strptime(res, self.time_format)

    def del_training_finish_time(self):
        """Delete the stored time; returns whatever ``redis.delete`` returns."""
        return self.redis.delete(self.training_time_key)

    # ------------------------------------------------------------------
    # training queue (sorted set scored by enqueue timestamp)
    # ------------------------------------------------------------------

    def enqueue(self, model_id):
        """Add ``model_id`` to the queue with the current time as score."""
        mapping = {model_id: time.time()}
        return self.redis.zadd(self.queue_key, mapping)

    def dequeue(self):
        """Remove the rank-0 member and return it as ``int``.

        Returns ``None`` when the queue is empty.
        """
        res_list = self.redis.zremrangebyrank(self.queue_key, 0, 0)
        # First element of the result is the list of members that were read
        # before removal.
        pop_item_list = res_list[0]
        return int(pop_item_list[0]) if pop_item_list else None

    def get_queue_end(self):
        """Return the last-ranked member as ``int``, or ``None`` when empty."""
        res_list = self.redis.zrange(self.queue_key, -1, -1)
        return int(res_list[0]) if res_list else None

    def get_queue_rank(self, model_id):
        """Return the 1-based queue position of ``model_id``.

        Returns ``0`` (not ``None``) when the model is not in the queue.
        """
        rank = self.redis.zrank(self.queue_key, model_id)
        if rank is None:
            return 0
        return rank + 1

    # ------------------------------------------------------------------
    # image counters
    # ------------------------------------------------------------------

    def set_img_info(self, user_id, model_id, count_sum, count_marked):
        """Store both counters; returns the ``hmset`` result."""
        key = self.get_img_info_key(user_id, model_id)
        mapping = {
            'count_sum': count_sum,
            'count_marked': count_marked
        }
        return self.redis.hmset(key, mapping)

    def get_img_info(self, user_id, model_id):
        """Return ``{}`` or ``{'count_sum': int, 'count_marked': int}``."""
        key = self.get_img_info_key(user_id, model_id)
        res = self.redis.hgetall(key)
        dict_str_to_int(res)
        return res

    def update_img_info(self, user_id, model_id, del_img=False):
        """Adjust one counter and return its new value.

        With ``del_img`` set, ``count_sum`` is decremented by one; otherwise
        ``count_marked`` is incremented by one.
        """
        key = self.get_img_info_key(user_id, model_id)
        if del_img:
            return self.redis.hincrby(key, 'count_sum', amount=-1)
        return self.redis.hincrby(key, 'count_marked')

    def del_img_info(self, user_id, model_id):
        """Delete the counters; returns whatever ``redis.delete`` returns."""
        key = self.get_img_info_key(user_id, model_id)
        return self.redis.delete(key)

    # ------------------------------------------------------------------
    # multi-command pipelines
    # ------------------------------------------------------------------

    def pipe_trained(self, user_id, model_type, model_id, status, success=True):
        """Record the end of a training run in one pipeline.

        Sets the training-model hash and deletes the finish time; when
        ``success`` is exactly ``True`` the cached model list is deleted too.
        Returns the pipeline's result list.
        """
        training_model_key = self.get_training_model_key(user_id, model_type)
        models_list_key = self.get_models_list_key(user_id, model_type)
        mapping = self._model_mapping(model_id, status)

        with self.redis.client.pipeline() as pipe:
            pipe.hmset(training_model_key, mapping)
            pipe.delete(self.training_time_key)
            if success is True:
                pipe.delete(models_list_key)
            return pipe.execute()

    def pipe_training(self, user_id, model_type, model_id, status, finish_time):
        """Record the start of a training run in one pipeline.

        Removes the rank-0 queue member, sets the training-model hash and
        stores the formatted ``finish_time``. Returns the pipeline's result
        list.
        """
        training_model_key = self.get_training_model_key(user_id, model_type)
        mapping = self._model_mapping(model_id, status)
        finish_time_str = datetime.datetime.strftime(finish_time, self.time_format)

        with self.redis.client.pipeline() as pipe:
            pipe.zremrangebyrank(self.queue_key, 0, 0)
            pipe.hmset(training_model_key, mapping)
            pipe.set(self.training_time_key, finish_time_str)
            return pipe.execute()

    def pipe_enqueue(self, model_id, user_id, model_type, status, section=True):
        """Queue a model for training in one pipeline.

        Adds ``model_id`` to the queue scored by the current time and sets
        the training-model hash; when ``section`` is exactly ``True`` the
        model's image counters are deleted too. Returns the pipeline's result
        list.
        """
        queue_mapping = {model_id: time.time()}
        training_model_key = self.get_training_model_key(user_id, model_type)
        mapping = self._model_mapping(model_id, status)
        img_info_key = self.get_img_info_key(user_id, model_id)

        with self.redis.client.pipeline() as pipe:
            pipe.zadd(self.queue_key, queue_mapping)
            pipe.hmset(training_model_key, mapping)
            if section is True:
                pipe.delete(img_info_key)
            return pipe.execute()
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!