ADD:批量接口
Showing
2 changed files
with
63 additions
and
1 deletion
| ... | @@ -10,4 +10,7 @@ urlpatterns = [ | ... | @@ -10,4 +10,7 @@ urlpatterns = [ |
| 10 | path(r'invoice/downloadExcel', views.InvoiceExcelView.as_view()), | 10 | path(r'invoice/downloadExcel', views.InvoiceExcelView.as_view()), |
| 11 | path(r'invoice/queryInfo', views.InvoiceQueryInfoView.as_view()), | 11 | path(r'invoice/queryInfo', views.InvoiceQueryInfoView.as_view()), |
| 12 | path(r'contract/v1', views.SEContractView.as_view()), | 12 | path(r'contract/v1', views.SEContractView.as_view()), |
| 13 | path(r'reocr', views.DocReOcrView.as_view()), | ||
| 14 | path(r'batch/reocr', views.BatchReOcrView.as_view()), | ||
| 15 | |||
| 13 | ] | 16 | ] | ... | ... |
| ... | @@ -591,7 +591,11 @@ invoice_download_args = { | ... | @@ -591,7 +591,11 @@ invoice_download_args = { |
| 591 | 591 | ||
# Argument schema for re-OCR of a single document: both fields are mandatory.
# (Presumably consumed by DocReOcrView via `use_args` -- the usage is not
# visible in this hunk.)
doc_reocr_args = dict(
    doc_id=fields.Int(required=True),
    application_entity=fields.Str(required=True),
)

# Argument schema for the batch re-OCR endpoint (BatchReOcrView.post): only
# the application entity is required.
batch_reocr_args = dict(
    application_entity=fields.Str(required=True),
)
| 596 | 600 | ||
| 597 | 601 | ||
| ... | @@ -2123,4 +2127,59 @@ class DocReOcrView(GenericView, DocHandler): | ... | @@ -2123,4 +2127,59 @@ class DocReOcrView(GenericView, DocHandler): |
| 2123 | '[is_priority={3}] [enqueue_res={4}] [classify_1={5}]'.format(args, prefix, doc.id, | 2127 | '[is_priority={3}] [enqueue_res={4}] [classify_1={5}]'.format(args, prefix, doc.id, |
| 2124 | is_priority, enqueue_res, classify_1)) | 2128 | is_priority, enqueue_res, classify_1)) |
| 2125 | 2129 | ||
| 2130 | return response.ok() | ||
| 2131 | |||
class BatchReOcrView(GenericView, DocHandler):
    """Batch re-OCR endpoint for documents that already exist.

    ``POST`` selects every document of the requested application entity whose
    ``status`` is 2 and whose ``create_time`` falls within the current local
    day, and pushes one re-OCR task per document onto the queue via
    ``rh.enqueue``.
    """

    permission_classes = [IsAuthenticated]
    authentication_classes = [OAuth2AuthenticationWithUser]

    # Archive suffixes, matched case-sensitively exactly as listed
    # (e.g. '.Zip' is NOT treated as an archive).
    _ARCHIVE_SUFFIXES = ('.zip', '.rar', '.ZIP', '.RAR')

    @staticmethod
    def _today_bounds():
        """Return ``(start_of_day, end_of_day)`` for the current local day.

        ``timezone.now()`` is UTC when time-zone support is enabled, so its
        ``.date()`` can differ from the local calendar date; convert to local
        time first so the date agrees with the zone ``make_aware`` applies.
        """
        now = timezone.now()
        if timezone.is_aware(now):
            today = timezone.localtime(now).date()
        else:
            # Naive "now" is already expressed in local time.
            today = now.date()
        # `timezone.datetime` is the datetime class re-exported by
        # django.utils.timezone; kept so no new import is required.
        day_start = timezone.datetime.combine(today, timezone.datetime.min.time())
        day_end = timezone.datetime.combine(today, timezone.datetime.max.time())
        return timezone.make_aware(day_start), timezone.make_aware(day_end)

    @staticmethod
    def _match_keyword(keyword_pairs, document_name):
        """Return the classify value of the first keyword found in the name, else 0."""
        # `keyword_pairs` may be None when the map has no entry for the prefix.
        for keyword, classify_1_tmp in keyword_pairs or ():
            if keyword in document_name:
                return classify_1_tmp
        return 0

    def _resolve_classify_1(self, doc, prefix):
        """Derive ``classify_1`` for ``doc`` from its source, scheme and name.

        Returns 0 when the document matches neither contract branch or when no
        keyword matches its name.
        """
        # E-contract: Econtract or OVP(FSM)
        if doc.data_source in (consts.DATA_SOURCE_LIST[2], consts.DATA_SOURCE_LIST[3]):
            if doc.document_scheme == consts.DOC_SCHEME_LIST[1]:
                return self._match_keyword(
                    consts.ECONTRACT_KEYWORDS_MAP.get(prefix), doc.document_name)
            return 0
        # FSM contract: WEP/MSI/SC/SC2
        if (doc.data_source == consts.DATA_SOURCE_LIST[0]
                and doc.document_scheme == consts.DOC_SCHEME_LIST[0]):
            return self._match_keyword(
                consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix), doc.document_name)
        return 0

    # Batch re-recognition (re-OCR) endpoint for existing files
    @use_args(batch_reocr_args, location='data')
    def post(self, request, args):
        """Re-enqueue today's ``status=2`` documents of one application entity.

        :param request: the incoming request (not read directly here).
        :param args: parsed arguments; ``application_entity`` selects the
            document class and task prefix via ``self.get_doc_class``.
        :return: ``response.ok()`` once every selected document has been
            enqueued.
        """
        application_entity = args.get('application_entity')
        start_of_day, end_of_day = self._today_bounds()

        doc_class, prefix = self.get_doc_class(application_entity)
        docs = doc_class.objects.filter(status=2, create_time__range=(start_of_day, end_of_day))

        for doc in docs.iterator():
            # Choose which queue the task enters.
            is_priority = PriorityApplication.objects.filter(
                application_id=doc.application_id, on_off=True).exists()
            classify_1 = self._resolve_classify_1(doc, prefix)
            is_zip = doc.document_name.endswith(self._ARCHIVE_SUFFIXES)

            # task = 'AFC_11001_0_Y' / 'AFC_11001_0_N': the trailing flag says
            # whether this is a re-OCR (Y = yes, N = no).
            task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1), 'Y'])
            enqueue_res = rh.enqueue([task], is_priority, is_zip)
            self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
                                  '[is_priority={3}] [enqueue_res={4}] [classify_1={5}]'.format(args, prefix, doc.id,
                                                                                                is_priority, enqueue_res, classify_1))

        return response.ok()
| ... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or sign in to post a comment