fd556337 by 周伟奇

add contract 8.5

1 parent 2dc31fab
...@@ -2970,6 +2970,8 @@ def compare(application_id, application_entity, uniq_seq, ocr_res_id, is_ca=True ...@@ -2970,6 +2970,8 @@ def compare(application_id, application_entity, uniq_seq, ocr_res_id, is_ca=True
2970 auto_obj = auto_class.objects.filter(application_id=application_id, on_off=True).first() 2970 auto_obj = auto_class.objects.filter(application_id=application_id, on_off=True).first()
2971 if auto_obj is not None: 2971 if auto_obj is not None:
2972 auto_result = se_compare_auto(application_id, application_entity, ocr_res_id, last_obj, ocr_res_dict, auto_obj) 2972 auto_result = se_compare_auto(application_id, application_entity, ocr_res_id, last_obj, ocr_res_dict, auto_obj)
2973 else:
2974 auto_result = None
2973 2975
2974 full_result = se_compare(application_id, application_entity, ocr_res_id, last_obj, ocr_res_dict, is_cms, auto_result) 2976 full_result = se_compare(application_id, application_entity, ocr_res_id, last_obj, ocr_res_dict, is_cms, auto_result)
2975 2977
......
...@@ -6,28 +6,35 @@ ...@@ -6,28 +6,35 @@
6 # @Description : 6 # @Description :
7 7
8 from .get_char import Finder 8 from .get_char import Finder
9 import numpy as np
9 10
10 11
11 def predict(pdf_info): 12 def predict(pdf_info):
12 ocr_results = {} 13 ocr_results = {}
13 for pno in pdf_info: 14 for pno in pdf_info:
14 ocr_results[pno] = {} 15 ocr_results[pno] = {}
16 ocr_result = []
15 for key, block in enumerate(pdf_info[pno]['blocks']): 17 for key, block in enumerate(pdf_info[pno]['blocks']):
16 if block['type'] != 0: 18 if block['type'] != 0:
17 continue 19 continue
18 for line in block['lines']: 20 for line in block['lines']:
19 for span in line['spans']: 21 for span in line['spans']:
20 bbox, text = span['bbox'], span['text'] 22 bbox, text = span['bbox'], span['text']
23 if len(text) == 0:
24 continue
21 # print(text) 25 # print(text)
22 xmin, ymin, xmax, ymax = bbox 26 xmin, ymin, xmax, ymax = bbox
23 polygon = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax] 27 polygon = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
28 polygon = np.array(polygon, dtype=np.int32).tolist()
24 text = text.replace(":", ":").replace(" ", "") 29 text = text.replace(":", ":").replace(" ", "")
25 ocr_results[pno][key] = [polygon, text] 30 ocr_result.append([polygon, text])
26 31 ocr_result = sorted(ocr_result, key=lambda x: x[0][1], reverse=False) # 按 y0 从小到大排
32 keys = list(range(len(ocr_result)))
33 ocr_result = dict(zip(keys, ocr_result))
34 ocr_results[pno] = ocr_result
27 # 输入是整个 PDF 中的信息 35 # 输入是整个 PDF 中的信息
28 f = Finder(pdf_info, ocr_results=ocr_results) 36 f = Finder(pdf_info, ocr_results=ocr_results)
29 results = f.get_info() 37 results = f.get_info()
30
31 return results 38 return results
32 39
33 40
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!