18e1e6ed by 周伟奇

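Modify dataset: lower the IoU match threshold from 0.4 to 0.2, and remap ratio features with v*2 - 1 (leading flag 0. -> -1.)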

1 parent 331c7717
...@@ -239,7 +239,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save ...@@ -239,7 +239,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save
239 [label_bbox[0], label_bbox[3]], 239 [label_bbox[0], label_bbox[3]],
240 ] 240 ]
241 iou = bbox_iou(go_bbox_rebuild, label_bbox_rebuild) 241 iou = bbox_iou(go_bbox_rebuild, label_bbox_rebuild)
242 if iou >= 0.4: 242 if iou >= 0.2:
243 label_idx_dict[go_idx] = label_idx 243 label_idx_dict[go_idx] = label_idx
244 244
245 X = list() 245 X = list()
...@@ -264,7 +264,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save ...@@ -264,7 +264,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save
264 (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] 264 (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i]
265 feature_vec = [1.] 265 feature_vec = [1.]
266 feature_vec.extend(simple_word2vec(text)) 266 feature_vec.extend(simple_word2vec(text))
267 feature_vec.extend([x0/w, y0/h, x1/w, y1/h, x2/w, y2/h, x3/w, y3/h]) 267 feature_vec.extend([(x0/w)*2-1, (y0/h)*2-1, (x1/w)*2-1, (y1/h)*2-1, (x2/w)*2-1, (y2/h)*2-1, (x3/w)*2-1, (y3/h)*2-1])
268 # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) 268 # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens))
269 feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) 269 feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char))
270 X.append(feature_vec) 270 X.append(feature_vec)
...@@ -273,9 +273,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save ...@@ -273,9 +273,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save
273 273
274 elif i in label_idx_dict: 274 elif i in label_idx_dict:
275 (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] 275 (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i]
276 feature_vec = [0.] 276 feature_vec = [-1.]
277 feature_vec.extend(simple_word2vec(text)) 277 feature_vec.extend(simple_word2vec(text))
278 feature_vec.extend([x0/w, y0/h, x1/w, y1/h, x2/w, y2/h, x3/w, y3/h]) 278 feature_vec.extend([(x0/w)*2-1, (y0/h)*2-1, (x1/w)*2-1, (y1/h)*2-1, (x2/w)*2-1, (y2/h)*2-1, (x3/w)*2-1, (y3/h)*2-1])
279 # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) 279 # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens))
280 feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) 280 feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char))
281 X.append(feature_vec) 281 X.append(feature_vec)
...@@ -285,9 +285,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save ...@@ -285,9 +285,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save
285 y_true.append(base_label_list) 285 y_true.append(base_label_list)
286 else: 286 else:
287 (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] 287 (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i]
288 feature_vec = [0.] 288 feature_vec = [-1.]
289 feature_vec.extend(simple_word2vec(text)) 289 feature_vec.extend(simple_word2vec(text))
290 feature_vec.extend([x0/w, y0/h, x1/w, y1/h, x2/w, y2/h, x3/w, y3/h]) 290 feature_vec.extend([(x0/w)*2-1, (y0/h)*2-1, (x1/w)*2-1, (y1/h)*2-1, (x2/w)*2-1, (y2/h)*2-1, (x3/w)*2-1, (y3/h)*2-1])
291 # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) 291 # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens))
292 feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) 292 feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char))
293 X.append(feature_vec) 293 X.append(feature_vec)
......
...@@ -27,12 +27,12 @@ def simple_word2vec(text): ...@@ -27,12 +27,12 @@ def simple_word2vec(text):
27 else: 27 else:
28 other_num += 1 28 other_num += 1
29 29
30 vec = [text_len/100, 30 vec = [(text_len/100)*2 - 1,
31 cn_num/text_len, 31 (cn_num/text_len)*2 - 1,
32 en_num/text_len, 32 (en_num/text_len)*2 - 1,
33 digit_num/text_len, 33 (digit_num/text_len)*2 - 1,
34 # space_num/text_len, 34 # space_num/text_len,
35 other_num/text_len, 35 (other_num/text_len)*2 - 1,
36 ] 36 ]
37 37
38 # print(text) 38 # print(text)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!