18e1e6ed by 周伟奇

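Modify dataset: relax the IoU match threshold (0.4 -> 0.2) and rescale features via x*2 - 1 (flag feature 0 -> -1)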

1 parent 331c7717
......@@ -239,7 +239,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save
[label_bbox[0], label_bbox[3]],
]
iou = bbox_iou(go_bbox_rebuild, label_bbox_rebuild)
if iou >= 0.4:
if iou >= 0.2:
label_idx_dict[go_idx] = label_idx
X = list()
......@@ -264,7 +264,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save
(x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i]
feature_vec = [1.]
feature_vec.extend(simple_word2vec(text))
feature_vec.extend([x0/w, y0/h, x1/w, y1/h, x2/w, y2/h, x3/w, y3/h])
feature_vec.extend([(x0/w)*2-1, (y0/h)*2-1, (x1/w)*2-1, (y1/h)*2-1, (x2/w)*2-1, (y2/h)*2-1, (x3/w)*2-1, (y3/h)*2-1])
# feature_vec.extend(jwq_word2vec(text, text_vec_max_lens))
feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char))
X.append(feature_vec)
......@@ -273,9 +273,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save
elif i in label_idx_dict:
(x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i]
feature_vec = [0.]
feature_vec = [-1.]
feature_vec.extend(simple_word2vec(text))
feature_vec.extend([x0/w, y0/h, x1/w, y1/h, x2/w, y2/h, x3/w, y3/h])
feature_vec.extend([(x0/w)*2-1, (y0/h)*2-1, (x1/w)*2-1, (y1/h)*2-1, (x2/w)*2-1, (y2/h)*2-1, (x3/w)*2-1, (y3/h)*2-1])
# feature_vec.extend(jwq_word2vec(text, text_vec_max_lens))
feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char))
X.append(feature_vec)
......@@ -285,9 +285,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save
y_true.append(base_label_list)
else:
(x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i]
feature_vec = [0.]
feature_vec = [-1.]
feature_vec.extend(simple_word2vec(text))
feature_vec.extend([x0/w, y0/h, x1/w, y1/h, x2/w, y2/h, x3/w, y3/h])
feature_vec.extend([(x0/w)*2-1, (y0/h)*2-1, (x1/w)*2-1, (y1/h)*2-1, (x2/w)*2-1, (y2/h)*2-1, (x3/w)*2-1, (y3/h)*2-1])
# feature_vec.extend(jwq_word2vec(text, text_vec_max_lens))
feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char))
X.append(feature_vec)
......
......@@ -27,12 +27,12 @@ def simple_word2vec(text):
else:
other_num += 1
vec = [text_len/100,
cn_num/text_len,
en_num/text_len,
digit_num/text_len,
vec = [(text_len/100)*2 - 1,
(cn_num/text_len)*2 - 1,
(en_num/text_len)*2 - 1,
(digit_num/text_len)*2 - 1,
# space_num/text_len,
other_num/text_len,
(other_num/text_len)*2 - 1,
]
# print(text)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!