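Modify dataset: lower the IoU match threshold from 0.4 to 0.2, rescale the normalized features with x*2 - 1, and use -1 instead of 0 for the leading flag feature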
Showing 2 changed files with 11 additions and 11 deletions
... | @@ -239,7 +239,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save | ... | @@ -239,7 +239,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save |
239 | [label_bbox[0], label_bbox[3]], | 239 | [label_bbox[0], label_bbox[3]], |
240 | ] | 240 | ] |
241 | iou = bbox_iou(go_bbox_rebuild, label_bbox_rebuild) | 241 | iou = bbox_iou(go_bbox_rebuild, label_bbox_rebuild) |
242 | if iou >= 0.4: | 242 | if iou >= 0.2: |
243 | label_idx_dict[go_idx] = label_idx | 243 | label_idx_dict[go_idx] = label_idx |
244 | 244 | ||
245 | X = list() | 245 | X = list() |
... | @@ -264,7 +264,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save | ... | @@ -264,7 +264,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save |
264 | (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] | 264 | (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] |
265 | feature_vec = [1.] | 265 | feature_vec = [1.] |
266 | feature_vec.extend(simple_word2vec(text)) | 266 | feature_vec.extend(simple_word2vec(text)) |
267 | feature_vec.extend([x0/w, y0/h, x1/w, y1/h, x2/w, y2/h, x3/w, y3/h]) | 267 | feature_vec.extend([(x0/w)*2-1, (y0/h)*2-1, (x1/w)*2-1, (y1/h)*2-1, (x2/w)*2-1, (y2/h)*2-1, (x3/w)*2-1, (y3/h)*2-1]) |
268 | # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) | 268 | # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) |
269 | feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) | 269 | feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) |
270 | X.append(feature_vec) | 270 | X.append(feature_vec) |
... | @@ -273,9 +273,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save | ... | @@ -273,9 +273,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save |
273 | 273 | ||
274 | elif i in label_idx_dict: | 274 | elif i in label_idx_dict: |
275 | (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] | 275 | (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] |
276 | feature_vec = [0.] | 276 | feature_vec = [-1.] |
277 | feature_vec.extend(simple_word2vec(text)) | 277 | feature_vec.extend(simple_word2vec(text)) |
278 | feature_vec.extend([x0/w, y0/h, x1/w, y1/h, x2/w, y2/h, x3/w, y3/h]) | 278 | feature_vec.extend([(x0/w)*2-1, (y0/h)*2-1, (x1/w)*2-1, (y1/h)*2-1, (x2/w)*2-1, (y2/h)*2-1, (x3/w)*2-1, (y3/h)*2-1]) |
279 | # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) | 279 | # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) |
280 | feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) | 280 | feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) |
281 | X.append(feature_vec) | 281 | X.append(feature_vec) |
... | @@ -285,9 +285,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save | ... | @@ -285,9 +285,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save |
285 | y_true.append(base_label_list) | 285 | y_true.append(base_label_list) |
286 | else: | 286 | else: |
287 | (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] | 287 | (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] |
288 | feature_vec = [0.] | 288 | feature_vec = [-1.] |
289 | feature_vec.extend(simple_word2vec(text)) | 289 | feature_vec.extend(simple_word2vec(text)) |
290 | feature_vec.extend([x0/w, y0/h, x1/w, y1/h, x2/w, y2/h, x3/w, y3/h]) | 290 | feature_vec.extend([(x0/w)*2-1, (y0/h)*2-1, (x1/w)*2-1, (y1/h)*2-1, (x2/w)*2-1, (y2/h)*2-1, (x3/w)*2-1, (y3/h)*2-1]) |
291 | # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) | 291 | # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) |
292 | feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) | 292 | feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) |
293 | X.append(feature_vec) | 293 | X.append(feature_vec) | ... | ... |
... | @@ -27,12 +27,12 @@ def simple_word2vec(text): | ... | @@ -27,12 +27,12 @@ def simple_word2vec(text): |
27 | else: | 27 | else: |
28 | other_num += 1 | 28 | other_num += 1 |
29 | 29 | ||
30 | vec = [text_len/100, | 30 | vec = [(text_len/100)*2 - 1, |
31 | cn_num/text_len, | 31 | (cn_num/text_len)*2 - 1, |
32 | en_num/text_len, | 32 | (en_num/text_len)*2 - 1, |
33 | digit_num/text_len, | 33 | (digit_num/text_len)*2 - 1, |
34 | # space_num/text_len, | 34 | # space_num/text_len, |
35 | other_num/text_len, | 35 | (other_num/text_len)*2 - 1, |
36 | ] | 36 | ] |
37 | 37 | ||
38 | # print(text) | 38 | # print(text) | ... | ... |
-
Please register or sign in to post a comment