modify dataset
Showing 2 changed files with 11 additions and 11 deletions
| ... | @@ -239,7 +239,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save | ... | @@ -239,7 +239,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save |
| 239 | [label_bbox[0], label_bbox[3]], | 239 | [label_bbox[0], label_bbox[3]], |
| 240 | ] | 240 | ] |
| 241 | iou = bbox_iou(go_bbox_rebuild, label_bbox_rebuild) | 241 | iou = bbox_iou(go_bbox_rebuild, label_bbox_rebuild) |
| 242 | if iou >= 0.4: | 242 | if iou >= 0.2: |
| 243 | label_idx_dict[go_idx] = label_idx | 243 | label_idx_dict[go_idx] = label_idx |
| 244 | | 244 | |
| 245 | X = list() | 245 | X = list() |
| ... | @@ -264,7 +264,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save | ... | @@ -264,7 +264,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save |
| 264 | (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] | 264 | (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] |
| 265 | feature_vec = [1.] | 265 | feature_vec = [1.] |
| 266 | feature_vec.extend(simple_word2vec(text)) | 266 | feature_vec.extend(simple_word2vec(text)) |
| 267 | feature_vec.extend([x0/w, y0/h, x1/w, y1/h, x2/w, y2/h, x3/w, y3/h]) | 267 | feature_vec.extend([(x0/w)*2-1, (y0/h)*2-1, (x1/w)*2-1, (y1/h)*2-1, (x2/w)*2-1, (y2/h)*2-1, (x3/w)*2-1, (y3/h)*2-1]) |
| 268 | # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) | 268 | # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) |
| 269 | feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) | 269 | feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) |
| 270 | X.append(feature_vec) | 270 | X.append(feature_vec) |
| ... | @@ -273,9 +273,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save | ... | @@ -273,9 +273,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save |
| 273 | | 273 | |
| 274 | elif i in label_idx_dict: | 274 | elif i in label_idx_dict: |
| 275 | (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] | 275 | (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] |
| 276 | feature_vec = [0.] | 276 | feature_vec = [-1.] |
| 277 | feature_vec.extend(simple_word2vec(text)) | 277 | feature_vec.extend(simple_word2vec(text)) |
| 278 | feature_vec.extend([x0/w, y0/h, x1/w, y1/h, x2/w, y2/h, x3/w, y3/h]) | 278 | feature_vec.extend([(x0/w)*2-1, (y0/h)*2-1, (x1/w)*2-1, (y1/h)*2-1, (x2/w)*2-1, (y2/h)*2-1, (x3/w)*2-1, (y3/h)*2-1]) |
| 279 | # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) | 279 | # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) |
| 280 | feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) | 280 | feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) |
| 281 | X.append(feature_vec) | 281 | X.append(feature_vec) |
| ... | @@ -285,9 +285,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save | ... | @@ -285,9 +285,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save |
| 285 | y_true.append(base_label_list) | 285 | y_true.append(base_label_list) |
| 286 | else: | 286 | else: |
| 287 | (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] | 287 | (x0, y0, x1, y1, x2, y2, x3, y3), text = go_res_list[i] |
| 288 | feature_vec = [0.] | 288 | feature_vec = [-1.] |
| 289 | feature_vec.extend(simple_word2vec(text)) | 289 | feature_vec.extend(simple_word2vec(text)) |
| 290 | feature_vec.extend([x0/w, y0/h, x1/w, y1/h, x2/w, y2/h, x3/w, y3/h]) | 290 | feature_vec.extend([(x0/w)*2-1, (y0/h)*2-1, (x1/w)*2-1, (y1/h)*2-1, (x2/w)*2-1, (y2/h)*2-1, (x3/w)*2-1, (y3/h)*2-1]) |
| 291 | # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) | 291 | # feature_vec.extend(jwq_word2vec(text, text_vec_max_lens)) |
| 292 | feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) | 292 | feature_vec.extend(jieba_and_tencent_word2vec(text, max_jieba_char)) |
| 293 | X.append(feature_vec) | 293 | X.append(feature_vec) | ... | ... |
| ... | @@ -27,12 +27,12 @@ def simple_word2vec(text): | ... | @@ -27,12 +27,12 @@ def simple_word2vec(text): |
| 27 | else: | 27 | else: |
| 28 | other_num += 1 | 28 | other_num += 1 |
| 29 | | 29 | |
| 30 | vec = [text_len/100, | 30 | vec = [(text_len/100)*2 - 1, |
| 31 | cn_num/text_len, | 31 | (cn_num/text_len)*2 - 1, |
| 32 | en_num/text_len, | 32 | (en_num/text_len)*2 - 1, |
| 33 | digit_num/text_len, | 33 | (digit_num/text_len)*2 - 1, |
| 34 | # space_num/text_len, | 34 | # space_num/text_len, |
| 35 | other_num/text_len, | 35 | (other_num/text_len)*2 - 1, |
| 36 | ] | 36 | ] |
| 37 | | 37 | |
| 38 | # print(text) | 38 | # print(text) | ... | ... |
-
Please register or sign in to post a comment