Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
test_on_pytorch
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
18e1e6ed
authored
2022-12-24 14:05:11 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
modify dataset
1 parent
331c7717
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
11 deletions
data/create_dataset2.py
data/word2vec.py
data/create_dataset2.py
View file @
18e1e6e
...
...
@@ -239,7 +239,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save
[
label_bbox
[
0
],
label_bbox
[
3
]],
]
iou
=
bbox_iou
(
go_bbox_rebuild
,
label_bbox_rebuild
)
if
iou
>=
0.
4
:
if
iou
>=
0.
2
:
label_idx_dict
[
go_idx
]
=
label_idx
X
=
list
()
...
...
@@ -264,7 +264,7 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save
(
x0
,
y0
,
x1
,
y1
,
x2
,
y2
,
x3
,
y3
),
text
=
go_res_list
[
i
]
feature_vec
=
[
1.
]
feature_vec
.
extend
(
simple_word2vec
(
text
))
feature_vec
.
extend
([
x0
/
w
,
y0
/
h
,
x1
/
w
,
y1
/
h
,
x2
/
w
,
y2
/
h
,
x3
/
w
,
y3
/
h
])
feature_vec
.
extend
([
(
x0
/
w
)
*
2
-
1
,
(
y0
/
h
)
*
2
-
1
,
(
x1
/
w
)
*
2
-
1
,
(
y1
/
h
)
*
2
-
1
,
(
x2
/
w
)
*
2
-
1
,
(
y2
/
h
)
*
2
-
1
,
(
x3
/
w
)
*
2
-
1
,
(
y3
/
h
)
*
2
-
1
])
# feature_vec.extend(jwq_word2vec(text, text_vec_max_lens))
feature_vec
.
extend
(
jieba_and_tencent_word2vec
(
text
,
max_jieba_char
))
X
.
append
(
feature_vec
)
...
...
@@ -273,9 +273,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save
elif
i
in
label_idx_dict
:
(
x0
,
y0
,
x1
,
y1
,
x2
,
y2
,
x3
,
y3
),
text
=
go_res_list
[
i
]
feature_vec
=
[
0
.
]
feature_vec
=
[
-
1
.
]
feature_vec
.
extend
(
simple_word2vec
(
text
))
feature_vec
.
extend
([
x0
/
w
,
y0
/
h
,
x1
/
w
,
y1
/
h
,
x2
/
w
,
y2
/
h
,
x3
/
w
,
y3
/
h
])
feature_vec
.
extend
([
(
x0
/
w
)
*
2
-
1
,
(
y0
/
h
)
*
2
-
1
,
(
x1
/
w
)
*
2
-
1
,
(
y1
/
h
)
*
2
-
1
,
(
x2
/
w
)
*
2
-
1
,
(
y2
/
h
)
*
2
-
1
,
(
x3
/
w
)
*
2
-
1
,
(
y3
/
h
)
*
2
-
1
])
# feature_vec.extend(jwq_word2vec(text, text_vec_max_lens))
feature_vec
.
extend
(
jieba_and_tencent_word2vec
(
text
,
max_jieba_char
))
X
.
append
(
feature_vec
)
...
...
@@ -285,9 +285,9 @@ def build_dataset(img_dir, go_res_dir, label_dir, top_text_list, skip_list, save
y_true
.
append
(
base_label_list
)
else
:
(
x0
,
y0
,
x1
,
y1
,
x2
,
y2
,
x3
,
y3
),
text
=
go_res_list
[
i
]
feature_vec
=
[
0
.
]
feature_vec
=
[
-
1
.
]
feature_vec
.
extend
(
simple_word2vec
(
text
))
feature_vec
.
extend
([
x0
/
w
,
y0
/
h
,
x1
/
w
,
y1
/
h
,
x2
/
w
,
y2
/
h
,
x3
/
w
,
y3
/
h
])
feature_vec
.
extend
([
(
x0
/
w
)
*
2
-
1
,
(
y0
/
h
)
*
2
-
1
,
(
x1
/
w
)
*
2
-
1
,
(
y1
/
h
)
*
2
-
1
,
(
x2
/
w
)
*
2
-
1
,
(
y2
/
h
)
*
2
-
1
,
(
x3
/
w
)
*
2
-
1
,
(
y3
/
h
)
*
2
-
1
])
# feature_vec.extend(jwq_word2vec(text, text_vec_max_lens))
feature_vec
.
extend
(
jieba_and_tencent_word2vec
(
text
,
max_jieba_char
))
X
.
append
(
feature_vec
)
...
...
data/word2vec.py
View file @
18e1e6e
...
...
@@ -27,12 +27,12 @@ def simple_word2vec(text):
else
:
other_num
+=
1
vec
=
[
text_len
/
100
,
cn_num
/
text_len
,
en_num
/
text_len
,
digit_num
/
text_len
,
vec
=
[
(
text_len
/
100
)
*
2
-
1
,
(
cn_num
/
text_len
)
*
2
-
1
,
(
en_num
/
text_len
)
*
2
-
1
,
(
digit_num
/
text_len
)
*
2
-
1
,
# space_num/text_len,
other_num
/
text_len
,
(
other_num
/
text_len
)
*
2
-
1
,
]
# print(text)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment