周伟奇 / test_on_pytorch
Commit d865f629, authored 2022-12-12 18:33:44 +0800 by 周伟奇
add VIT
1 parent c424aba7
Showing 4 changed files with 474 additions and 0 deletions
.gitignore
config/vit.yaml
model/vit.py
solver/vit_solver.py
.gitignore
.DS_Store
logs/
__pycache__
*.log
dataset/
...
...
config/vit.yaml
0 → 100644
seed: 3407

dataset:
  name: 'CoordinatesData'
  args:
    data_root: '/Users/zhouweiqi/Downloads/gcfp/data/dataset'
    train_anno_file: '/Users/zhouweiqi/Downloads/gcfp/data/dataset/train.csv'
    val_anno_file: '/Users/zhouweiqi/Downloads/gcfp/data/dataset/valid.csv'

dataloader:
  batch_size: 32
  num_workers: 4
  pin_memory: true
  shuffle: true

model:
  name: 'VisionTransformer'
  args:
    img_size: 224
    patch_size: 16
    in_c: 3
    num_classes: 5
    embed_dim: 8      # NOTE: Attention requires embed_dim to be divisible by num_heads
    depth: 12
    num_heads: 12
    mlp_ratio: 4.0
    qkv_bias: true
    qk_scale: null    # YAML null maps to Python None (unquoted 'none' would be the string "none")
    representation_size: null
    distilled: false
    drop_ratio: 0.
    attn_drop_ratio: 0.
    drop_path_ratio: 0.
    norm_layer: null
    act_layer: null

solver:
  name: 'VITSolver'
  args:
    epoch: 100
  optimizer:
    name: 'Adam'
    args:
      lr: !!float 1e-4
      weight_decay: !!float 5e-5
  lr_scheduler:
    name: 'StepLR'
    args:
      step_size: 15
      gamma: 0.1
  loss:
    name: 'SigmoidFocalLoss'
    # name: 'CrossEntropyLoss'
    args:
      reduction: "mean"
  logger:
    log_root: '/Users/zhouweiqi/Downloads/test/logs'
    suffix: 'vit'
\ No newline at end of file
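For reference, a minimal sketch of loading a config like this, assuming the repo reads its YAML with PyYAML (the actual loader is not part of this diff). The explicit !!float tags matter because PyYAML parses exponent literals such as 1e-4 as strings unless they contain a decimal point, and null arrives in Python as None:

import yaml

with open('config/vit.yaml') as f:
    cfg = yaml.safe_load(f)

assert isinstance(cfg['solver']['optimizer']['args']['lr'], float)  # !!float 1e-4 -> 0.0001
assert cfg['model']['args']['qk_scale'] is None                     # YAML null -> Python None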
model/vit.py
0 → 100644
from functools import partial
from collections import OrderedDict

import torch
import torch.nn as nn

from utils.registery import MODEL_REGISTRY
def _init_vit_weights(m):
    """
    ViT weight initialization
    :param m: module
    """
    if isinstance(m, nn.Linear):
        nn.init.trunc_normal_(m.weight, std=.01)
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode="fan_out")
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.LayerNorm):
        nn.init.zeros_(m.bias)
        nn.init.ones_(m.weight)
def drop_path(x, drop_prob: float = 0., training: bool = False):
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
    'survival rate' as the argument.
    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # work with diff dim tensors, not just 2D ConvNets
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize
    output = x.div(keep_prob) * random_tensor
    return output
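# Illustrative sanity check (added for exposition; not in the original commit):
# with drop_prob=0.2, roughly 20% of the samples in a batch are zeroed, and the
# 1/keep_prob rescaling keeps the expected activation unchanged.
if __name__ == "__main__":
    _x = torch.ones(10000, 4)
    _out = drop_path(_x, drop_prob=0.2, training=True)
    print(_out.mean().item())                       # ~1.0 in expectation
    print((_out[:, 0] == 0).float().mean().item())  # ~0.2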
class DropPath(nn.Module):
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    """
    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)
class Attention(nn.Module):
    def __init__(self,
                 dim,                  # input token dimension
                 num_heads=8,
                 qkv_bias=False,
                 qk_scale=None,
                 attn_drop_ratio=0.,
                 proj_drop_ratio=0.):
        super(Attention, self).__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim ** -0.5
        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop_ratio)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop_ratio)

    def forward(self, x):
        # [batch_size, num_patches + 1, total_embed_dim]
        B, N, C = x.shape

        # qkv(): -> [batch_size, num_patches + 1, 3 * total_embed_dim]
        # reshape: -> [batch_size, num_patches + 1, 3, num_heads, embed_dim_per_head]
        # permute: -> [3, batch_size, num_heads, num_patches + 1, embed_dim_per_head]
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        # [batch_size, num_heads, num_patches + 1, embed_dim_per_head]
        q, k, v = qkv[0], qkv[1], qkv[2]  # make torchscript happy (cannot use tensor as tuple)

        # transpose: -> [batch_size, num_heads, embed_dim_per_head, num_patches + 1]
        # @: multiply -> [batch_size, num_heads, num_patches + 1, num_patches + 1]
        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        # @: multiply -> [batch_size, num_heads, num_patches + 1, embed_dim_per_head]
        # transpose: -> [batch_size, num_patches + 1, num_heads, embed_dim_per_head]
        # reshape: -> [batch_size, num_patches + 1, total_embed_dim]
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x
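# Illustrative shape check (added for exposition; not in the original commit):
# dim must be divisible by num_heads, since head_dim = dim // num_heads.
if __name__ == "__main__":
    _attn = Attention(dim=8, num_heads=2)
    _tokens = torch.randn(2, 29, 8)  # [batch, num_patches + 1, embed_dim]
    print(_attn(_tokens).shape)      # torch.Size([2, 29, 8])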
class Mlp(nn.Module):
    """
    MLP as used in Vision Transformer, MLP-Mixer and related networks
    """
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x
class Block(nn.Module):
    def __init__(self,
                 dim,
                 num_heads,
                 mlp_ratio=4.,
                 qkv_bias=False,
                 qk_scale=None,
                 drop_ratio=0.,
                 attn_drop_ratio=0.,
                 drop_path_ratio=0.,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm):
        super(Block, self).__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
                              attn_drop_ratio=attn_drop_ratio, proj_drop_ratio=drop_ratio)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path_ratio) if drop_path_ratio > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop_ratio)

    def forward(self, x):
        x = x + self.drop_path(self.attn(self.norm1(x)))
        x = x + self.drop_path(self.mlp(self.norm2(x)))
        return x
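# Illustrative (added for exposition; not in the original commit): a Block is
# shape-preserving, which is what lets depth of them stack in nn.Sequential.
if __name__ == "__main__":
    _blk = Block(dim=8, num_heads=2)
    print(_blk(torch.randn(2, 29, 8)).shape)  # torch.Size([2, 29, 8])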
class PatchEmbed(nn.Module):
    """
    2D Image to Patch Embedding
    """
    def __init__(self, img_size=224, patch_size=16, in_c=3, embed_dim=768, norm_layer=None):
        super().__init__()
        img_size = (img_size, img_size)
        patch_size = (patch_size, patch_size)
        self.img_size = img_size
        self.patch_size = patch_size
        self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1])
        self.num_patches = self.grid_size[0] * self.grid_size[1]
        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
        B, C, H, W = x.shape
        assert H == self.img_size[0] and W == self.img_size[1], \
            f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."

        # flatten: [B, C, H, W] -> [B, C, HW]
        # transpose: [B, C, HW] -> [B, HW, C]
        x = self.proj(x).flatten(2).transpose(1, 2)
        x = self.norm(x)
        return x
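# Illustrative (added for exposition; not in the original commit): a 224x224
# RGB image yields (224 // 16) ** 2 = 196 patch tokens.
if __name__ == "__main__":
    _pe = PatchEmbed(img_size=224, patch_size=16, in_c=3, embed_dim=768)
    print(_pe(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 196, 768])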
@MODEL_REGISTRY.register()
class VisionTransformer(nn.Module):
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_c=3,
                 num_classes=1000,
                 embed_dim=768,
                 depth=12,
                 num_heads=12,
                 mlp_ratio=4.0,
                 qkv_bias=True,
                 qk_scale=None,
                 representation_size=None,
                 distilled=False,
                 drop_ratio=0.,
                 attn_drop_ratio=0.,
                 drop_path_ratio=0.,
                 embed_layer=PatchEmbed,
                 norm_layer=None,
                 act_layer=None):
        """
        Args:
            img_size (int, tuple): input image size
            patch_size (int, tuple): patch size
            in_c (int): number of input channels
            num_classes (int): number of classes for classification head
            embed_dim (int): embedding dimension
            depth (int): depth of transformer
            num_heads (int): number of attention heads
            mlp_ratio (int): ratio of mlp hidden dim to embedding dim
            qkv_bias (bool): enable bias for qkv if True
            qk_scale (float): override default qk scale of head_dim ** -0.5 if set
            representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set
            distilled (bool): model includes a distillation token and head as in DeiT models
            drop_ratio (float): dropout rate
            attn_drop_ratio (float): attention dropout rate
            drop_path_ratio (float): stochastic depth rate
            embed_layer (nn.Module): patch embedding layer
            norm_layer (nn.Module): normalization layer
        """
        super(VisionTransformer, self).__init__()
        self.num_classes = num_classes
        self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
        self.num_tokens = 2 if distilled else 1
        norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
        act_layer = act_layer or nn.GELU

        self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size, in_c=in_c, embed_dim=embed_dim)
        num_patches = self.patch_embed.num_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.dist_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) if distilled else None
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_ratio)

        dpr = [x.item() for x in torch.linspace(0, drop_path_ratio, depth)]  # stochastic depth decay rule
        self.blocks = nn.Sequential(*[
            Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
                  drop_ratio=drop_ratio, attn_drop_ratio=attn_drop_ratio, drop_path_ratio=dpr[i],
                  norm_layer=norm_layer, act_layer=act_layer)
            for i in range(depth)
        ])
        self.norm = norm_layer(embed_dim)

        # Representation layer
        if representation_size and not distilled:
            self.has_logits = True
            self.num_features = representation_size
            self.pre_logits = nn.Sequential(OrderedDict([
                ("fc", nn.Linear(embed_dim, representation_size)),
                ("act", nn.Tanh())
            ]))
        else:
            self.has_logits = False
            self.pre_logits = nn.Identity()

        # Classifier head(s)
        self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity()
        self.head_dist = None
        if distilled:
            self.head_dist = nn.Linear(self.embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()

        # Weight init
        nn.init.trunc_normal_(self.pos_embed, std=0.02)
        if self.dist_token is not None:
            nn.init.trunc_normal_(self.dist_token, std=0.02)
        nn.init.trunc_normal_(self.cls_token, std=0.02)
        self.apply(_init_vit_weights)
    def forward_features(self, x):
        # Patch embedding is bypassed here, so x is expected to already be a
        # token sequence [B, num_patches, embed_dim] rather than an image.
        # x = self.patch_embed(x)  # [B, C, H, W] -> [B, 196, 768]

        # cls_token: [1, 1, embed_dim] -> [B, 1, embed_dim]
        # after concatenation: [B, num_patches + 1, embed_dim] (e.g. [B, 28+1, 8])
        cls_token = self.cls_token.expand(x.shape[0], -1, -1)
        if self.dist_token is None:
            x = torch.cat((cls_token, x), dim=1)  # [B, 197, 768]
        else:
            x = torch.cat((cls_token, self.dist_token.expand(x.shape[0], -1, -1), x), dim=1)

        x = self.pos_drop(x + self.pos_embed)
        x = self.blocks(x)
        x = self.norm(x)
        if self.dist_token is None:
            return self.pre_logits(x[:, 0])
        else:
            return x[:, 0], x[:, 1]
    def forward(self, x):
        x = self.forward_features(x)
        if self.head_dist is not None:
            x, x_dist = self.head(x[0]), self.head_dist(x[1])
            if self.training and not torch.jit.is_scripting():
                return x, x_dist
            else:
                # during inference, return the average of both classifier predictions
                return (x + x_dist) / 2
        else:
            x = self.head(x)
        return x
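Since forward_features skips self.patch_embed, the model as committed consumes pre-embedded token sequences rather than raw images. A minimal sketch of a forward pass under that assumption; num_heads=4 is substituted here purely for illustration, because the committed config's embed_dim=8 is not divisible by num_heads=12:

model = VisionTransformer(num_classes=5, embed_dim=8, depth=12, num_heads=4)
tokens = torch.randn(2, 196, 8)  # [B, num_patches, embed_dim], matching pos_embed's 196 + 1 positions
print(model(tokens).shape)       # torch.Size([2, 5])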
solver/vit_solver.py
0 → 100644
import os
import copy

import torch

from model import build_model
from data import build_dataloader
from optimizer import build_optimizer, build_lr_scheduler
from loss import build_loss
from utils import SOLVER_REGISTRY, get_logger_and_log_dir
@SOLVER_REGISTRY.register()
class VITSolver(object):
    def __init__(self, cfg):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.cfg = copy.deepcopy(cfg)

        self.train_loader, self.val_loader = build_dataloader(cfg)
        self.train_loader_size, self.val_loader_size = len(self.train_loader), len(self.val_loader)
        self.train_dataset_size, self.val_dataset_size = len(self.train_loader.dataset), len(self.val_loader.dataset)

        # BatchNorm ?
        self.model = build_model(cfg).to(self.device)

        self.loss_fn = build_loss(cfg)

        self.optimizer = build_optimizer(cfg)(self.model.parameters(), **cfg['solver']['optimizer']['args'])

        self.hyper_params = cfg['solver']['args']
        try:
            self.epoch = self.hyper_params['epoch']
        except Exception:
            raise KeyError('should contain epoch in solver.args')

        self.logger, self.log_dir = get_logger_and_log_dir(**cfg['solver']['logger'])
    @staticmethod
    def evaluate(y_pred, y_true, thresholds=0.5):
        # Rebuild predictions as 1-based class indices; a prediction whose
        # maximum score falls below the threshold is mapped to 0 ("other").
        y_pred_idx = torch.argmax(y_pred, dim=1) + 1
        y_pred_is_other = (torch.amax(y_pred, dim=1) > thresholds).int()
        y_pred_rebuild = torch.multiply(y_pred_idx, y_pred_is_other)

        # An all-zero label row likewise rebuilds to 0.
        y_true_idx = torch.argmax(y_true, dim=1) + 1
        y_true_is_other = torch.sum(y_true, dim=1)
        y_true_rebuild = torch.multiply(y_true_idx, y_true_is_other)

        return torch.sum((y_pred_rebuild == y_true_rebuild).int()).item()
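    # Worked example (added for exposition; not in the original commit):
    # an all-zero row in y_true encodes the "other" class and rebuilds to 0,
    # as does any prediction whose maximum score stays at or below the threshold.
    #
    #   y_pred = torch.tensor([[0.9, 0.1, 0.2],   # max 0.9 > 0.5  -> class 1
    #                          [0.3, 0.2, 0.1]])  # max 0.3 <= 0.5 -> "other" (0)
    #   y_true = torch.tensor([[1.0, 0.0, 0.0],   # class 1
    #                          [0.0, 0.0, 0.0]])  # "other" (0)
    #   VITSolver.evaluate(y_pred, y_true)        # -> 2 correct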
    def train_loop(self):
        self.model.train()

        train_loss = torch.zeros(1).to(self.device)
        correct = torch.zeros(1).to(self.device)
        for batch, (X, y) in enumerate(self.train_loader):
            X, y = X.to(self.device), y.to(self.device)

            pred = self.model(X)
            correct += self.evaluate(pred, y)

            # loss = self.loss_fn(pred, y, reduction="mean")
            loss = self.loss_fn(pred, y)
            train_loss += loss.item()

            if batch % 100 == 0:
                loss_value, current = loss.item(), batch
                self.logger.info(f'train iteration: {current}/{self.train_loader_size}, train loss: {loss_value :.4f}')

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        correct /= self.train_dataset_size
        train_loss /= self.train_loader_size

        self.logger.info(f'train accuracy: {correct.item() :.4f}, train mean loss: {train_loss.item() :.4f}')
    @torch.no_grad()
    def val_loop(self, t):
        self.model.eval()

        val_loss = torch.zeros(1).to(self.device)
        correct = torch.zeros(1).to(self.device)
        for X, y in self.val_loader:
            X, y = X.to(self.device), y.to(self.device)

            pred = self.model(X)
            correct += self.evaluate(pred, y)

            loss = self.loss_fn(pred, y)
            val_loss += loss.item()

        correct /= self.val_dataset_size
        val_loss /= self.val_loader_size

        self.logger.info(f"val accuracy: {correct.item() :.4f}, val mean loss: {val_loss.item() :.4f}")
    def save_checkpoint(self, epoch_id):
        self.model.eval()
        torch.save(self.model.state_dict(), os.path.join(self.log_dir, f'ckpt_epoch_{epoch_id}.pt'))
    def run(self):
        self.logger.info('==> Start Training')
        print(self.model)

        # lr_scheduler = build_lr_scheduler(self.cfg)(self.optimizer, **self.cfg['solver']['lr_scheduler']['args'])

        for t in range(self.epoch):
            self.logger.info(f'==> epoch {t + 1}')

            self.train_loop()
            self.val_loop(t + 1)
            self.save_checkpoint(t + 1)
            # lr_scheduler.step()

        self.logger.info('==> End Training')
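The commit doesn't include the entry point that ties these pieces together. Under the registry pattern used here, a plausible minimal driver (assumed, not part of this diff) would be:

import yaml
from solver.vit_solver import VITSolver

with open('config/vit.yaml') as f:
    cfg = yaml.safe_load(f)

VITSolver(cfg).run()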