# vit.yaml
# Training configuration: a 'VisionTransformer' model on the 'CoordinatesData'
# dataset, driven by the 'VITSolver' solver.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost. Placing `dataloader` under `dataset`, and
# `optimizer` / `lr_scheduler` / `loss` / `logger` under `solver`, is an
# assumption -- confirm against the code that reads this file.

seed: 3407

dataset:
  name: 'CoordinatesData'
  args:
    # NOTE(review): absolute, machine-specific paths; they must be adjusted
    # when this config is used on another machine.
    data_root: '/Users/zhouweiqi/Downloads/gcfp/data/dataset'
    train_anno_file: '/Users/zhouweiqi/Downloads/gcfp/data/dataset/train.csv'
    val_anno_file: '/Users/zhouweiqi/Downloads/gcfp/data/dataset/valid.csv'

  dataloader:
    batch_size: 32
    num_workers: 4
    pin_memory: true
    shuffle: true

model:
  name: 'VisionTransformer'
  args:
    img_size: 224
    patch_size: 16
    in_c: 3
    num_classes: 5
    # NOTE(review): 8 is not a multiple of num_heads (12). Multi-head attention
    # implementations usually require embed_dim % num_heads == 0 -- confirm
    # that this value is intended.
    embed_dim: 8
    depth: 12
    num_heads: 12
    mlp_ratio: 4.0
    qkv_bias: true
    # The unset options in this section are written as `null`. The plain
    # scalar `none` is not a YAML null; it loads as the string "none".
    qk_scale: null
    representation_size: null
    distilled: false
    drop_ratio: 0.0
    attn_drop_ratio: 0.0
    drop_path_ratio: 0.0
    norm_layer: null
    act_layer: null

solver:
  name: 'VITSolver'
  args:
    epoch: 100

  optimizer:
    name: 'Adam'
    args:
      # Explicit !!float tags: some YAML 1.1 loaders read a bare `1e-4`
      # (no decimal point) as a string rather than a float.
      lr: !!float 1e-4
      weight_decay: !!float 5e-5

  lr_scheduler:
    name: 'StepLR'
    args:
      step_size: 15
      gamma: 0.1

  loss:
    name: 'SigmoidFocalLoss'
    # Alternative loss, currently disabled:
    # name: 'CrossEntropyLoss'
    args:
      reduction: 'mean'

  logger:
    # NOTE(review): absolute, machine-specific path; adjust per environment.
    log_root: '/Users/zhouweiqi/Downloads/test/logs'
    suffix: 'vit'