# vit.yaml — training configuration for the VisionTransformer model.
# Random seed; presumably applied by the training entry point for
# reproducibility — TODO confirm where it is consumed.
seed: 3407
# Dataset selection: `name` picks the dataset implementation and `args` holds
# its settings (presumably passed to its constructor — confirm against the
# code that loads this file).
dataset:
  name: 'CoordinatesData'
  args:
    # NOTE(review): machine-specific absolute paths; adjust per environment.
    data_root: '/Users/zhouweiqi/Downloads/gcfp/data/dataset'
    train_anno_file: '/Users/zhouweiqi/Downloads/gcfp/data/dataset/train.csv'
    val_anno_file: '/Users/zhouweiqi/Downloads/gcfp/data/dataset/valid.csv'
# Batch-loading options. The key names match arguments of
# torch.utils.data.DataLoader; presumably forwarded to it — TODO confirm.
dataloader:
  batch_size: 32
  num_workers: 4
  pin_memory: true
  shuffle: true
# Model selection: `name` picks the model implementation and `args` holds its
# hyper-parameters (presumably constructor keyword arguments — confirm).
model:
  name: 'VisionTransformer'
  args:
    img_size: 224
    patch_size: 16
    in_c: 3
    num_classes: 5
    embed_dim: 8
    depth: 12
    num_heads: 2
    mlp_ratio: 4.0
    qkv_bias: true
    # `null` entries presumably select the model's built-in defaults —
    # TODO confirm.
    qk_scale: null
    representation_size: null
    distilled: false
    drop_ratio: 0.0
    attn_drop_ratio: 0.0
    drop_path_ratio: 0.0
    norm_layer: null
    act_layer: null
    # NOTE(review): assumed to be a model argument like the keys above;
    # confirm it belongs under `args` rather than directly under `model`.
    input_length: 29
# Training driver: `name` picks the solver implementation and `args` holds its
# settings.
solver:
  name: 'VITSolver'
  args:
    epoch: 100
    # NOTE(review): the meaning of `no_other` and `base_on` is not visible in
    # this file; see the solver implementation.
    no_other: false
    base_on: null
# Optimizer selection and its arguments.
optimizer:
  name: 'Adam'
  args:
    # The explicit !!float tag is intentional: YAML 1.1 loaders such as PyYAML
    # resolve a bare `1e-3` (no decimal point) as a string, not a float.
    lr: !!float 1e-3
    # weight_decay: !!float 5e-5
# Learning-rate schedule selection and its arguments.
lr_scheduler:
  name: 'CosineLR'
  args:
    # Presumably should stay equal to the solver's `epoch` value (also 100) —
    # TODO confirm.
    epochs: 100
    # NOTE(review): likely the final-LR fraction of the initial LR; verify
    # against the CosineLR implementation.
    lrf: 0.1
# Loss selection; the commented-out `name` is an alternative kept for quick
# switching.
loss:
  name: 'SigmoidFocalLoss'
  # name: 'CrossEntropyLoss'
  args:
    reduction: 'mean'
# Logging output settings.
logger:
  # NOTE(review): machine-specific absolute path; adjust per environment.
  log_root: '/Users/zhouweiqi/Downloads/test/logs'
  # Presumably appended to the run/log name — TODO confirm.
  suffix: 'vit'