# config.yaml
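# DINOv2-style self-supervised pretraining config for Legacy Survey (DECaLS)
# galaxy images. The block comments below read field semantics from the
# upstream DINOv2 config schema; where this file does not state something
# itself, treat the comment as an assumption. MODEL.WEIGHTS is left empty,
# i.e. training starts from scratch.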
MODEL:
WEIGHTS: ''
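# Per-module FSDP settings for teacher and student (backbone, dino_head,
# ibot_head). SHARD_GRAD_OP shards gradients and optimizer state while
# keeping parameters unsharded during compute (ZeRO-2-like). Compute runs in
# fp16 with fp32 buffers; note that the student heads below reduce gradients
# in fp32 while the teacher reduces in fp16.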
compute_precision:
grad_scaler: true
teacher:
backbone:
sharding_strategy: SHARD_GRAD_OP
mixed_precision:
param_dtype: fp16
reduce_dtype: fp16
buffer_dtype: fp32
dino_head:
sharding_strategy: SHARD_GRAD_OP
mixed_precision:
param_dtype: fp16
reduce_dtype: fp16
buffer_dtype: fp32
ibot_head:
sharding_strategy: SHARD_GRAD_OP
mixed_precision:
param_dtype: fp16
reduce_dtype: fp16
buffer_dtype: fp32
student:
backbone:
sharding_strategy: SHARD_GRAD_OP
mixed_precision:
param_dtype: fp16
reduce_dtype: fp16
buffer_dtype: fp32
dino_head:
sharding_strategy: SHARD_GRAD_OP
mixed_precision:
param_dtype: fp16
reduce_dtype: fp32
buffer_dtype: fp32
ibot_head:
sharding_strategy: SHARD_GRAD_OP
mixed_precision:
param_dtype: fp16
reduce_dtype: fp32
buffer_dtype: fp32
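# DINO image-level objective: a 3-layer projection head with a 256-d
# bottleneck onto 65536 prototypes, plus the KoLeo regularizer (weight 0.1),
# which spreads embeddings apart within a batch.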
dino:
loss_weight: 1.0
head_n_prototypes: 65536
head_bottleneck_dim: 256
head_nlayers: 3
head_hidden_dim: 2048
koleo_loss_weight: 0.1
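# iBOT patch-level (masked image modeling) objective. Roughly half the images
# in each batch get patch masking (probability 0.5), blanking 10-50% of their
# patch tokens. separate_head: false means the iBOT loss reuses the DINO head,
# so the head_* fields in this block are presumably unused here.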
ibot:
loss_weight: 1.0
mask_sample_probability: 0.5
mask_ratio_min_max:
- 0.1
- 0.5
separate_head: false
head_n_prototypes: 65536
head_bottleneck_dim: 256
head_nlayers: 3
head_hidden_dim: 2048
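# Dataset and training-loop bookkeeping. An "official epoch" is a fixed 1250
# iterations regardless of dataset size, so 200 epochs (see optim below) =
# 250,000 optimizer steps; checkpoints are saved every 20 epochs. centering
# selects how teacher outputs are normalized (classic centering vs.
# Sinkhorn-Knopp, as in DINOv2).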
train:
batch_size_per_gpu: 72
dataset_path: LegacySurvey:split=train:root={ASTROCLIP_ROOT}/datasets/decals:extra=""
output_dir: .
saveckp_freq: 20
seed: 0
num_workers: 10
OFFICIAL_EPOCH_LENGTH: 1250
cache_dataset: true
centering: "centering" # or "sinkhorn_knopp"
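# Student backbone: ViT-Large with a 12-pixel patch size (smaller than the
# usual 14/16), stochastic depth 0.3 applied uniformly across blocks, and
# LayerScale initialized at 1e-5. block_chunks groups transformer blocks into
# chunks, presumably for FSDP wrapping.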
student:
arch: vit_large
patch_size: 12
drop_path_rate: 0.3
layerscale: 1.0e-05
drop_path_uniform: true
pretrained_weights: ''
ffn_layer: "mlp"
  block_chunks: 4 # 0 is the default in the standard config
qkv_bias: true
proj_bias: true
ffn_bias: true
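# The teacher is an EMA of the student: momentum ramps from 0.992 to 1.0 over
# training, and its softmax temperature warms up from 0.04 to 0.07 over the
# first 30 epochs (a sharper teacher early in training).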
teacher:
momentum_teacher: 0.992
final_momentum_teacher: 1
warmup_teacher_temp: 0.04
teacher_temp: 0.07
warmup_teacher_temp_epochs: 30
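# AdamW schedule. Assuming the standard DINOv2 sqrt_wrt_1024 rule,
#   lr = base_lr * sqrt(global_batch_size / 1024),
# e.g. with 8 GPUs (hypothetical) the global batch is 8 * 72 = 576 and
# lr = 2.0e-4 * sqrt(576 / 1024) = 1.5e-4. Weight decay follows a cosine from
# 0.001 to 0.01, the DINO head's last layer is frozen for the first epoch,
# and learning rates decay layer-wise by 0.9 (patch embed scaled by 0.2).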
optim:
epochs: 200
weight_decay: 0.001
weight_decay_end: 0.01
base_lr: 2.0e-4 # learning rate for a batch size of 1024
  lr: 0. # set automatically after applying the scaling rule
warmup_epochs: 32
min_lr: 1.0e-06
clip_grad: 3.0
freeze_last_layer_epochs: 1
scaling_rule: sqrt_wrt_1024
patch_embed_lr_mult: 0.2
layerwise_decay: 0.9
adamw_beta1: 0.9
adamw_beta2: 0.999
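# Multi-crop augmentation: two global crops (the DINO default) at 144 px
# covering 80-100% of the image, and 8 local crops at 60 px covering 40-60%.
# With patch_size 12 that is 144/12 = 12, so 12x12 = 144 tokens per global
# crop and 5x5 = 25 tokens per local crop.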
crops:
global_crops_scale:
- 0.8
- 1.0
local_crops_number: 8
local_crops_scale:
- 0.4
- 0.6
  global_crops_size: 144 # DINOv2 default: 224
  local_crops_size: 60 # DINOv2 default: 96
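# Periodic evaluation every 12,500 iterations, i.e. every 10 official epochs
# (12500 / 1250), for 20 evaluations over the full 200-epoch run.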
evaluation:
eval_period_iterations: 12500