/Users/josephbloom/GithubRepositories/mats_sae_training/scripts/wandb/run-20240325_154839-voo84o5b
"
+ "Run data is saved locally in /Users/josephbloom/GithubRepositories/mats_sae_training/scripts/wandb/run-20240326_191703-ec6k6v87
"
],
"text/plain": [
"details/current_learning_rate_coeff0.0002 | ▁▂▃▄▅▅▆▇████████████████████████████████ |
details/current_learning_rate_coeff0.0003 | ▁▂▃▄▅▅▆▇████████████████████████████████ |
details/current_learning_rate_coeff0.0006 | ▁▂▃▄▅▅▆▇████████████████████████████████ |
details/n_training_tokens | ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ |
losses/ghost_grad_loss_coeff0.0002 | ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/ghost_grad_loss_coeff0.0003 | ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/ghost_grad_loss_coeff0.0006 | ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/l1_loss_coeff0.0002 | ██▇▆▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/l1_loss_coeff0.0003 | ██▇▅▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/l1_loss_coeff0.0006 | █▇▆▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/mse_loss_coeff0.0002 | █▅▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/mse_loss_coeff0.0003 | █▅▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/mse_loss_coeff0.0006 | █▆▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/overall_loss_coeff0.0002 | █▆▅▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/overall_loss_coeff0.0003 | █▇▅▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/overall_loss_coeff0.0006 | █▇▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
metrics/CE_loss_score_coeff0.0002 | ▁▄▅▅▆▆▇▇▇▇▇█████████████ |
metrics/CE_loss_score_coeff0.0003 | ▁▄▄▄▅▅▆▆▇▇▇▇▇▇▇█████████ |
metrics/CE_loss_score_coeff0.0006 | ▁▄▂▂▃▄▄▅▅▅▆▆▆▇▇▇▇▇██████ |
metrics/ce_loss_with_ablation_coeff0.0002 | ▂▃▂▅▃▃▄▇▆▇█▄▅▃▄▇▆▂▁▄▄▄█▂ |
metrics/ce_loss_with_ablation_coeff0.0003 | ▆▄▁▅█▅▅█▄▄▆▅▇█▇▆▄▇▅▅▅▇▆▅ |
metrics/ce_loss_with_ablation_coeff0.0006 | ▄▆▅▆▃▄▅▆▇▆▆█▄▅▅▅▆▆▆▅▅▅▁▄ |
metrics/ce_loss_with_sae_coeff0.0002 | █▅▄▄▃▃▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁ |
metrics/ce_loss_with_sae_coeff0.0003 | █▅▅▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁ |
metrics/ce_loss_with_sae_coeff0.0006 | █▆▇▇▆▅▅▄▄▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁ |
metrics/ce_loss_without_sae_coeff0.0002 | ▃▅▄▃▃▃▅▅▃▅█▂▄▁█▇▄▄▁▂▃▄▅▇ |
metrics/ce_loss_without_sae_coeff0.0003 | ▃▄▆▅▅▅▆▆▄▁▄▄▅▅█▆▇▃▅▅▆▇▅▆ |
metrics/ce_loss_without_sae_coeff0.0006 | ▃▃▄▃▁▂▃▃▄█▃▂▅▅▅▃▆▅▃▄▅▄▂▂ |
metrics/explained_variance_coeff0.0002 | ▁▄▆▇▇▇▇█████████████████████████████████ |
metrics/explained_variance_coeff0.0003 | ▁▄▆▇▇▇▇▇▇▇██████████████████████████████ |
metrics/explained_variance_coeff0.0006 | ▁▃▆▆▆▆▆▆▇▇▇▇▇▇▇█████████████████████████ |
metrics/explained_variance_std_coeff0.0002 | ▇██▆▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
metrics/explained_variance_std_coeff0.0003 | ▆██▆▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
metrics/explained_variance_std_coeff0.0006 | ▅▇█▆▄▄▅▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁ |
metrics/l0_coeff0.0002 | ███▇▆▅▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
metrics/l0_coeff0.0003 | ███▇▆▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
metrics/l0_coeff0.0006 | ██▇▇▅▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
metrics/l2_norm_coeff0.0002 | ▂▂▁▁▂▄▄▅▅▆▆▆▇▇▇▇▇▇█▇████ |
metrics/l2_norm_coeff0.0003 | ▄▃▁▁▃▄▄▅▆▆▆▆▇▇▇▇▇▇▇█████ |
metrics/l2_norm_coeff0.0006 | ▆▄▁▁▂▃▄▅▅▆▆▆▇▇▇▇▇▇██████ |
metrics/l2_ratio_coeff0.0002 | ▂▂▁▁▂▄▄▅▅▆▆▆▇▇▇▇▇▇██████ |
metrics/l2_ratio_coeff0.0003 | ▄▃▁▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇██████ |
metrics/l2_ratio_coeff0.0006 | ▆▄▁▁▂▃▄▅▅▆▆▆▇▇▇▇▇███████ |
metrics/mean_log10_feature_sparsity_coeff0.0002 | █▄▂▁ |
metrics/mean_log10_feature_sparsity_coeff0.0003 | █▄▂▁ |
metrics/mean_log10_feature_sparsity_coeff0.0006 | █▄▂▁ |
sparsity/below_1e-5_coeff0.0002 | ▁▁▁█ |
sparsity/below_1e-5_coeff0.0003 | ▁▁▂█ |
sparsity/below_1e-5_coeff0.0006 | ▁▁▁█ |
sparsity/below_1e-6_coeff0.0002 | ▁▁▁▁ |
sparsity/below_1e-6_coeff0.0003 | ▁▁▁▁ |
sparsity/below_1e-6_coeff0.0006 | ▁▁▁▁ |
sparsity/dead_features_coeff0.0002 | ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
sparsity/dead_features_coeff0.0003 | ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▁▁▁▁▁██▅▁▁▁▁▅ |
sparsity/dead_features_coeff0.0006 | ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▁▁▁▁▁▁▄▅█▆▆▇ |
sparsity/mean_passes_since_fired_coeff0.0002 | ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▃▃▃▄▄▅▅▄▇▆▅▇▇██ |
sparsity/mean_passes_since_fired_coeff0.0003 | ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▅▆▆▇▆▆▇██ |
sparsity/mean_passes_since_fired_coeff0.0006 | ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▅▅▆▆▆▇██ |
details/current_learning_rate_coeff0.0002 | 0.0001 |
details/current_learning_rate_coeff0.0003 | 0.0001 |
details/current_learning_rate_coeff0.0006 | 0.0001 |
details/n_training_tokens | 9994240 |
losses/ghost_grad_loss_coeff0.0002 | 0.0 |
losses/ghost_grad_loss_coeff0.0003 | 0.0 |
losses/ghost_grad_loss_coeff0.0006 | 0.0 |
losses/l1_loss_coeff0.0002 | 3.09463 |
losses/l1_loss_coeff0.0003 | 2.58137 |
losses/l1_loss_coeff0.0006 | 1.83998 |
losses/mse_loss_coeff0.0002 | 0.00019 |
losses/mse_loss_coeff0.0003 | 0.00026 |
losses/mse_loss_coeff0.0006 | 0.00053 |
losses/overall_loss_coeff0.0002 | 0.00081 |
losses/overall_loss_coeff0.0003 | 0.00104 |
losses/overall_loss_coeff0.0006 | 0.00163 |
metrics/CE_loss_score_coeff0.0002 | 0.92625 |
metrics/CE_loss_score_coeff0.0003 | 0.88257 |
metrics/CE_loss_score_coeff0.0006 | 0.74837 |
metrics/ce_loss_with_ablation_coeff0.0002 | 7.74356 |
metrics/ce_loss_with_ablation_coeff0.0003 | 7.77858 |
metrics/ce_loss_with_ablation_coeff0.0006 | 7.76302 |
metrics/ce_loss_with_sae_coeff0.0002 | 3.10251 |
metrics/ce_loss_with_sae_coeff0.0003 | 3.29611 |
metrics/ce_loss_with_sae_coeff0.0006 | 3.90444 |
metrics/ce_loss_without_sae_coeff0.0002 | 2.73316 |
metrics/ce_loss_without_sae_coeff0.0003 | 2.70042 |
metrics/ce_loss_without_sae_coeff0.0006 | 2.60843 |
metrics/explained_variance_coeff0.0002 | 0.96407 |
metrics/explained_variance_coeff0.0003 | 0.9494 |
metrics/explained_variance_coeff0.0006 | 0.89856 |
metrics/explained_variance_std_coeff0.0002 | 0.03024 |
metrics/explained_variance_std_coeff0.0003 | 0.04194 |
metrics/explained_variance_std_coeff0.0006 | 0.08105 |
metrics/l0_coeff0.0002 | 119.72095 |
metrics/l0_coeff0.0003 | 77.5647 |
metrics/l0_coeff0.0006 | 33.18384 |
metrics/l2_norm_coeff0.0002 | 1.39449 |
metrics/l2_norm_coeff0.0003 | 1.36607 |
metrics/l2_norm_coeff0.0006 | 1.28269 |
metrics/l2_ratio_coeff0.0002 | 0.93444 |
metrics/l2_ratio_coeff0.0003 | 0.91607 |
metrics/l2_ratio_coeff0.0006 | 0.86204 |
metrics/mean_log10_feature_sparsity_coeff0.0002 | -1.81471 |
metrics/mean_log10_feature_sparsity_coeff0.0003 | -2.2457 |
metrics/mean_log10_feature_sparsity_coeff0.0006 | -3.13876 |
sparsity/below_1e-5_coeff0.0002 | 1 |
sparsity/below_1e-5_coeff0.0003 | 6 |
sparsity/below_1e-5_coeff0.0006 | 27 |
sparsity/below_1e-6_coeff0.0002 | 0 |
sparsity/below_1e-6_coeff0.0003 | 0 |
sparsity/below_1e-6_coeff0.0006 | 0 |
sparsity/dead_features_coeff0.0002 | 0 |
sparsity/dead_features_coeff0.0003 | 1 |
sparsity/dead_features_coeff0.0006 | 7 |
sparsity/mean_passes_since_fired_coeff0.0002 | 0.23755 |
sparsity/mean_passes_since_fired_coeff0.0003 | 1.10229 |
sparsity/mean_passes_since_fired_coeff0.0006 | 5.02368 |
details/current_learning_rate | ▁▃▅▆████████████████████████████████████ |
details/n_training_tokens | ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ |
losses/ghost_grad_loss | ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/l1_loss | ██▇▆▅▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/mse_loss | █▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
losses/overall_loss | █▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
metrics/CE_loss_score | ▁▄▅▆▆▇▇▇▇▇▇▇▇███████████████████████████ |
metrics/ce_loss_with_ablation | ▂▃▂▅▃▆▅▃▄▆▇▆▅▇▅▄▇▅▁▆▄▅▆▄█▄▅▆▄▅▅▃▂▄▄▅▅█▆▆ |
metrics/ce_loss_with_sae | █▅▄▃▃▃▂▂▂▂▃▂▂▂▂▂▂▂▁▂▂▂▂▁▂▂▂▂▁▂▂▁▁▂▁▂▁▂▂▂ |
metrics/ce_loss_without_sae | ▄▄▁▃▄▆▅▃▆▅█▆▅▆▅▄▅▆▁▇▆▅▆▃█▆▆▆▄▇▆▃▃▆▃▆▄█▇▅ |
metrics/explained_variance | ▁▅▇▇▇███████████████████████████████████ |
metrics/explained_variance_std | ██▆▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
metrics/l0 | ██▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
metrics/l2_norm | ▁▄▆▆▇▇▇▆▆▆▇█▇▇▆▇▇▆▇▇▇▆▇████▇▇▇▇▇▇▇▇▇█▇▇▇ |
metrics/l2_ratio | ▁▃▁▂▄▃▂▄▆▅▅▅▅▆▅▆▇▆▆▇▇▆▆▆▇▆▆▇▆▇▆▇▇▇█▆▆▇▇▇ |
metrics/mean_log10_feature_sparsity | █▇▅▄▃▃▂▁▁ |
sparsity/below_1e-5 | ▁▁▁▁▁▁▁▁▁ |
sparsity/below_1e-6 | ▁▁▁▁▁▁▁▁▁ |
sparsity/dead_features | ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ |
sparsity/mean_passes_since_fired | ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▂▂▂▄▇▄▃▅▆▄▇██ |
details/current_learning_rate | 0.0001 |
details/n_training_tokens | 19988480 |
losses/ghost_grad_loss | 0.0 |
losses/l1_loss | 1.41017 |
losses/mse_loss | 8e-05 |
losses/overall_loss | 0.00036 |
metrics/CE_loss_score | 0.98362 |
metrics/ce_loss_with_ablation | 5.49512 |
metrics/ce_loss_with_sae | 2.71813 |
metrics/ce_loss_without_sae | 2.67199 |
metrics/explained_variance | 0.98647 |
metrics/explained_variance_std | 0.00905 |
metrics/l0 | 166.02246 |
metrics/l2_norm | 1.39317 |
metrics/l2_ratio | 0.99823 |
metrics/mean_log10_feature_sparsity | -1.53525 |
sparsity/below_1e-5 | 0 |
sparsity/below_1e-6 | 0 |
sparsity/dead_features | 0 |
sparsity/mean_passes_since_fired | 0.02051 |
./wandb/run-20240325_154839-voo84o5b/logs
"
+ "Find logs at: ./wandb/run-20240326_191703-ec6k6v87/logs
"
],
"text/plain": [
"