vsae-multi / config.json
zachdata's picture
Upload 2 files
2294543 verified
{
"trainer": {
"dict_class": "VSAEMultiGaussian",
"trainer_class": "VSAEMultiGaussianTrainer",
"activation_dim": 2048,
"dict_size": 8192,
"lr": 0.001,
"kl_coeff": 50,
"warmup_steps": 500,
"sparsity_warmup_steps": 500,
"corr_rate": 0.0,
"var_flag": 0,
"steps": 10000,
"decay_start": 8000,
"use_april_update_mode": true,
"seed": null,
"device": "cuda",
"layer": 0,
"lm_name": "gelu-1l",
"wandb_name": "VSAEMulti_gelu-1l_d8192_lr0.001_kl50_corr0.0_trainer_0",
"submodule_name": null
},
"buffer": {
"d_submodule": 2048,
"n_ctxs": 3000,
"ctx_len": 128,
"refresh_batch_size": 32,
"out_batch_size": 1024,
"device": "cuda"
}
}