| { | |
| "trainer": { | |
| "dict_class": "VSAEMultiGaussian", | |
| "trainer_class": "VSAEMultiGaussianTrainer", | |
| "activation_dim": 2048, | |
| "dict_size": 8192, | |
| "lr": 0.001, | |
| "kl_coeff": 50, | |
| "warmup_steps": 500, | |
| "sparsity_warmup_steps": 500, | |
| "corr_rate": 0.0, | |
| "var_flag": 0, | |
| "steps": 10000, | |
| "decay_start": 8000, | |
| "use_april_update_mode": true, | |
| "seed": null, | |
| "device": "cuda", | |
| "layer": 0, | |
| "lm_name": "gelu-1l", | |
| "wandb_name": "VSAEMulti_gelu-1l_d8192_lr0.001_kl50_corr0.0_trainer_0", | |
| "submodule_name": null | |
| }, | |
| "buffer": { | |
| "d_submodule": 2048, | |
| "n_ctxs": 3000, | |
| "ctx_len": 128, | |
| "refresh_batch_size": 32, | |
| "out_batch_size": 1024, | |
| "device": "cuda" | |
| } | |
| } |