{
  "vocab_size": 50257,
  "dim": 384,
  "n_layers": 6,
  "max_seq_len": 256,
  "channel_top_k": 96,
  "token_top_k": 32,
  "ffn_mult": 4,
  "mem_dim": 96,
  "mem_size": 10000,
  "mem_k": 8,
  "mem_threshold": 0.4,
  "dropout": 0.1,
  "batch_size": 16,
  "lr": 0.0003,
  "max_steps": 15000,
  "warmup": 500,
  "log_every": 100,
  "eval_every": 500,
  "user_marker": "Q:",
  "asst_marker": "A:",
  "end_marker": "<END>",
  "pad_id": 50256
}