# Model-loading and sampling settings for an FP8-dynamic quantized
# Gemma 3n E2B instruction-tuned checkpoint stored on a local/network path.
#
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file. `generation_parameters` is assumed to be a child of
# `model_parameters` (the layout used by lighteval's vLLM model configs);
# confirm against the loader that consumes this file.
model_parameters:
  model_name: "/network/shubhra/gemma_3n_quantized/gemma-3n-E2B-it-FP8_DYNAMIC_fix"
  dtype: auto
  gpu_memory_utilization: 0.9
  tensor_parallel_size: 1
  max_model_length: 32768
  generation_parameters:
    temperature: 0.6
    top_k: 20
    min_p: 0.0
    top_p: 0.95
    # NOTE(review): same value as max_model_length above. If that setting is
    # the total (prompt + completion) window, this budget leaves no room for
    # prompt tokens; confirm this is intended.
    max_new_tokens: 32768