Update model.yml
model.yml CHANGED
@@ -6,6 +6,8 @@ files:
   - llama_model_path: model.gguf
 
 # Results Preferences
+stop:
+  - </s>
 top_p: 0.95
 temperature: 0.7
 frequency_penalty: 0
@@ -14,7 +16,6 @@ max_tokens: 4096 # Infer from base config.json -> max_position_embeddings
 stream: true # true | false
 
 # Engine / Model Settings
-ngl: 33 # Infer from base config.json -> num_attention_heads
 ctx_len: 4096 # Infer from base config.json -> max_position_embeddings
 engine: cortex.llamacpp
 prompt_template: "{system_message} [INST] {prompt} [/INST]"
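In sum, this commit adds a stop sequence (</s>) under Results Preferences and drops the ngl setting from Engine / Model Settings. For reference, a sketch of the affected portion of model.yml after the change, reconstructed from the hunks above (lines not shown in the diff are elided):

files:
  - llama_model_path: model.gguf

# Results Preferences
stop:
  - </s>
top_p: 0.95
temperature: 0.7
frequency_penalty: 0
# ... (lines not shown in the diff, including max_tokens: 4096) ...
stream: true # true | false

# Engine / Model Settings
ctx_len: 4096 # Infer from base config.json -> max_position_embeddings
engine: cortex.llamacpp
prompt_template: "{system_message} [INST] {prompt} [/INST]"

With the cortex.llamacpp engine, entries under stop end generation as soon as the model emits them, so </s> (presumably the EOS token of a Mistral-style model, given the [INST] prompt_template above) terminates responses cleanly.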