Update main.py
Browse files
main.py
CHANGED
|
@@ -115,7 +115,7 @@ async def chat_completions(request: ChatCompletionRequest):
|
|
| 115 |
|
| 116 |
# The payload for the external API uses our system prompt and the combined user query + search results
|
| 117 |
payload = {
|
| 118 |
- "model": "meta-llama/llama-3.1-8b-instruct", # The actual model used by the inference API
|
| 119 |
"messages": [
|
| 120 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 121 |
{"role": "user", "content": final_prompt},
|
|
|
|
| 115 |
|
| 116 |
# The payload for the external API uses our system prompt and the combined user query + search results
|
| 117 |
payload = {
|
| 118 |
+ "model": "meta-llama/llama-3.1-8b-instruct/fp-8", # The actual model used by the inference API
|
| 119 |
"messages": [
|
| 120 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 121 |
{"role": "user", "content": final_prompt},
|