Ksjsjjdj committed
Commit b2192d5 · verified · 1 Parent(s): 950252a

Update app.py

Files changed (1): app.py (+2 -6)
app.py CHANGED
@@ -62,7 +62,7 @@ class Usage(BaseModel):
     prompt_tokens: int
     completion_tokens: int
     total_tokens: int
-    prompt_tokens_details: Optional[PromptTokensDetails]
+    prompt_tokens_details: Optional[PromptTokensDetails] = None
 
 class ChatCompletionChoice(BaseModel):
     index: int
@@ -77,7 +77,7 @@ class ChatCompletionChunk(BaseModel):
     created: int = Field(...)
     model: str
     choices: List[ChatCompletionChoice]
-    usage: Optional[Usage]
+    usage: Optional[Usage] = None
 
 class ToolFunction(BaseModel):
     name: str
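
Both hunks fix the same issue: under Pydantic v2 semantics, annotating a field as Optional[...] only allows None as a value; it does not supply a default, so the field stays required and constructing a Usage or ChatCompletionChunk without it raises a validation error. Adding "= None" makes the field genuinely omissible, which matters for streamed chunks that carry no usage block. A minimal sketch of the difference, assuming Pydantic v2 (the PromptTokensDetails body below is a simplified stand-in, not the app's real model):

from typing import Optional
from pydantic import BaseModel, ValidationError

class PromptTokensDetails(BaseModel):
    cached_tokens: int = 0  # simplified stand-in field

class UsageRequired(BaseModel):
    total_tokens: int
    # Optional[...] with no default: None is accepted, but the key itself is still required
    prompt_tokens_details: Optional[PromptTokensDetails]

class UsageOptional(BaseModel):
    total_tokens: int
    # Optional[...] = None: the key may be omitted entirely
    prompt_tokens_details: Optional[PromptTokensDetails] = None

try:
    UsageRequired(total_tokens=10)
except ValidationError as exc:
    print(exc.errors()[0]["type"])                            # "missing" under Pydantic v2

print(UsageOptional(total_tokens=10).prompt_tokens_details)   # None
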
@@ -493,7 +493,6 @@ async def chatResponseStream(request: ChatCompletionRequest, model_state: any, c
     clean_msg = cleanMessages(current_messages, enableReasoning)
     prompt = f"{clean_msg}\n\nAssistant:{' <think' if enableReasoning else ''}"
 
-    tool_buffer = ""
     tool_call_mode = False
 
     async with GPU_LOCK:
@@ -529,12 +528,9 @@
     async with GPU_LOCK:
         try:
             tool_out, tool_tokens, tool_state = await runPrefill(request, "", [0], model_state)
-            temp_tokens = []
-
             current_gen = ""
 
             for i in range(200):
-                args = PIPELINE_ARGS(temperature=0.1, top_p=0.1)
                 tool_token = MODEL_STORAGE[request.model].pipeline.sample_logits(tool_out, temperature=0.1, top_p=0.1)
                 tool_out, tool_state = MODEL_STORAGE[request.model].model.forward([tool_token], tool_state)
 
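
The remaining removals are dead code: tool_buffer and temp_tokens were never read again, and the PIPELINE_ARGS(temperature=0.1, top_p=0.1) object rebuilt on every loop iteration was unused, since sample_logits already receives temperature and top_p directly. For context, here is a sketch of the bounded, near-greedy generation loop this hunk belongs to; pipeline.sample_logits and model.forward mirror the calls shown above, while pipeline.decode and the </tool_call> stop marker are illustrative assumptions rather than code taken from app.py:

def generate_tool_call(pipeline, model, tool_out, tool_state, max_tokens=200):
    """Bounded, near-greedy generation of a tool-call block (illustrative sketch)."""
    current_gen = ""
    for _ in range(max_tokens):  # hard cap so a malformed tool call cannot run forever
        # very low temperature/top_p keeps the generated JSON arguments close to greedy decoding
        tool_token = pipeline.sample_logits(tool_out, temperature=0.1, top_p=0.1)
        tool_out, tool_state = model.forward([tool_token], tool_state)
        current_gen += pipeline.decode([tool_token])   # assumed decode helper
        if "</tool_call>" in current_gen:              # assumed stop condition
            break
    return current_gen, tool_state
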
 
 