yangtb24 committed on
Commit
7008444
·
verified ·
1 Parent(s): 3d5f23c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -34
app.py CHANGED
@@ -421,7 +421,7 @@ def handsome_chat_completions():
421
  return jsonify({"error": "Invalid request data"}), 400
422
 
423
  model_name = data['model']
424
-
425
  api_key = select_key(model_name)
426
 
427
  if not api_key:
@@ -434,7 +434,7 @@ def handsome_chat_completions():
434
  )
435
  }
436
  ), 429
437
-
438
  if model_name == "deepseek-reasoner":
439
  for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
440
  if param in data:
@@ -444,7 +444,7 @@ def handsome_chat_completions():
444
  "Authorization": f"Bearer {api_key}",
445
  "Content-Type": "application/json"
446
  }
447
-
448
  try:
449
  start_time = time.time()
450
  response = requests.post(
@@ -479,7 +479,6 @@ def handsome_chat_completions():
479
  prompt_tokens = 0
480
  completion_tokens = 0
481
  response_content = ""
482
- formatted_chunks = [] # Collect formatted chunks for final response
483
  for line in full_response_content.splitlines():
484
  if line.startswith("data:"):
485
  line = line[5:].strip()
@@ -496,24 +495,17 @@ def handsome_chat_completions():
496
  "usage"
497
  ]["completion_tokens"]
498
 
499
- if "choices" in response_json and len(response_json["choices"]) > 0:
500
- delta = response_json["choices"][0].get("delta", {})
501
- if "reasoning_content" in delta and delta["reasoning_content"]:
502
- reasoning_lines = delta["reasoning_content"].splitlines()
503
- formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
504
- response_content += formatted_reasoning
505
- formatted_chunks.append({
506
- "choices": [
507
- {"delta": {"content": formatted_reasoning}, "index": 0, "finish_reason": None}
508
- ]
509
- })
510
- if "content" in delta and delta["content"]:
511
- response_content += delta["content"]
512
- formatted_chunks.append({
513
- "choices": [
514
- {"delta": {"content": delta["content"]}, "index": 0, "finish_reason": None}
515
- ]
516
- })
517
 
518
  if (
519
  "usage" in response_json and
@@ -574,18 +566,10 @@ def handsome_chat_completions():
574
  request_timestamps.append(time.time())
575
  token_counts.append(prompt_tokens+completion_tokens)
576
 
577
- # Generate the final stream response
578
- def generate_formatted_stream():
579
- for chunk in formatted_chunks:
580
- yield f"data: {json.dumps(chunk)}\n\n".encode('utf-8')
581
- yield "data: [DONE]\n\n".encode('utf-8')
582
-
583
-
584
- return Response(
585
- stream_with_context(generate_formatted_stream()),
586
- content_type="text/event-stream"
587
- )
588
-
589
  else:
590
  response.raise_for_status()
591
  end_time = time.time()
 
421
  return jsonify({"error": "Invalid request data"}), 400
422
 
423
  model_name = data['model']
424
+
425
  api_key = select_key(model_name)
426
 
427
  if not api_key:
 
434
  )
435
  }
436
  ), 429
437
+
438
  if model_name == "deepseek-reasoner":
439
  for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
440
  if param in data:
 
444
  "Authorization": f"Bearer {api_key}",
445
  "Content-Type": "application/json"
446
  }
447
+
448
  try:
449
  start_time = time.time()
450
  response = requests.post(
 
479
  prompt_tokens = 0
480
  completion_tokens = 0
481
  response_content = ""
 
482
  for line in full_response_content.splitlines():
483
  if line.startswith("data:"):
484
  line = line[5:].strip()
 
495
  "usage"
496
  ]["completion_tokens"]
497
 
498
+ if (
499
+ "choices" in response_json and
500
+ len(response_json["choices"]) > 0 and
501
+ "delta" in response_json["choices"][0] and
502
+ "content" in response_json[
503
+ "choices"
504
+ ][0]["delta"]
505
+ ):
506
+ response_content += response_json[
507
+ "choices"
508
+ ][0]["delta"]["content"]
 
 
 
 
 
 
 
509
 
510
  if (
511
  "usage" in response_json and
 
566
  request_timestamps.append(time.time())
567
  token_counts.append(prompt_tokens+completion_tokens)
568
 
569
+ return Response(
570
+ stream_with_context(generate()),
571
+ content_type=response.headers['Content-Type']
572
+ )
 
 
 
 
 
 
 
 
573
  else:
574
  response.raise_for_status()
575
  end_time = time.time()