Update app.py
Browse files
app.py
CHANGED
|
@@ -53,7 +53,7 @@ def get_credit_summary(api_key):
|
|
| 53 |
if not data.get("is_available", False):
|
| 54 |
logging.warning(f"API Key: {api_key} is not available.")
|
| 55 |
return None
|
| 56 |
-
|
| 57 |
balance_infos = data.get("balance_infos", [])
|
| 58 |
total_balance_cny = 0.0
|
| 59 |
usd_balance = 0.0
|
|
@@ -270,12 +270,12 @@ def check_tokens():
|
|
| 270 |
)
|
| 271 |
|
| 272 |
return jsonify(results)
|
| 273 |
-
|
| 274 |
@app.route('/handsome/v1/models', methods=['GET'])
|
| 275 |
def list_models():
|
| 276 |
if not check_authorization(request):
|
| 277 |
return jsonify({"error": "Unauthorized"}), 401
|
| 278 |
-
|
| 279 |
detailed_models = [
|
| 280 |
{
|
| 281 |
"id": "deepseek-chat",
|
|
@@ -431,139 +431,15 @@ def handsome_chat_completions():
|
|
| 431 |
def generate():
|
| 432 |
first_chunk_time = None
|
| 433 |
full_response_content = ""
|
| 434 |
-
reasoning_content_accumulated = ""
|
| 435 |
-
content_accumulated = ""
|
| 436 |
-
|
| 437 |
-
|
| 438 |
for chunk in response.iter_content(chunk_size=1024):
|
| 439 |
if chunk:
|
| 440 |
if first_chunk_time is None:
|
| 441 |
first_chunk_time = time.time()
|
| 442 |
-
|
| 443 |
full_response_content += chunk.decode("utf-8")
|
| 444 |
-
|
| 445 |
-
try:
|
| 446 |
-
for line in chunk.decode("utf-8").splitlines():
|
| 447 |
-
if line.startswith("data:"):
|
| 448 |
-
line = line[5:].strip()
|
| 449 |
-
if line == "[DONE]":
|
| 450 |
-
continue
|
| 451 |
-
try:
|
| 452 |
-
response_json = json.loads(line)
|
| 453 |
-
|
| 454 |
-
if (
|
| 455 |
-
"usage" in response_json and
|
| 456 |
-
"completion_tokens" in response_json["usage"]
|
| 457 |
-
):
|
| 458 |
-
completion_tokens = response_json[
|
| 459 |
-
"usage"
|
| 460 |
-
]["completion_tokens"]
|
| 461 |
-
|
| 462 |
-
# Special handling for deepseek-reasoner in streaming mode
|
| 463 |
-
if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
|
| 464 |
-
delta = response_json["choices"][0].get("delta", {})
|
| 465 |
-
|
| 466 |
-
if "reasoning_content" in delta and delta["reasoning_content"]:
|
| 467 |
-
reasoning_content = delta["reasoning_content"]
|
| 468 |
-
|
| 469 |
-
formatted_reasoning_chunk = {
|
| 470 |
-
"id": response_json.get("id", ""),
|
| 471 |
-
"object": "chat.completion.chunk",
|
| 472 |
-
"created": response_json.get("created", int(time.time())),
|
| 473 |
-
"model": model_name,
|
| 474 |
-
"choices": [
|
| 475 |
-
{
|
| 476 |
-
"index": 0,
|
| 477 |
-
"delta": {
|
| 478 |
-
"content": f"```Thinking\n{reasoning_content}\n```",
|
| 479 |
-
},
|
| 480 |
-
"finish_reason": None
|
| 481 |
-
}
|
| 482 |
-
],
|
| 483 |
-
"usage": None,
|
| 484 |
-
}
|
| 485 |
-
yield f"data: {json.dumps(formatted_reasoning_chunk)}\n\n".encode('utf-8')
|
| 486 |
-
if "content" in delta and delta["content"]:
|
| 487 |
-
content = delta["content"]
|
| 488 |
-
formatted_content_chunk = {
|
| 489 |
-
"id": response_json.get("id", ""),
|
| 490 |
-
"object": "chat.completion.chunk",
|
| 491 |
-
"created": response_json.get("created", int(time.time())),
|
| 492 |
-
"model": model_name,
|
| 493 |
-
"choices": [
|
| 494 |
-
{
|
| 495 |
-
"index": 0,
|
| 496 |
-
"delta": {
|
| 497 |
-
"content": content,
|
| 498 |
-
},
|
| 499 |
-
"finish_reason": None
|
| 500 |
-
}
|
| 501 |
-
],
|
| 502 |
-
"usage": None,
|
| 503 |
-
}
|
| 504 |
-
yield f"data: {json.dumps(formatted_content_chunk)}\n\n".encode('utf-8')
|
| 505 |
-
elif "choices" in response_json and len(response_json["choices"]) > 0:
|
| 506 |
-
# Handle other models normally
|
| 507 |
-
delta = response_json["choices"][0].get("delta", {})
|
| 508 |
-
if "content" in delta and delta["content"]:
|
| 509 |
-
formatted_content_chunk = {
|
| 510 |
-
"id": response_json.get("id", ""),
|
| 511 |
-
"object": "chat.completion.chunk",
|
| 512 |
-
"created": response_json.get("created", int(time.time())),
|
| 513 |
-
"model": model_name,
|
| 514 |
-
"choices": [
|
| 515 |
-
{
|
| 516 |
-
"index": 0,
|
| 517 |
-
"delta": {
|
| 518 |
-
"content": delta["content"],
|
| 519 |
-
},
|
| 520 |
-
"finish_reason": None
|
| 521 |
-
}
|
| 522 |
-
],
|
| 523 |
-
"usage": None,
|
| 524 |
-
}
|
| 525 |
-
yield f"data: {json.dumps(formatted_content_chunk)}\n\n".encode('utf-8')
|
| 526 |
-
|
| 527 |
-
if (
|
| 528 |
-
"usage" in response_json and
|
| 529 |
-
"prompt_tokens" in response_json["usage"]
|
| 530 |
-
):
|
| 531 |
-
prompt_tokens = response_json[
|
| 532 |
-
"usage"
|
| 533 |
-
]["prompt_tokens"]
|
| 534 |
-
|
| 535 |
-
except (
|
| 536 |
-
KeyError,
|
| 537 |
-
ValueError,
|
| 538 |
-
IndexError
|
| 539 |
-
) as e:
|
| 540 |
-
logging.error(
|
| 541 |
-
f"解析流式响应单行 JSON 失败: {e}, "
|
| 542 |
-
f"行内容: {line}"
|
| 543 |
-
)
|
| 544 |
-
except Exception as e:
|
| 545 |
-
logging.error(f"处理流式响应失败:{e}")
|
| 546 |
-
|
| 547 |
-
# Send the [DONE] message after all chunks have been processed
|
| 548 |
-
done_chunk = {
|
| 549 |
-
"id": response_json.get("id", ""),
|
| 550 |
-
"object": "chat.completion.chunk",
|
| 551 |
-
"created": response_json.get("created", int(time.time())),
|
| 552 |
-
"model": model_name,
|
| 553 |
-
"choices": [
|
| 554 |
-
{
|
| 555 |
-
"index": 0,
|
| 556 |
-
"delta": {},
|
| 557 |
-
"finish_reason": "stop"
|
| 558 |
-
}
|
| 559 |
-
],
|
| 560 |
-
"usage": {
|
| 561 |
-
"completion_tokens": completion_tokens,
|
| 562 |
-
"prompt_tokens": prompt_tokens,
|
| 563 |
-
"total_tokens": prompt_tokens + completion_tokens
|
| 564 |
-
},
|
| 565 |
-
}
|
| 566 |
-
yield f"data: {json.dumps(done_chunk)}\n\n".encode('utf-8')
|
| 567 |
|
| 568 |
end_time = time.time()
|
| 569 |
first_token_time = (
|
|
@@ -572,6 +448,61 @@ def handsome_chat_completions():
|
|
| 572 |
)
|
| 573 |
total_time = end_time - start_time
|
| 574 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 575 |
|
| 576 |
user_content = ""
|
| 577 |
messages = data.get("messages", [])
|
|
@@ -595,7 +526,10 @@ def handsome_chat_completions():
|
|
| 595 |
user_content_replaced = user_content.replace(
|
| 596 |
'\n', '\\n'
|
| 597 |
).replace('\r', '\\n')
|
| 598 |
-
|
|
|
|
|
|
|
|
|
|
| 599 |
logging.info(
|
| 600 |
f"使用的key: {api_key}, "
|
| 601 |
f"提示token: {prompt_tokens}, "
|
|
@@ -603,16 +537,21 @@ def handsome_chat_completions():
|
|
| 603 |
f"首字用时: {first_token_time:.4f}秒, "
|
| 604 |
f"总共用时: {total_time:.4f}秒, "
|
| 605 |
f"使用的模型: {model_name}, "
|
| 606 |
-
f"用户的内容: {user_content_replaced}"
|
|
|
|
| 607 |
)
|
| 608 |
|
| 609 |
with data_lock:
|
| 610 |
request_timestamps.append(time.time())
|
| 611 |
token_counts.append(prompt_tokens + completion_tokens)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
|
| 613 |
return Response(
|
| 614 |
stream_with_context(generate()),
|
| 615 |
-
content_type=
|
| 616 |
)
|
| 617 |
else:
|
| 618 |
# ... (Non-streaming part remains the same as in the previous response)
|
|
@@ -631,8 +570,7 @@ def handsome_chat_completions():
|
|
| 631 |
choice = response_json["choices"][0]
|
| 632 |
if "message" in choice:
|
| 633 |
if "reasoning_content" in choice["message"]:
|
| 634 |
-
|
| 635 |
-
formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
|
| 636 |
response_content += formatted_reasoning + "\n"
|
| 637 |
if "content" in choice["message"]:
|
| 638 |
response_content += choice["message"]["content"]
|
|
|
|
| 53 |
if not data.get("is_available", False):
|
| 54 |
logging.warning(f"API Key: {api_key} is not available.")
|
| 55 |
return None
|
| 56 |
+
|
| 57 |
balance_infos = data.get("balance_infos", [])
|
| 58 |
total_balance_cny = 0.0
|
| 59 |
usd_balance = 0.0
|
|
|
|
| 270 |
)
|
| 271 |
|
| 272 |
return jsonify(results)
|
| 273 |
+
|
| 274 |
@app.route('/handsome/v1/models', methods=['GET'])
|
| 275 |
def list_models():
|
| 276 |
if not check_authorization(request):
|
| 277 |
return jsonify({"error": "Unauthorized"}), 401
|
| 278 |
+
|
| 279 |
detailed_models = [
|
| 280 |
{
|
| 281 |
"id": "deepseek-chat",
|
|
|
|
| 431 |
def generate():
|
| 432 |
first_chunk_time = None
|
| 433 |
full_response_content = ""
|
| 434 |
+
reasoning_content_accumulated = "" # Accumulate reasoning content
|
| 435 |
+
content_accumulated = "" # Accumulate regular content
|
| 436 |
+
|
|
|
|
| 437 |
for chunk in response.iter_content(chunk_size=1024):
|
| 438 |
if chunk:
|
| 439 |
if first_chunk_time is None:
|
| 440 |
first_chunk_time = time.time()
|
|
|
|
| 441 |
full_response_content += chunk.decode("utf-8")
|
| 442 |
+
yield chunk
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 443 |
|
| 444 |
end_time = time.time()
|
| 445 |
first_token_time = (
|
|
|
|
| 448 |
)
|
| 449 |
total_time = end_time - start_time
|
| 450 |
|
| 451 |
+
prompt_tokens = 0
|
| 452 |
+
completion_tokens = 0
|
| 453 |
+
for line in full_response_content.splitlines():
|
| 454 |
+
if line.startswith("data:"):
|
| 455 |
+
line = line[5:].strip()
|
| 456 |
+
if line == "[DONE]":
|
| 457 |
+
continue
|
| 458 |
+
try:
|
| 459 |
+
response_json = json.loads(line)
|
| 460 |
+
|
| 461 |
+
if (
|
| 462 |
+
"usage" in response_json and
|
| 463 |
+
"completion_tokens" in response_json["usage"]
|
| 464 |
+
):
|
| 465 |
+
completion_tokens += response_json[
|
| 466 |
+
"usage"
|
| 467 |
+
]["completion_tokens"]
|
| 468 |
+
|
| 469 |
+
# Special handling for deepseek-reasoner in streaming mode
|
| 470 |
+
if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
|
| 471 |
+
delta = response_json["choices"][0].get("delta", {})
|
| 472 |
+
if "reasoning_content" in delta:
|
| 473 |
+
reasoning_content_accumulated += delta["reasoning_content"]
|
| 474 |
+
if "content" in delta:
|
| 475 |
+
content_accumulated += delta["content"]
|
| 476 |
+
elif "choices" in response_json and len(response_json["choices"]) > 0:
|
| 477 |
+
# Handle other models normally
|
| 478 |
+
delta = response_json["choices"][0].get("delta", {})
|
| 479 |
+
if "content" in delta:
|
| 480 |
+
content_accumulated += delta["content"]
|
| 481 |
+
|
| 482 |
+
if (
|
| 483 |
+
"usage" in response_json and
|
| 484 |
+
"prompt_tokens" in response_json["usage"]
|
| 485 |
+
):
|
| 486 |
+
prompt_tokens = response_json[
|
| 487 |
+
"usage"
|
| 488 |
+
]["prompt_tokens"]
|
| 489 |
+
|
| 490 |
+
except (
|
| 491 |
+
KeyError,
|
| 492 |
+
ValueError,
|
| 493 |
+
IndexError
|
| 494 |
+
) as e:
|
| 495 |
+
logging.error(
|
| 496 |
+
f"解析流式响应单行 JSON 失败: {e}, "
|
| 497 |
+
f"行内容: {line}"
|
| 498 |
+
)
|
| 499 |
+
|
| 500 |
+
# Format the accumulated reasoning content after processing all chunks
|
| 501 |
+
if model_name == "deepseek-reasoner":
|
| 502 |
+
formatted_reasoning = f"```Thinking\n{reasoning_content_accumulated}\n```"
|
| 503 |
+
response_content = formatted_reasoning + "\n" + content_accumulated
|
| 504 |
+
else:
|
| 505 |
+
response_content = content_accumulated
|
| 506 |
|
| 507 |
user_content = ""
|
| 508 |
messages = data.get("messages", [])
|
|
|
|
| 526 |
user_content_replaced = user_content.replace(
|
| 527 |
'\n', '\\n'
|
| 528 |
).replace('\r', '\\n')
|
| 529 |
+
response_content_replaced = response_content.replace(
|
| 530 |
+
'\n', '\\n'
|
| 531 |
+
).replace('\r', '\\n')
|
| 532 |
+
|
| 533 |
logging.info(
|
| 534 |
f"使用的key: {api_key}, "
|
| 535 |
f"提示token: {prompt_tokens}, "
|
|
|
|
| 537 |
f"首字用时: {first_token_time:.4f}秒, "
|
| 538 |
f"总共用时: {total_time:.4f}秒, "
|
| 539 |
f"使用的模型: {model_name}, "
|
| 540 |
+
f"用户的内容: {user_content_replaced}, "
|
| 541 |
+
f"输出的内容: {response_content_replaced}"
|
| 542 |
)
|
| 543 |
|
| 544 |
with data_lock:
|
| 545 |
request_timestamps.append(time.time())
|
| 546 |
token_counts.append(prompt_tokens + completion_tokens)
|
| 547 |
+
|
| 548 |
+
yield f"data: {json.dumps({'choices': [{'delta': {'content': response_content}, 'index': 0, 'finish_reason': None}]})}\n\n"
|
| 549 |
+
yield "data: [DONE]\n\n"
|
| 550 |
+
|
| 551 |
|
| 552 |
return Response(
|
| 553 |
stream_with_context(generate()),
|
| 554 |
+
content_type="text/event-stream"
|
| 555 |
)
|
| 556 |
else:
|
| 557 |
# ... (Non-streaming part remains the same as in the previous response)
|
|
|
|
| 570 |
choice = response_json["choices"][0]
|
| 571 |
if "message" in choice:
|
| 572 |
if "reasoning_content" in choice["message"]:
|
| 573 |
+
formatted_reasoning = f"```Thinking\n{choice['message']['reasoning_content']}\n```"
|
|
|
|
| 574 |
response_content += formatted_reasoning + "\n"
|
| 575 |
if "content" in choice["message"]:
|
| 576 |
response_content += choice["message"]["content"]
|