Update app.py
Browse files
app.py
CHANGED
|
@@ -53,7 +53,7 @@ def get_credit_summary(api_key):
|
|
| 53 |
if not data.get("is_available", False):
|
| 54 |
logging.warning(f"API Key: {api_key} is not available.")
|
| 55 |
return None
|
| 56 |
-
|
| 57 |
balance_infos = data.get("balance_infos", [])
|
| 58 |
total_balance_cny = 0.0
|
| 59 |
usd_balance = 0.0
|
|
@@ -270,12 +270,12 @@ def check_tokens():
|
|
| 270 |
)
|
| 271 |
|
| 272 |
return jsonify(results)
|
| 273 |
-
|
| 274 |
@app.route('/handsome/v1/models', methods=['GET'])
|
| 275 |
def list_models():
|
| 276 |
if not check_authorization(request):
|
| 277 |
return jsonify({"error": "Unauthorized"}), 401
|
| 278 |
-
|
| 279 |
detailed_models = [
|
| 280 |
{
|
| 281 |
"id": "deepseek-chat",
|
|
@@ -379,6 +379,38 @@ def billing_usage():
|
|
| 379 |
"total_usage": 0
|
| 380 |
})
|
| 381 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 382 |
@app.route('/handsome/v1/chat/completions', methods=['POST'])
|
| 383 |
def handsome_chat_completions():
|
| 384 |
if not check_authorization(request):
|
|
@@ -389,7 +421,7 @@ def handsome_chat_completions():
|
|
| 389 |
return jsonify({"error": "Invalid request data"}), 400
|
| 390 |
|
| 391 |
model_name = data['model']
|
| 392 |
-
|
| 393 |
api_key = select_key(model_name)
|
| 394 |
|
| 395 |
if not api_key:
|
|
@@ -402,8 +434,7 @@ def handsome_chat_completions():
|
|
| 402 |
)
|
| 403 |
}
|
| 404 |
), 429
|
| 405 |
-
|
| 406 |
-
# Special handling for deepseek-reasoner
|
| 407 |
if model_name == "deepseek-reasoner":
|
| 408 |
for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
|
| 409 |
if param in data:
|
|
@@ -413,7 +444,7 @@ def handsome_chat_completions():
|
|
| 413 |
"Authorization": f"Bearer {api_key}",
|
| 414 |
"Content-Type": "application/json"
|
| 415 |
}
|
| 416 |
-
|
| 417 |
try:
|
| 418 |
start_time = time.time()
|
| 419 |
response = requests.post(
|
|
@@ -431,42 +462,12 @@ def handsome_chat_completions():
|
|
| 431 |
def generate():
|
| 432 |
first_chunk_time = None
|
| 433 |
full_response_content = ""
|
| 434 |
-
reasoning_content_accumulated = "" # Accumulate reasoning content
|
| 435 |
-
content_accumulated = "" # Accumulate regular content
|
| 436 |
-
first_reasoning_chunk = True # Flag to track the first reasoning chunk
|
| 437 |
-
|
| 438 |
for chunk in response.iter_content(chunk_size=1024):
|
| 439 |
if chunk:
|
| 440 |
if first_chunk_time is None:
|
| 441 |
first_chunk_time = time.time()
|
| 442 |
full_response_content += chunk.decode("utf-8")
|
| 443 |
-
|
| 444 |
-
for line in chunk.decode("utf-8").splitlines():
|
| 445 |
-
if line.startswith("data:"):
|
| 446 |
-
try:
|
| 447 |
-
chunk_json = json.loads(line.lstrip("data: ").strip())
|
| 448 |
-
if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
|
| 449 |
-
delta = chunk_json["choices"][0].get("delta", {})
|
| 450 |
-
|
| 451 |
-
if delta.get("reasoning_content") is not None:
|
| 452 |
-
if first_reasoning_chunk:
|
| 453 |
-
reasoning_content_accumulated += "```Thinking\n"
|
| 454 |
-
first_reasoning_chunk = False
|
| 455 |
-
reasoning_content_accumulated += delta.get("reasoning_content", "")
|
| 456 |
-
|
| 457 |
-
if delta.get("content") is not None:
|
| 458 |
-
if not first_reasoning_chunk:
|
| 459 |
-
reasoning_content_accumulated += "\n```"
|
| 460 |
-
first_reasoning_chunk = True
|
| 461 |
-
content_accumulated += delta.get("content", "")
|
| 462 |
-
yield f"data: {json.dumps({'choices': [{'delta': {'content': (reasoning_content_accumulated if reasoning_content_accumulated else '') + content_accumulated}, 'index': 0, 'finish_reason': None}]})}\n\n"
|
| 463 |
-
reasoning_content_accumulated = ""
|
| 464 |
-
content_accumulated = ""
|
| 465 |
-
|
| 466 |
-
except (KeyError, ValueError, json.JSONDecodeError) as e:
|
| 467 |
-
logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {line}")
|
| 468 |
-
continue
|
| 469 |
-
|
| 470 |
|
| 471 |
end_time = time.time()
|
| 472 |
first_token_time = (
|
|
@@ -477,6 +478,7 @@ def handsome_chat_completions():
|
|
| 477 |
|
| 478 |
prompt_tokens = 0
|
| 479 |
completion_tokens = 0
|
|
|
|
| 480 |
for line in full_response_content.splitlines():
|
| 481 |
if line.startswith("data:"):
|
| 482 |
line = line[5:].strip()
|
|
@@ -489,9 +491,22 @@ def handsome_chat_completions():
|
|
| 489 |
"usage" in response_json and
|
| 490 |
"completion_tokens" in response_json["usage"]
|
| 491 |
):
|
| 492 |
-
completion_tokens
|
| 493 |
"usage"
|
| 494 |
]["completion_tokens"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
if (
|
| 496 |
"usage" in response_json and
|
| 497 |
"prompt_tokens" in response_json["usage"]
|
|
@@ -532,8 +547,7 @@ def handsome_chat_completions():
|
|
| 532 |
user_content_replaced = user_content.replace(
|
| 533 |
'\n', '\\n'
|
| 534 |
).replace('\r', '\\n')
|
| 535 |
-
response_content_replaced = (
|
| 536 |
-
response_content_replaced = response_content_replaced.replace(
|
| 537 |
'\n', '\\n'
|
| 538 |
).replace('\r', '\\n')
|
| 539 |
|
|
@@ -550,16 +564,13 @@ def handsome_chat_completions():
|
|
| 550 |
|
| 551 |
with data_lock:
|
| 552 |
request_timestamps.append(time.time())
|
| 553 |
-
token_counts.append(prompt_tokens
|
| 554 |
-
|
| 555 |
-
yield "data: [DONE]\n\n"
|
| 556 |
|
| 557 |
return Response(
|
| 558 |
stream_with_context(generate()),
|
| 559 |
-
content_type=
|
| 560 |
)
|
| 561 |
else:
|
| 562 |
-
# ... (Non-streaming part remains the same as in the previous response)
|
| 563 |
response.raise_for_status()
|
| 564 |
end_time = time.time()
|
| 565 |
response_json = response.json()
|
|
@@ -567,21 +578,12 @@ def handsome_chat_completions():
|
|
| 567 |
|
| 568 |
try:
|
| 569 |
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
| 570 |
-
completion_tokens = response_json[
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
if "message" in choice:
|
| 577 |
-
if "reasoning_content" in choice["message"]:
|
| 578 |
-
formatted_reasoning = f"```Thinking\n{choice['message']['reasoning_content']}\n```"
|
| 579 |
-
response_content += formatted_reasoning + "\n"
|
| 580 |
-
if "content" in choice["message"]:
|
| 581 |
-
response_content += choice["message"]["content"]
|
| 582 |
-
elif "choices" in response_json and len(response_json["choices"]) > 0:
|
| 583 |
-
response_content = response_json["choices"][0]["message"]["content"]
|
| 584 |
-
|
| 585 |
except (KeyError, ValueError, IndexError) as e:
|
| 586 |
logging.error(
|
| 587 |
f"解析非流式响应 JSON 失败: {e}, "
|
|
@@ -604,8 +606,7 @@ def handsome_chat_completions():
|
|
| 604 |
item.get("type") == "text"
|
| 605 |
):
|
| 606 |
user_content += (
|
| 607 |
-
item.get("text", "") +
|
| 608 |
-
" "
|
| 609 |
)
|
| 610 |
|
| 611 |
user_content = user_content.strip()
|
|
@@ -629,32 +630,12 @@ def handsome_chat_completions():
|
|
| 629 |
)
|
| 630 |
with data_lock:
|
| 631 |
request_timestamps.append(time.time())
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
"id": response_json.get("id", ""),
|
| 637 |
-
"object": "chat.completion",
|
| 638 |
-
"created": response_json.get("created", int(time.time())),
|
| 639 |
-
"model": model_name,
|
| 640 |
-
"choices": [
|
| 641 |
-
{
|
| 642 |
-
"index": 0,
|
| 643 |
-
"message": {
|
| 644 |
-
"role": "assistant",
|
| 645 |
-
"content": response_content
|
| 646 |
-
},
|
| 647 |
-
"finish_reason": "stop"
|
| 648 |
-
}
|
| 649 |
-
],
|
| 650 |
-
"usage": {
|
| 651 |
-
"prompt_tokens": prompt_tokens,
|
| 652 |
-
"completion_tokens": completion_tokens,
|
| 653 |
-
"total_tokens": prompt_tokens + completion_tokens
|
| 654 |
-
}
|
| 655 |
-
}
|
| 656 |
|
| 657 |
-
return jsonify(
|
| 658 |
|
| 659 |
except requests.exceptions.RequestException as e:
|
| 660 |
logging.error(f"请求转发异常: {e}")
|
|
@@ -680,4 +661,4 @@ if __name__ == '__main__':
|
|
| 680 |
debug=False,
|
| 681 |
host='0.0.0.0',
|
| 682 |
port=int(os.environ.get('PORT', 7860))
|
| 683 |
-
)
|
|
|
|
| 53 |
if not data.get("is_available", False):
|
| 54 |
logging.warning(f"API Key: {api_key} is not available.")
|
| 55 |
return None
|
| 56 |
+
|
| 57 |
balance_infos = data.get("balance_infos", [])
|
| 58 |
total_balance_cny = 0.0
|
| 59 |
usd_balance = 0.0
|
|
|
|
| 270 |
)
|
| 271 |
|
| 272 |
return jsonify(results)
|
| 273 |
+
|
| 274 |
@app.route('/handsome/v1/models', methods=['GET'])
|
| 275 |
def list_models():
|
| 276 |
if not check_authorization(request):
|
| 277 |
return jsonify({"error": "Unauthorized"}), 401
|
| 278 |
+
|
| 279 |
detailed_models = [
|
| 280 |
{
|
| 281 |
"id": "deepseek-chat",
|
|
|
|
| 379 |
"total_usage": 0
|
| 380 |
})
|
| 381 |
|
| 382 |
+
@app.route('/handsome/v1/dashboard/billing/subscription', methods=['GET'])
def billing_subscription():
    """Return the billing-subscription JSON object for this proxy.

    The request is first passed to ``check_authorization``; when that
    rejects it, a ``{"error": "Unauthorized"}`` body is returned with
    HTTP status 401.

    Otherwise the value returned by ``get_billing_info()`` is reported as
    every ``*hard_limit*`` field, while the remaining fields are fixed
    placeholder values.

    Returns:
        A Flask JSON response, or a ``(response, 401)`` tuple when the
        request is not authorized.
    """
    # Function-scope import: only ``datetime`` itself is known to be
    # available at module level in this file.
    from datetime import timezone

    if not check_authorization(request):
        return jsonify({"error": "Unauthorized"}), 401

    total_balance = get_billing_info()

    # "Never expires" sentinel. A naive ``datetime(9999, 12, 31)`` would be
    # converted through the platform's local-time routines, which makes the
    # value depend on the server's timezone and can raise OSError or
    # OverflowError where those routines have a limited range. An aware UTC
    # datetime is converted arithmetically and always succeeds.
    access_until = int(
        datetime(9999, 12, 31, tzinfo=timezone.utc).timestamp()
    )

    return jsonify({
        "object": "billing_subscription",
        "has_payment_method": False,
        "canceled": False,
        "canceled_at": None,
        "delinquent": None,
        "access_until": access_until,
        "soft_limit": 0,
        "hard_limit": total_balance,
        "system_hard_limit": total_balance,
        "soft_limit_usd": 0,
        "hard_limit_usd": total_balance,
        "system_hard_limit_usd": total_balance,
        "plan": {
            "name": "SiliconFlow API",
            "id": "siliconflow-api"
        },
        "account_name": "SiliconFlow User",
        "po_number": None,
        "billing_email": None,
        "tax_ids": [],
        "billing_address": None,
        "business_address": None
    })
|
| 413 |
+
|
| 414 |
@app.route('/handsome/v1/chat/completions', methods=['POST'])
|
| 415 |
def handsome_chat_completions():
|
| 416 |
if not check_authorization(request):
|
|
|
|
| 421 |
return jsonify({"error": "Invalid request data"}), 400
|
| 422 |
|
| 423 |
model_name = data['model']
|
| 424 |
+
|
| 425 |
api_key = select_key(model_name)
|
| 426 |
|
| 427 |
if not api_key:
|
|
|
|
| 434 |
)
|
| 435 |
}
|
| 436 |
), 429
|
| 437 |
+
|
|
|
|
| 438 |
if model_name == "deepseek-reasoner":
|
| 439 |
for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
|
| 440 |
if param in data:
|
|
|
|
| 444 |
"Authorization": f"Bearer {api_key}",
|
| 445 |
"Content-Type": "application/json"
|
| 446 |
}
|
| 447 |
+
|
| 448 |
try:
|
| 449 |
start_time = time.time()
|
| 450 |
response = requests.post(
|
|
|
|
| 462 |
def generate():
|
| 463 |
first_chunk_time = None
|
| 464 |
full_response_content = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
for chunk in response.iter_content(chunk_size=1024):
|
| 466 |
if chunk:
|
| 467 |
if first_chunk_time is None:
|
| 468 |
first_chunk_time = time.time()
|
| 469 |
full_response_content += chunk.decode("utf-8")
|
| 470 |
+
yield chunk
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
|
| 472 |
end_time = time.time()
|
| 473 |
first_token_time = (
|
|
|
|
| 478 |
|
| 479 |
prompt_tokens = 0
|
| 480 |
completion_tokens = 0
|
| 481 |
+
response_content = ""
|
| 482 |
for line in full_response_content.splitlines():
|
| 483 |
if line.startswith("data:"):
|
| 484 |
line = line[5:].strip()
|
|
|
|
| 491 |
"usage" in response_json and
|
| 492 |
"completion_tokens" in response_json["usage"]
|
| 493 |
):
|
| 494 |
+
completion_tokens = response_json[
|
| 495 |
"usage"
|
| 496 |
]["completion_tokens"]
|
| 497 |
+
|
| 498 |
+
if (
|
| 499 |
+
"choices" in response_json and
|
| 500 |
+
len(response_json["choices"]) > 0 and
|
| 501 |
+
"delta" in response_json["choices"][0] and
|
| 502 |
+
"content" in response_json[
|
| 503 |
+
"choices"
|
| 504 |
+
][0]["delta"]
|
| 505 |
+
):
|
| 506 |
+
response_content += response_json[
|
| 507 |
+
"choices"
|
| 508 |
+
][0]["delta"]["content"]
|
| 509 |
+
|
| 510 |
if (
|
| 511 |
"usage" in response_json and
|
| 512 |
"prompt_tokens" in response_json["usage"]
|
|
|
|
| 547 |
user_content_replaced = user_content.replace(
|
| 548 |
'\n', '\\n'
|
| 549 |
).replace('\r', '\\n')
|
| 550 |
+
response_content_replaced = response_content.replace(
|
|
|
|
| 551 |
'\n', '\\n'
|
| 552 |
).replace('\r', '\\n')
|
| 553 |
|
|
|
|
| 564 |
|
| 565 |
with data_lock:
|
| 566 |
request_timestamps.append(time.time())
|
| 567 |
+
token_counts.append(prompt_tokens+completion_tokens)
|
|
|
|
|
|
|
| 568 |
|
| 569 |
return Response(
|
| 570 |
stream_with_context(generate()),
|
| 571 |
+
content_type=response.headers['Content-Type']
|
| 572 |
)
|
| 573 |
else:
|
|
|
|
| 574 |
response.raise_for_status()
|
| 575 |
end_time = time.time()
|
| 576 |
response_json = response.json()
|
|
|
|
| 578 |
|
| 579 |
try:
|
| 580 |
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
| 581 |
+
completion_tokens = response_json[
|
| 582 |
+
"usage"
|
| 583 |
+
]["completion_tokens"]
|
| 584 |
+
response_content = response_json[
|
| 585 |
+
"choices"
|
| 586 |
+
][0]["message"]["content"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 587 |
except (KeyError, ValueError, IndexError) as e:
|
| 588 |
logging.error(
|
| 589 |
f"解析非流式响应 JSON 失败: {e}, "
|
|
|
|
| 606 |
item.get("type") == "text"
|
| 607 |
):
|
| 608 |
user_content += (
|
| 609 |
+
item.get("text", "") + " "
|
|
|
|
| 610 |
)
|
| 611 |
|
| 612 |
user_content = user_content.strip()
|
|
|
|
| 630 |
)
|
| 631 |
with data_lock:
|
| 632 |
request_timestamps.append(time.time())
|
| 633 |
+
if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
|
| 634 |
+
token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
|
| 635 |
+
else:
|
| 636 |
+
token_counts.append(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 637 |
|
| 638 |
+
return jsonify(response_json)
|
| 639 |
|
| 640 |
except requests.exceptions.RequestException as e:
|
| 641 |
logging.error(f"请求转发异常: {e}")
|
|
|
|
| 661 |
debug=False,
|
| 662 |
host='0.0.0.0',
|
| 663 |
port=int(os.environ.get('PORT', 7860))
|
| 664 |
+
)
|