Update app.py
Browse files
app.py
CHANGED
|
@@ -547,8 +547,8 @@ def check_tokens():
|
|
| 547 |
return jsonify(results)
|
| 548 |
|
| 549 |
@app.route('/handsome/v1/chat/completions', methods=['POST'])
|
| 550 |
-
def handsome_chat_completions(
|
| 551 |
-
if not check_authorization(request
|
| 552 |
return jsonify({"error": "Unauthorized"}), 401
|
| 553 |
|
| 554 |
data = request.get_json()
|
|
@@ -556,13 +556,13 @@ def handsome_chat_completions(request, authorization_key):
|
|
| 556 |
return jsonify({"error": "Invalid request data"}), 400
|
| 557 |
|
| 558 |
model_name = data['model']
|
| 559 |
-
|
| 560 |
request_type = determine_request_type(
|
| 561 |
model_name,
|
| 562 |
text_models + image_models,
|
| 563 |
free_text_models + free_image_models
|
| 564 |
)
|
| 565 |
-
|
| 566 |
api_key = select_key(request_type, model_name)
|
| 567 |
|
| 568 |
if not api_key:
|
|
@@ -580,7 +580,7 @@ def handsome_chat_completions(request, authorization_key):
|
|
| 580 |
"Authorization": f"Bearer {api_key}",
|
| 581 |
"Content-Type": "application/json"
|
| 582 |
}
|
| 583 |
-
|
| 584 |
if model_name in image_models:
|
| 585 |
# Handle image generation
|
| 586 |
# Map OpenAI-style parameters to SiliconFlow's parameters
|
|
@@ -595,7 +595,7 @@ def handsome_chat_completions(request, authorization_key):
|
|
| 595 |
"seed": data.get("seed"),
|
| 596 |
"prompt_enhancement": False,
|
| 597 |
}
|
| 598 |
-
|
| 599 |
# Parameter validation and adjustments
|
| 600 |
if siliconflow_data["batch_size"] < 1:
|
| 601 |
siliconflow_data["batch_size"] = 1
|
|
@@ -606,12 +606,12 @@ def handsome_chat_completions(request, authorization_key):
|
|
| 606 |
siliconflow_data["num_inference_steps"] = 1
|
| 607 |
if siliconflow_data["num_inference_steps"] > 50:
|
| 608 |
siliconflow_data["num_inference_steps"] = 50
|
| 609 |
-
|
| 610 |
if siliconflow_data["guidance_scale"] < 0:
|
| 611 |
siliconflow_data["guidance_scale"] = 0
|
| 612 |
if siliconflow_data["guidance_scale"] > 100:
|
| 613 |
siliconflow_data["guidance_scale"] = 100
|
| 614 |
-
|
| 615 |
if siliconflow_data["image_size"] not in ["1024x1024", "512x1024", "768x512", "768x1024", "1024x576", "576x1024"]:
|
| 616 |
siliconflow_data["image_size"] = "1024x1024"
|
| 617 |
|
|
@@ -623,7 +623,7 @@ def handsome_chat_completions(request, authorization_key):
|
|
| 623 |
json=siliconflow_data,
|
| 624 |
timeout=120
|
| 625 |
)
|
| 626 |
-
|
| 627 |
if response.status_code == 429:
|
| 628 |
return jsonify(response.json()), 429
|
| 629 |
|
|
@@ -631,10 +631,10 @@ def handsome_chat_completions(request, authorization_key):
|
|
| 631 |
end_time = time.time()
|
| 632 |
response_json = response.json()
|
| 633 |
total_time = end_time - start_time
|
| 634 |
-
|
| 635 |
try:
|
| 636 |
images = response_json.get("images", [])
|
| 637 |
-
|
| 638 |
# Extract the first URL if available
|
| 639 |
image_url = ""
|
| 640 |
if images and isinstance(images[0], dict) and "url" in images[0]:
|
|
@@ -643,7 +643,7 @@ def handsome_chat_completions(request, authorization_key):
|
|
| 643 |
elif images and isinstance(images[0], str):
|
| 644 |
image_url = images[0]
|
| 645 |
logging.info(f"Extracted image URL: {image_url}")
|
| 646 |
-
|
| 647 |
# Construct the expected JSON output - Mimicking OpenAI
|
| 648 |
response_data = {
|
| 649 |
"id": f"chatcmpl-{uuid.uuid4()}",
|
|
@@ -656,10 +656,21 @@ def handsome_chat_completions(request, authorization_key):
|
|
| 656 |
"index": 0,
|
| 657 |
"message": {
|
| 658 |
"role": "assistant",
|
| 659 |
-
"content": None,
|
| 660 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 661 |
},
|
| 662 |
-
"finish_reason": "
|
| 663 |
}
|
| 664 |
],
|
| 665 |
"usage": { # Added usage
|
|
@@ -668,9 +679,7 @@ def handsome_chat_completions(request, authorization_key):
|
|
| 668 |
"total_tokens": 0
|
| 669 |
}
|
| 670 |
}
|
| 671 |
-
|
| 672 |
-
response_data["choices"][0]["message"]["content"] = "Failed to generate image"
|
| 673 |
-
|
| 674 |
except (KeyError, ValueError, IndexError) as e:
|
| 675 |
logging.error(
|
| 676 |
f"解析响应 JSON 失败: {e}, "
|
|
@@ -688,7 +697,6 @@ def handsome_chat_completions(request, authorization_key):
|
|
| 688 |
"message": {
|
| 689 |
"role": "assistant",
|
| 690 |
"content": "Failed to process image data",
|
| 691 |
-
"image_url": None,
|
| 692 |
},
|
| 693 |
"finish_reason": "stop",
|
| 694 |
}
|
|
@@ -706,6 +714,10 @@ def handsome_chat_completions(request, authorization_key):
|
|
| 706 |
f"使用的模型: {model_name}"
|
| 707 |
)
|
| 708 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 709 |
return jsonify(response_data)
|
| 710 |
except requests.exceptions.RequestException as e:
|
| 711 |
logging.error(f"请求转发异常: {e}")
|
|
@@ -718,69 +730,191 @@ def handsome_chat_completions(request, authorization_key):
|
|
| 718 |
TEST_MODEL_ENDPOINT,
|
| 719 |
headers=headers,
|
| 720 |
json=data,
|
| 721 |
-
stream=False,
|
| 722 |
timeout=60
|
| 723 |
)
|
| 724 |
if response.status_code == 429:
|
| 725 |
return jsonify(response.json()), 429
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
| 761 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 762 |
)
|
| 763 |
|
| 764 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 765 |
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
response_content_replaced = response_content.replace(
|
| 770 |
-
'\n', '\\n'
|
| 771 |
-
).replace('\r', '\\n')
|
| 772 |
|
| 773 |
-
|
| 774 |
-
|
| 775 |
-
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
|
| 783 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 784 |
|
| 785 |
except requests.exceptions.RequestException as e:
|
| 786 |
logging.error(f"请求转发异常: {e}")
|
|
|
|
| 547 |
return jsonify(results)
|
| 548 |
|
| 549 |
@app.route('/handsome/v1/chat/completions', methods=['POST'])
|
| 550 |
+
def handsome_chat_completions():
|
| 551 |
+
if not check_authorization(request):
|
| 552 |
return jsonify({"error": "Unauthorized"}), 401
|
| 553 |
|
| 554 |
data = request.get_json()
|
|
|
|
| 556 |
return jsonify({"error": "Invalid request data"}), 400
|
| 557 |
|
| 558 |
model_name = data['model']
|
| 559 |
+
|
| 560 |
request_type = determine_request_type(
|
| 561 |
model_name,
|
| 562 |
text_models + image_models,
|
| 563 |
free_text_models + free_image_models
|
| 564 |
)
|
| 565 |
+
|
| 566 |
api_key = select_key(request_type, model_name)
|
| 567 |
|
| 568 |
if not api_key:
|
|
|
|
| 580 |
"Authorization": f"Bearer {api_key}",
|
| 581 |
"Content-Type": "application/json"
|
| 582 |
}
|
| 583 |
+
|
| 584 |
if model_name in image_models:
|
| 585 |
# Handle image generation
|
| 586 |
# Map OpenAI-style parameters to SiliconFlow's parameters
|
|
|
|
| 595 |
"seed": data.get("seed"),
|
| 596 |
"prompt_enhancement": False,
|
| 597 |
}
|
| 598 |
+
|
| 599 |
# Parameter validation and adjustments
|
| 600 |
if siliconflow_data["batch_size"] < 1:
|
| 601 |
siliconflow_data["batch_size"] = 1
|
|
|
|
| 606 |
siliconflow_data["num_inference_steps"] = 1
|
| 607 |
if siliconflow_data["num_inference_steps"] > 50:
|
| 608 |
siliconflow_data["num_inference_steps"] = 50
|
| 609 |
+
|
| 610 |
if siliconflow_data["guidance_scale"] < 0:
|
| 611 |
siliconflow_data["guidance_scale"] = 0
|
| 612 |
if siliconflow_data["guidance_scale"] > 100:
|
| 613 |
siliconflow_data["guidance_scale"] = 100
|
| 614 |
+
|
| 615 |
if siliconflow_data["image_size"] not in ["1024x1024", "512x1024", "768x512", "768x1024", "1024x576", "576x1024"]:
|
| 616 |
siliconflow_data["image_size"] = "1024x1024"
|
| 617 |
|
|
|
|
| 623 |
json=siliconflow_data,
|
| 624 |
timeout=120
|
| 625 |
)
|
| 626 |
+
|
| 627 |
if response.status_code == 429:
|
| 628 |
return jsonify(response.json()), 429
|
| 629 |
|
|
|
|
| 631 |
end_time = time.time()
|
| 632 |
response_json = response.json()
|
| 633 |
total_time = end_time - start_time
|
| 634 |
+
|
| 635 |
try:
|
| 636 |
images = response_json.get("images", [])
|
| 637 |
+
|
| 638 |
# Extract the first URL if available
|
| 639 |
image_url = ""
|
| 640 |
if images and isinstance(images[0], dict) and "url" in images[0]:
|
|
|
|
| 643 |
elif images and isinstance(images[0], str):
|
| 644 |
image_url = images[0]
|
| 645 |
logging.info(f"Extracted image URL: {image_url}")
|
| 646 |
+
|
| 647 |
# Construct the expected JSON output - Mimicking OpenAI
|
| 648 |
response_data = {
|
| 649 |
"id": f"chatcmpl-{uuid.uuid4()}",
|
|
|
|
| 656 |
"index": 0,
|
| 657 |
"message": {
|
| 658 |
"role": "assistant",
|
| 659 |
+
"content": None, # set to None as image is in tool_calls
|
| 660 |
+
"tool_calls": [
|
| 661 |
+
{
|
| 662 |
+
"id": f"call_{uuid.uuid4()}",
|
| 663 |
+
"type": "function",
|
| 664 |
+
"function": {
|
| 665 |
+
"name": "image_generation",
|
| 666 |
+
"arguments": json.dumps({
|
| 667 |
+
"image_url": image_url
|
| 668 |
+
})
|
| 669 |
+
}
|
| 670 |
+
}
|
| 671 |
+
]
|
| 672 |
},
|
| 673 |
+
"finish_reason": "tool_calls",
|
| 674 |
}
|
| 675 |
],
|
| 676 |
"usage": { # Added usage
|
|
|
|
| 679 |
"total_tokens": 0
|
| 680 |
}
|
| 681 |
}
|
| 682 |
+
|
|
|
|
|
|
|
| 683 |
except (KeyError, ValueError, IndexError) as e:
|
| 684 |
logging.error(
|
| 685 |
f"解析响应 JSON 失败: {e}, "
|
|
|
|
| 697 |
"message": {
|
| 698 |
"role": "assistant",
|
| 699 |
"content": "Failed to process image data",
|
|
|
|
| 700 |
},
|
| 701 |
"finish_reason": "stop",
|
| 702 |
}
|
|
|
|
| 714 |
f"使用的模型: {model_name}"
|
| 715 |
)
|
| 716 |
|
| 717 |
+
with data_lock:
|
| 718 |
+
request_timestamps.append(time.time())
|
| 719 |
+
token_counts.append(0) # Image generation doesn't use tokens
|
| 720 |
+
|
| 721 |
return jsonify(response_data)
|
| 722 |
except requests.exceptions.RequestException as e:
|
| 723 |
logging.error(f"请求转发异常: {e}")
|
|
|
|
| 730 |
TEST_MODEL_ENDPOINT,
|
| 731 |
headers=headers,
|
| 732 |
json=data,
|
| 733 |
+
stream=data.get("stream", False),
|
| 734 |
timeout=60
|
| 735 |
)
|
| 736 |
if response.status_code == 429:
|
| 737 |
return jsonify(response.json()), 429
|
| 738 |
+
|
| 739 |
+
if data.get("stream", False):
|
| 740 |
+
def generate():
|
| 741 |
+
first_chunk_time = None
|
| 742 |
+
full_response_content = ""
|
| 743 |
+
for chunk in response.iter_content(chunk_size=1024):
|
| 744 |
+
if chunk:
|
| 745 |
+
if first_chunk_time is None:
|
| 746 |
+
first_chunk_time = time.time()
|
| 747 |
+
full_response_content += chunk.decode("utf-8")
|
| 748 |
+
yield chunk
|
| 749 |
+
|
| 750 |
+
end_time = time.time()
|
| 751 |
+
first_token_time = (
|
| 752 |
+
first_chunk_time - start_time
|
| 753 |
+
if first_chunk_time else 0
|
| 754 |
+
)
|
| 755 |
+
total_time = end_time - start_time
|
| 756 |
+
|
| 757 |
+
prompt_tokens = 0
|
| 758 |
+
completion_tokens = 0
|
| 759 |
+
response_content = ""
|
| 760 |
+
for line in full_response_content.splitlines():
|
| 761 |
+
if line.startswith("data:"):
|
| 762 |
+
line = line[5:].strip()
|
| 763 |
+
if line == "[DONE]":
|
| 764 |
+
continue
|
| 765 |
+
try:
|
| 766 |
+
response_json = json.loads(line)
|
| 767 |
+
|
| 768 |
+
if (
|
| 769 |
+
"usage" in response_json and
|
| 770 |
+
"completion_tokens" in response_json["usage"]
|
| 771 |
+
):
|
| 772 |
+
completion_tokens = response_json[
|
| 773 |
+
"usage"
|
| 774 |
+
]["completion_tokens"]
|
| 775 |
+
|
| 776 |
+
if (
|
| 777 |
+
"choices" in response_json and
|
| 778 |
+
len(response_json["choices"]) > 0 and
|
| 779 |
+
"delta" in response_json["choices"][0] and
|
| 780 |
+
"content" in response_json[
|
| 781 |
+
"choices"
|
| 782 |
+
][0]["delta"]
|
| 783 |
+
):
|
| 784 |
+
response_content += response_json[
|
| 785 |
+
"choices"
|
| 786 |
+
][0]["delta"]["content"]
|
| 787 |
+
|
| 788 |
+
if (
|
| 789 |
+
"usage" in response_json and
|
| 790 |
+
"prompt_tokens" in response_json["usage"]
|
| 791 |
+
):
|
| 792 |
+
prompt_tokens = response_json[
|
| 793 |
+
"usage"
|
| 794 |
+
]["prompt_tokens"]
|
| 795 |
+
|
| 796 |
+
except (
|
| 797 |
+
KeyError,
|
| 798 |
+
ValueError,
|
| 799 |
+
IndexError
|
| 800 |
+
) as e:
|
| 801 |
+
logging.error(
|
| 802 |
+
f"解析流式响应单行 JSON 失败: {e}, "
|
| 803 |
+
f"行内容: {line}"
|
| 804 |
)
|
| 805 |
|
| 806 |
+
user_content = ""
|
| 807 |
+
messages = data.get("messages", [])
|
| 808 |
+
for message in messages:
|
| 809 |
+
if message["role"] == "user":
|
| 810 |
+
if isinstance(message["content"], str):
|
| 811 |
+
user_content += message["content"] + " "
|
| 812 |
+
elif isinstance(message["content"], list):
|
| 813 |
+
for item in message["content"]:
|
| 814 |
+
if (
|
| 815 |
+
isinstance(item, dict) and
|
| 816 |
+
item.get("type") == "text"
|
| 817 |
+
):
|
| 818 |
+
user_content += (
|
| 819 |
+
item.get("text", "") +
|
| 820 |
+
" "
|
| 821 |
+
)
|
| 822 |
+
|
| 823 |
+
user_content = user_content.strip()
|
| 824 |
+
|
| 825 |
+
user_content_replaced = user_content.replace(
|
| 826 |
+
'\n', '\\n'
|
| 827 |
+
).replace('\r', '\\n')
|
| 828 |
+
response_content_replaced = response_content.replace(
|
| 829 |
+
'\n', '\\n'
|
| 830 |
+
).replace('\r', '\\n')
|
| 831 |
+
|
| 832 |
+
logging.info(
|
| 833 |
+
f"使用的key: {api_key}, "
|
| 834 |
+
f"提示token: {prompt_tokens}, "
|
| 835 |
+
f"输出token: {completion_tokens}, "
|
| 836 |
+
f"首字用时: {first_token_time:.4f}秒, "
|
| 837 |
+
f"总共用时: {total_time:.4f}秒, "
|
| 838 |
+
f"使用的模型: {model_name}, "
|
| 839 |
+
f"用户的内容: {user_content_replaced}, "
|
| 840 |
+
f"输出的内容: {response_content_replaced}"
|
| 841 |
+
)
|
| 842 |
|
| 843 |
+
with data_lock:
|
| 844 |
+
request_timestamps.append(time.time())
|
| 845 |
+
token_counts.append(prompt_tokens+completion_tokens)
|
|
|
|
|
|
|
|
|
|
| 846 |
|
| 847 |
+
return Response(
|
| 848 |
+
stream_with_context(generate()),
|
| 849 |
+
content_type=response.headers['Content-Type']
|
| 850 |
+
)
|
| 851 |
+
else:
|
| 852 |
+
response.raise_for_status()
|
| 853 |
+
end_time = time.time()
|
| 854 |
+
response_json = response.json()
|
| 855 |
+
total_time = end_time - start_time
|
| 856 |
+
|
| 857 |
+
try:
|
| 858 |
+
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
| 859 |
+
completion_tokens = response_json[
|
| 860 |
+
"usage"
|
| 861 |
+
]["completion_tokens"]
|
| 862 |
+
response_content = response_json[
|
| 863 |
+
"choices"
|
| 864 |
+
][0]["message"]["content"]
|
| 865 |
+
except (KeyError, ValueError, IndexError) as e:
|
| 866 |
+
logging.error(
|
| 867 |
+
f"解析非流式响应 JSON 失败: {e}, "
|
| 868 |
+
f"完整内容: {response_json}"
|
| 869 |
+
)
|
| 870 |
+
prompt_tokens = 0
|
| 871 |
+
completion_tokens = 0
|
| 872 |
+
response_content = ""
|
| 873 |
+
|
| 874 |
+
user_content = ""
|
| 875 |
+
messages = data.get("messages", [])
|
| 876 |
+
for message in messages:
|
| 877 |
+
if message["role"] == "user":
|
| 878 |
+
if isinstance(message["content"], str):
|
| 879 |
+
user_content += message["content"] + " "
|
| 880 |
+
elif isinstance(message["content"], list):
|
| 881 |
+
for item in message["content"]:
|
| 882 |
+
if (
|
| 883 |
+
isinstance(item, dict) and
|
| 884 |
+
item.get("type") == "text"
|
| 885 |
+
):
|
| 886 |
+
user_content += (
|
| 887 |
+
item.get("text", "") +
|
| 888 |
+
" "
|
| 889 |
+
)
|
| 890 |
+
|
| 891 |
+
user_content = user_content.strip()
|
| 892 |
+
|
| 893 |
+
user_content_replaced = user_content.replace(
|
| 894 |
+
'\n', '\\n'
|
| 895 |
+
).replace('\r', '\\n')
|
| 896 |
+
response_content_replaced = response_content.replace(
|
| 897 |
+
'\n', '\\n'
|
| 898 |
+
).replace('\r', '\\n')
|
| 899 |
+
|
| 900 |
+
logging.info(
|
| 901 |
+
f"使用的key: {api_key}, "
|
| 902 |
+
f"提示token: {prompt_tokens}, "
|
| 903 |
+
f"输出token: {completion_tokens}, "
|
| 904 |
+
f"首字用时: 0, "
|
| 905 |
+
f"总共用时: {total_time:.4f}秒, "
|
| 906 |
+
f"使用的模型: {model_name}, "
|
| 907 |
+
f"用户的内容: {user_content_replaced}, "
|
| 908 |
+
f"输出的内容: {response_content_replaced}"
|
| 909 |
+
)
|
| 910 |
+
with data_lock:
|
| 911 |
+
request_timestamps.append(time.time())
|
| 912 |
+
if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
|
| 913 |
+
token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
|
| 914 |
+
else:
|
| 915 |
+
token_counts.append(0)
|
| 916 |
+
|
| 917 |
+
return jsonify(response_json)
|
| 918 |
|
| 919 |
except requests.exceptions.RequestException as e:
|
| 920 |
logging.error(f"请求转发异常: {e}")
|