ds

Paused

App Files Files Community

yangtb24 committed on Jan 21

Commit

b413541

verified ·

1 Parent(s): 869de0e

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -89

app.py CHANGED Viewed

@@ -53,7 +53,7 @@ def get_credit_summary(api_key):
         if not data.get("is_available", False):
             logging.warning(f"API Key: {api_key} is not available.")
             return None
         balance_infos = data.get("balance_infos", [])
         total_balance_cny = 0.0
         usd_balance = 0.0
@@ -270,12 +270,12 @@ def check_tokens():
                 )
     return jsonify(results)
 @app.route('/handsome/v1/models', methods=['GET'])
 def list_models():
     if not check_authorization(request):
         return jsonify({"error": "Unauthorized"}), 401
     detailed_models = [
         {
             "id": "deepseek-chat",
@@ -379,6 +379,38 @@ def billing_usage():
         "total_usage": 0
     })
 @app.route('/handsome/v1/chat/completions', methods=['POST'])
 def handsome_chat_completions():
     if not check_authorization(request):
@@ -389,7 +421,7 @@ def handsome_chat_completions():
         return jsonify({"error": "Invalid request data"}), 400
     model_name = data['model']
     api_key = select_key(model_name)
     if not api_key:
@@ -402,8 +434,7 @@ def handsome_chat_completions():
                 )
             }
         ), 429
-    # Special handling for deepseek-reasoner
     if model_name == "deepseek-reasoner":
         for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
             if param in data:
@@ -413,7 +444,7 @@ def handsome_chat_completions():
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json"
     }
     try:
         start_time = time.time()
         response = requests.post(
@@ -431,42 +462,12 @@ def handsome_chat_completions():
             def generate():
                 first_chunk_time = None
                 full_response_content = ""
-                reasoning_content_accumulated = ""  # Accumulate reasoning content
-                content_accumulated = ""  # Accumulate regular content
-                first_reasoning_chunk = True # Flag to track the first reasoning chunk
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
-                        for line in chunk.decode("utf-8").splitlines():
-                            if line.startswith("data:"):
-                                try:
-                                    chunk_json = json.loads(line.lstrip("data: ").strip())
-                                    if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
-                                        delta = chunk_json["choices"][0].get("delta", {})
-                                        if delta.get("reasoning_content") is not None:
-                                            if first_reasoning_chunk:
-                                                reasoning_content_accumulated += "```Thinking\n"
-                                                first_reasoning_chunk = False
-                                            reasoning_content_accumulated += delta.get("reasoning_content", "")
-                                        if delta.get("content") is not None:
-                                            if not first_reasoning_chunk:
-                                              reasoning_content_accumulated += "\n```"
-                                              first_reasoning_chunk = True
-                                            content_accumulated += delta.get("content", "")
-                                            yield f"data: {json.dumps({'choices': [{'delta': {'content': (reasoning_content_accumulated if reasoning_content_accumulated else '') + content_accumulated}, 'index': 0, 'finish_reason': None}]})}\n\n"
-                                            reasoning_content_accumulated = ""
-                                            content_accumulated = ""
-                                except (KeyError, ValueError, json.JSONDecodeError) as e:
-                                    logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {line}")
-                                    continue
                 end_time = time.time()
                 first_token_time = (
@@ -477,6 +478,7 @@ def handsome_chat_completions():
                 prompt_tokens = 0
                 completion_tokens = 0
                 for line in full_response_content.splitlines():
                     if line.startswith("data:"):
                         line = line[5:].strip()
@@ -489,9 +491,22 @@ def handsome_chat_completions():
                                 "usage" in response_json and
                                 "completion_tokens" in response_json["usage"]
                             ):
-                                completion_tokens += response_json[
                                     "usage"
                                 ]["completion_tokens"]
                             if (
                                 "usage" in response_json and
                                 "prompt_tokens" in response_json["usage"]
@@ -532,8 +547,7 @@ def handsome_chat_completions():
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
-                response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
-                response_content_replaced = response_content_replaced.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
@@ -550,16 +564,13 @@ def handsome_chat_completions():
                 with data_lock:
                     request_timestamps.append(time.time())
-                    token_counts.append(prompt_tokens + completion_tokens)
-                yield "data: [DONE]\n\n"
             return Response(
                 stream_with_context(generate()),
-                content_type="text/event-stream"
             )
         else:
-            # ... (Non-streaming part remains the same as in the previous response)
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()
@@ -567,21 +578,12 @@ def handsome_chat_completions():
             try:
                 prompt_tokens = response_json["usage"]["prompt_tokens"]
-                completion_tokens = response_json["usage"]["completion_tokens"]
-                response_content = ""
-                # Special handling for deepseek-reasoner in non-streaming mode
-                if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
-                    choice = response_json["choices"][0]
-                    if "message" in choice:
-                        if "reasoning_content" in choice["message"]:
-                            formatted_reasoning = f"```Thinking\n{choice['message']['reasoning_content']}\n```"
-                            response_content += formatted_reasoning + "\n"
-                        if "content" in choice["message"]:
-                            response_content += choice["message"]["content"]
-                elif "choices" in response_json and len(response_json["choices"]) > 0:
-                    response_content = response_json["choices"][0]["message"]["content"]
             except (KeyError, ValueError, IndexError) as e:
                 logging.error(
                     f"解析非流式响应 JSON 失败: {e}, "
@@ -604,8 +606,7 @@ def handsome_chat_completions():
                                 item.get("type") == "text"
                             ):
                                 user_content += (
-                                    item.get("text", "") +
-                                    " "
                                 )
             user_content = user_content.strip()
@@ -629,32 +630,12 @@ def handsome_chat_completions():
             )
             with data_lock:
                 request_timestamps.append(time.time())
-                token_counts.append(prompt_tokens + completion_tokens)
-            # Reformat the response to standard OpenAI format for non-streaming responses
-            formatted_response = {
-                "id": response_json.get("id", ""),
-                "object": "chat.completion",
-                "created": response_json.get("created", int(time.time())),
-                "model": model_name,
-                "choices": [
-                    {
-                        "index": 0,
-                        "message": {
-                            "role": "assistant",
-                            "content": response_content
-                        },
-                        "finish_reason": "stop"
-                    }
-                ],
-                "usage": {
-                    "prompt_tokens": prompt_tokens,
-                    "completion_tokens": completion_tokens,
-                    "total_tokens": prompt_tokens + completion_tokens
-                }
-            }
-            return jsonify(formatted_response)
     except requests.exceptions.RequestException as e:
         logging.error(f"请求转发异常: {e}")
@@ -680,4 +661,4 @@ if __name__ == '__main__':
         debug=False,
         host='0.0.0.0',
         port=int(os.environ.get('PORT', 7860))
-    )

         if not data.get("is_available", False):
             logging.warning(f"API Key: {api_key} is not available.")
             return None
         balance_infos = data.get("balance_infos", [])
         total_balance_cny = 0.0
         usd_balance = 0.0
                 )
     return jsonify(results)
 @app.route('/handsome/v1/models', methods=['GET'])
 def list_models():
     if not check_authorization(request):
         return jsonify({"error": "Unauthorized"}), 401
     detailed_models = [
         {
             "id": "deepseek-chat",
         "total_usage": 0
     })
+@app.route('/handsome/v1/dashboard/billing/subscription', methods=['GET'])
+def billing_subscription():
+    if not check_authorization(request):
+        return jsonify({"error": "Unauthorized"}), 401
+    total_balance = get_billing_info()
+    return jsonify({
+        "object": "billing_subscription",
+        "has_payment_method": False,
+        "canceled": False,
+        "canceled_at": None,
+        "delinquent": None,
+        "access_until": int(datetime(9999, 12, 31).timestamp()),
+        "soft_limit": 0,
+        "hard_limit": total_balance,
+        "system_hard_limit": total_balance,
+        "soft_limit_usd": 0,
+        "hard_limit_usd": total_balance,
+        "system_hard_limit_usd": total_balance,
+        "plan": {
+            "name": "SiliconFlow API",
+            "id": "siliconflow-api"
+        },
+        "account_name": "SiliconFlow User",
+        "po_number": None,
+        "billing_email": None,
+        "tax_ids": [],
+        "billing_address": None,
+        "business_address": None
+    })
 @app.route('/handsome/v1/chat/completions', methods=['POST'])
 def handsome_chat_completions():
     if not check_authorization(request):
         return jsonify({"error": "Invalid request data"}), 400
     model_name = data['model']
     api_key = select_key(model_name)
     if not api_key:
                 )
             }
         ), 429
     if model_name == "deepseek-reasoner":
         for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
             if param in data:
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json"
     }
     try:
         start_time = time.time()
         response = requests.post(
             def generate():
                 first_chunk_time = None
                 full_response_content = ""
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
+                        yield chunk
                 end_time = time.time()
                 first_token_time = (
                 prompt_tokens = 0
                 completion_tokens = 0
+                response_content = ""
                 for line in full_response_content.splitlines():
                     if line.startswith("data:"):
                         line = line[5:].strip()
                                 "usage" in response_json and
                                 "completion_tokens" in response_json["usage"]
                             ):
+                                completion_tokens = response_json[
                                     "usage"
                                 ]["completion_tokens"]
+                            if (
+                                "choices" in response_json and
+                                len(response_json["choices"]) > 0 and
+                                "delta" in response_json["choices"][0] and
+                                "content" in response_json[
+                                    "choices"
+                                ][0]["delta"]
+                            ):
+                                response_content += response_json[
+                                    "choices"
+                                ][0]["delta"]["content"]
                             if (
                                 "usage" in response_json and
                                 "prompt_tokens" in response_json["usage"]
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
+                response_content_replaced = response_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
                 with data_lock:
                     request_timestamps.append(time.time())
+                    token_counts.append(prompt_tokens+completion_tokens)
             return Response(
                 stream_with_context(generate()),
+                content_type=response.headers['Content-Type']
             )
         else:
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()
             try:
                 prompt_tokens = response_json["usage"]["prompt_tokens"]
+                completion_tokens = response_json[
+                    "usage"
+                ]["completion_tokens"]
+                response_content = response_json[
+                    "choices"
+                ][0]["message"]["content"]
             except (KeyError, ValueError, IndexError) as e:
                 logging.error(
                     f"解析非流式响应 JSON 失败: {e}, "
                                 item.get("type") == "text"
                             ):
                                 user_content += (
+                                    item.get("text", "") + " "
                                 )
             user_content = user_content.strip()
             )
             with data_lock:
                 request_timestamps.append(time.time())
+                if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
+                    token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
+                else:
+                    token_counts.append(0)
+            return jsonify(response_json)
     except requests.exceptions.RequestException as e:
         logging.error(f"请求转发异常: {e}")
         debug=False,
         host='0.0.0.0',
         port=int(os.environ.get('PORT', 7860))
+    )