Update app.py
app.py CHANGED
@@ -368,6 +368,103 @@ def handsome_chat_completions():
 
     if data.get("stream", False):
         def generate():
+            if model_name == "deepseek-reasoner":
+                first_chunk_time = None
+                full_response_content = ""
+                # Pass each chunk straight through to the client while
+                # buffering a copy for token accounting after the stream ends.
+                for chunk in response.iter_content(chunk_size=2048):
+                    if chunk:
+                        if first_chunk_time is None:
+                            first_chunk_time = time.time()
+                        full_response_content += chunk.decode("utf-8")
+                        yield chunk
+
+                end_time = time.time()
+                first_token_time = (
+                    first_chunk_time - start_time
+                    if first_chunk_time else 0
+                )
+                total_time = end_time - start_time
+
+                prompt_tokens = 0
+                completion_tokens = 0
+                response_content = ""
+                # Re-parse the buffered SSE stream to extract usage counts
+                # and the assistant's content deltas.
+                for line in full_response_content.splitlines():
+                    if line.startswith("data:"):
+                        line = line[5:].strip()
+                        if line == "[DONE]":
+                            continue
+                        try:
+                            response_json = json.loads(line)
+
+                            if (
+                                "usage" in response_json and
+                                "completion_tokens" in response_json["usage"]
+                            ):
+                                completion_tokens = response_json[
+                                    "usage"
+                                ]["completion_tokens"]
+
+                            if (
+                                "choices" in response_json and
+                                len(response_json["choices"]) > 0 and
+                                "delta" in response_json["choices"][0] and
+                                "content" in response_json[
+                                    "choices"
+                                ][0]["delta"]
+                            ):
+                                response_content += response_json[
+                                    "choices"
+                                ][0]["delta"]["content"]
+
+                            if (
+                                "usage" in response_json and
+                                "prompt_tokens" in response_json["usage"]
+                            ):
+                                prompt_tokens = response_json[
+                                    "usage"
+                                ]["prompt_tokens"]
+
+                        except (
+                            KeyError,
+                            ValueError,
+                            IndexError
+                        ) as e:
+                            logging.error(
+                                f"Failed to parse one line of the streamed response as JSON: {e}, "
+                                f"line content: {line}"
+                            )
+
+                user_content = extract_user_content(data.get("messages", []))
+
+                user_content_replaced = user_content.replace(
+                    '\n', '\\n'
+                ).replace('\r', '\\n')
+                response_content_replaced = response_content.replace(
+                    '\n', '\\n'
+                ).replace('\r', '\\n')
+
+                logging.info(
+                    f"Key used: {api_key}, "
+                    f"prompt tokens: {prompt_tokens}, "
+                    f"completion tokens: {completion_tokens}, "
+                    f"time to first token: {first_token_time:.4f}s, "
+                    f"total time: {total_time:.4f}s, "
+                    f"model used: {model_name}, "
+                    f"user content: {user_content_replaced}, "
+                    f"output content: {response_content_replaced}"
+                )
+
+                with data_lock:
+                    request_timestamps.append(time.time())
+                    token_counts.append(prompt_tokens + completion_tokens)
+                    request_timestamps_day.append(time.time())
+                    token_counts_day.append(prompt_tokens + completion_tokens)
+
+                # NOTE: this branch runs inside the generator, so returning a
+                # Response object here would never reach the client (it would
+                # only become the StopIteration value). End the
+                # deepseek-reasoner branch with a bare return and let the
+                # surrounding route wrap generate() in
+                # Response(stream_with_context(...)) as in the other paths.
+                return
+
             first_chunk_time = None
             full_response_content = ""
             reasoning_content_accumulated = ""
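For reference, the parsing loop added above assumes the upstream service emits OpenAI-style server-sent events: one JSON object per "data:" line, usage figures attached to one of the events, and a final "data: [DONE]" sentinel. The standalone sketch below exercises the same extraction logic against a made-up buffer; the payloads, token counts, and output text are illustrative assumptions, not captured deepseek-reasoner traffic.

import json

# Hypothetical SSE buffer in the OpenAI-compatible format the new loop expects.
sse_buffer = (
    'data: {"choices": [{"delta": {"content": "Hel"}}]}\n'
    'data: {"choices": [{"delta": {"content": "lo"}}]}\n'
    'data: {"usage": {"prompt_tokens": 12, "completion_tokens": 2}}\n'
    'data: [DONE]\n'
)

prompt_tokens = completion_tokens = 0
response_content = ""
for line in sse_buffer.splitlines():
    if not line.startswith("data:"):
        continue
    payload = line[5:].strip()
    if payload == "[DONE]":
        continue
    event = json.loads(payload)
    # Usage may arrive on any event; keep the last values seen.
    usage = event.get("usage") or {}
    prompt_tokens = usage.get("prompt_tokens", prompt_tokens)
    completion_tokens = usage.get("completion_tokens", completion_tokens)
    # Content arrives incrementally as delta fragments.
    choices = event.get("choices") or []
    if choices and "content" in choices[0].get("delta", {}):
        response_content += choices[0]["delta"]["content"]

print(response_content, prompt_tokens, completion_tokens)  # -> Hello 12 2

One design point the change gets right: iter_content(chunk_size=2048) splits on byte boundaries, so individual chunks may end mid-line; re-parsing the concatenated buffer only after the stream finishes sidesteps that.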
|