| import json | |
| import time | |
| import requests | |
| from openai import OpenAI | |
| class OnlineLLM: | |
| def __init__(self, api_key: str, base_url: str,model_name: str,default_system: str): | |
| self.api_key = api_key | |
| self.base_url = base_url | |
| self.model_name = model_name | |
| self.default_system = default_system | |
| self.client = OpenAI( | |
| api_key=self.api_key, | |
| base_url=self.base_url | |
| ) | |
| def chat(self,prompt_text, | |
| temperature: float = 0.7, | |
| max_tokens: int = 1024, | |
| top_p: float = 0.95, | |
| stream: bool = False, | |
| enable_thinking: bool = False): | |
| messages = [ | |
| {"role": "system", "content": self.default_system}, | |
| {"role": "user", "content": prompt_text}, | |
| ] | |
| response = self.client.chat.completions.create( | |
| model=self.model_name, | |
| messages=messages, | |
| stream=stream, | |
| temperature = temperature, | |
| max_tokens=max_tokens, | |
| top_p=top_p, | |
| # enable_thinking=enable_thinking 适配Qwen3动态开启推理 | |
| ) | |
| return response | |