from smolagents import CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, InferenceClientModel, LiteLLMModel, tool
import os
import requests
import pytz
import yaml
from datetime import datetime
from Gradio_UI import GradioUI
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Configuration
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-Coder-32B-Instruct")
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
OLLAMA_MODEL_ID = os.getenv("OLLAMA_MODEL_ID", "qwen2.5-coder:32b")
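
# Example .env file (illustrative values only; these mirror the defaults above):
#   HF_MODEL_ID=Qwen/Qwen2.5-Coder-32B-Instruct
#   OLLAMA_BASE_URL=http://localhost:11434
#   OLLAMA_MODEL_ID=qwen2.5-coder:32b
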
def is_ollama_available(base_url=None, timeout=2):
    """Check if Ollama service is running and the specified model exists."""
    if base_url is None:
        base_url = OLLAMA_BASE_URL
    try:
        response = requests.get(f"{base_url}/api/tags", timeout=timeout)
        if response.status_code != 200:
            print(f"Ollama service check failed: HTTP {response.status_code} from {base_url}/api/tags")
            return False
        # Parse the response to get available models
        data = response.json()
        available_models = [model.get('name', '') for model in data.get('models', [])]
        # Check if the model exists in available models
        if OLLAMA_MODEL_ID not in available_models:
            print(f"Model '{OLLAMA_MODEL_ID}' not found in Ollama.")
            print(f"Available models: {', '.join(available_models) if available_models else 'None'}")
            return False
        print(f"Ollama service is available and model '{OLLAMA_MODEL_ID}' found.")
        return True
    except (requests.RequestException, ConnectionError) as e:
        print(f"Failed to connect to Ollama service at {base_url}: {type(e).__name__}: {e}")
        return False
    except (ValueError, KeyError) as e:
        print(f"Failed to parse Ollama API response: {type(e).__name__}: {e}")
        return False
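
# Note: is_ollama_available() is used below to pick between a local Ollama backend and the
# Hugging Face Inference API. If the model is missing locally, pulling it first
# (e.g. `ollama pull qwen2.5-coder:32b`) should make this check pass.
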
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.

    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        # Create timezone object
        tz = pytz.timezone(timezone)
        # Get current time in that timezone
        local_time = datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
@tool
def write_to_markdown(content: str, filename: str) -> str:
    """
    Write the given content to a markdown file.

    Args:
        content: The text content to write to the file
        filename: The name of the markdown file (will automatically add .md extension if not present)

    Example:
        >>> write_to_markdown("# Hello World\\nThis is a test.", "output.md")
    """
    # Add .md extension if not present
    if not filename.endswith('.md'):
        filename += '.md'
    # Write content to file
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(content)
    # Return a confirmation string so the agent can observe the result of the write
    return f"Content successfully written to {filename}"
# Instantiate the DuckDuckGoSearchTool
search_tool = DuckDuckGoSearchTool(max_results=5, rate_limit=2.0)
# Instantiate the FinalAnswerTool
final_answer = FinalAnswerTool()
# Check if Ollama is available and configure the model accordingly
if is_ollama_available():
    print("Ollama detected - using LiteLLMModel with local Ollama instance")
    model = LiteLLMModel(
        model_id=f"ollama_chat/{OLLAMA_MODEL_ID}",  # Adjust the model name based on what you have in Ollama
        api_base=OLLAMA_BASE_URL,
        api_key="ollama",
        num_ctx=8192,  # Important: Ollama's default 2048 may cause failures
        max_tokens=2096,
        temperature=0.5,
    )
else:
    print("Ollama not available - falling back to InferenceClientModel")
    # If the agent does not answer, the model is overloaded; use another model or the following
    # Hugging Face Endpoint, which also serves Qwen2.5 Coder:
    # model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
    model = InferenceClientModel(
        max_tokens=2096,
        temperature=0.5,
        model_id=HF_MODEL_ID,  # it is possible that this model may be overloaded
        custom_role_conversions=None,
    )
with open("prompts.yaml", 'r') as stream:
prompt_templates = yaml.safe_load(stream)
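
# prompts.yaml is expected to define the prompt templates used by the CodeAgent below
# (e.g. a system_prompt entry); it ships alongside this file in the Space.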

agent = CodeAgent(
    model=model,
    tools=[
        final_answer,
        search_tool,
        get_current_time_in_timezone,
        write_to_markdown,
    ],  # add your tools here (don't remove final_answer)
    max_steps=6,
    verbosity_level=1,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

GradioUI(agent).launch()
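
# To run this file locally (assuming the Space's dependencies are installed, e.g.
# smolagents, gradio, litellm, python-dotenv, pytz, pyyaml, requests):
#   python app.py  # replace app.py with this file's name if it differs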