2stacks Claude committed
Commit 5c62b72 · verified · 1 Parent(s): 4efbaed

Add environment variable configuration and enhanced Ollama model verification


- Add environment variables (HF_MODEL_ID, OLLAMA_BASE_URL, OLLAMA_MODEL_ID) for flexible model configuration
- Enhance is_ollama_available() to verify both service availability and model existence
- Add detailed logging to diagnose Ollama connection and model availability issues
- Update README.md with configuration documentation and Docker environment variable examples
- Add python-dotenv dependency for environment variable management

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (3)
  1. README.md +26 -2
  2. app.py +63 -13
  3. requirements.txt +5 -3
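
The enhanced `is_ollama_available()` described in the commit message boils down to listing the models a local Ollama daemon reports via `GET /api/tags` and matching the configured name against them. A minimal, self-contained sketch of that probe (assuming a daemon on the default port; the full version with logging is in the app.py diff below):

```python
# Minimal sketch of the availability check added to app.py: GET /api/tags lists the
# models the Ollama daemon has pulled; the configured name must match exactly
# (Ollama names include the tag, e.g. "qwen2.5-coder:32b").
import os
import requests

base_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
model_id = os.getenv("OLLAMA_MODEL_ID", "qwen2.5-coder:32b")

try:
    tags = requests.get(f"{base_url}/api/tags", timeout=2).json()
    names = [m.get("name", "") for m in tags.get("models", [])]
    print("use Ollama" if model_id in names else "fall back to HuggingFace")
except requests.RequestException:
    print("fall back to HuggingFace")  # service not reachable
```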
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: ⚡
 colorFrom: pink
 colorTo: yellow
 sdk: gradio
-sdk_version: 5.23.1
+sdk_version: 5.49.1
 app_file: app.py
 pinned: false
 tags:
@@ -31,6 +31,27 @@ python -m venv env
 source env/bin/activate
 ```
 
+## Configuration (Optional)
+
+The application uses environment variables for model configuration. Create a `.env` file in the project root to customize settings:
+
+```shell
+# Ollama configuration (for local models)
+OLLAMA_BASE_URL=http://localhost:11434
+OLLAMA_MODEL_ID=qwen2.5-coder:32b
+
+# HuggingFace configuration (fallback when Ollama is unavailable)
+HF_MODEL_ID=Qwen/Qwen2.5-Coder-32B-Instruct
+```
+
+**Environment Variables:**
+
+- `OLLAMA_BASE_URL`: URL for your Ollama service (default: `http://localhost:11434`)
+- `OLLAMA_MODEL_ID`: Model name in Ollama (default: `qwen2.5-coder:32b`)
+- `HF_MODEL_ID`: HuggingFace model to use as fallback (default: `Qwen/Qwen2.5-Coder-32B-Instruct`)
+
+The app automatically checks if Ollama is available with the specified model. If not, it falls back to HuggingFace.
+
 ## Install dependencies and run
 
 ```shell
@@ -43,6 +64,9 @@ python app.py
 ```shell
 docker run -it -p 7860:7860 \
 	--platform=linux/amd64 \
-	--environment HF_TOKEN="YOUR_VALUE_HERE" \
+	-e HF_TOKEN="YOUR_VALUE_HERE" \
+	-e OLLAMA_BASE_URL="http://localhost:11434" \
+	-e OLLAMA_MODEL_ID="qwen2.5-coder:32b" \
+	-e HF_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct" \
 	registry.hf.space/2stacks-first-agent-template:latest python app.py
 ```
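
A note on the configuration documented above: `load_dotenv()` (as used in app.py) does not override variables that are already set in the process environment, so values passed with Docker's `-e` flags take precedence over a baked-in `.env`. A small sketch of that precedence:

```python
# Sketch: python-dotenv's default precedence — existing environment variables win over .env.
# Assumes a .env file in the working directory containing OLLAMA_MODEL_ID=qwen2.5-coder:32b.
import os
from dotenv import load_dotenv

os.environ["OLLAMA_MODEL_ID"] = "value-from-docker-e-flag"  # simulates `docker run -e OLLAMA_MODEL_ID=...`
load_dotenv()  # default override=False: the existing environment variable is kept
print(os.getenv("OLLAMA_MODEL_ID"))  # -> value-from-docker-e-flag, not the .env value
```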
app.py CHANGED
@@ -1,10 +1,49 @@
-from smolagents import CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, InferenceClientModel, tool
-#import requests
+from smolagents import CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, InferenceClientModel, LiteLLMModel, tool
+import os
+import requests
 import pytz
 import yaml
-from datetime import datetime
 
+from datetime import datetime
 from Gradio_UI import GradioUI
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Configuration
+HF_MODEL_ID = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-Coder-32B-Instruct")
+OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
+OLLAMA_MODEL_ID = os.getenv("OLLAMA_MODEL_ID", "qwen2.5-coder:32b")
+
+def is_ollama_available(base_url=None, timeout=2):
+    """Check if Ollama service is running and the specified model exists."""
+    if base_url is None:
+        base_url = OLLAMA_BASE_URL
+    try:
+        response = requests.get(f"{base_url}/api/tags", timeout=timeout)
+        if response.status_code != 200:
+            print(f"Ollama service check failed: HTTP {response.status_code} from {base_url}/api/tags")
+            return False
+
+        # Parse the response to get available models
+        data = response.json()
+        available_models = [model.get('name', '') for model in data.get('models', [])]
+
+        # Check if the model exists in available models
+        if OLLAMA_MODEL_ID not in available_models:
+            print(f"Model '{OLLAMA_MODEL_ID}' not found in Ollama.")
+            print(f"Available models: {', '.join(available_models) if available_models else 'None'}")
+            return False
+
+        print(f"Ollama service is available and model '{OLLAMA_MODEL_ID}' found.")
+        return True
+    except (requests.RequestException, ConnectionError) as e:
+        print(f"Failed to connect to Ollama service at {base_url}: {type(e).__name__}: {e}")
+        return False
+    except (ValueError, KeyError) as e:
+        print(f"Failed to parse Ollama API response: {type(e).__name__}: {e}")
+        return False
 
 @tool
 def get_current_time_in_timezone(timezone: str) -> str:
@@ -49,16 +88,27 @@ search_tool = DuckDuckGoSearchTool(max_results=5, rate_limit=2.0)
 # Instantiate the FinalAnswerTool
 final_answer = FinalAnswerTool()
 
-# If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
-# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
-
-model = InferenceClientModel(
-    max_tokens=2096,
-    temperature=0.5,
-    model_id='Qwen/Qwen2.5-Coder-32B-Instruct', # it is possible that this model may be overloaded
-    #model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',
-    custom_role_conversions=None,
-)
+# Check if Ollama is available and configure the model accordingly
+if is_ollama_available():
+    print("Ollama detected - using LiteLLMModel with local Ollama instance")
+    model = LiteLLMModel(
+        model_id=f"ollama_chat/{OLLAMA_MODEL_ID}",  # Adjust model name based on what you have in Ollama
+        api_base=OLLAMA_BASE_URL,
+        api_key="ollama",
+        num_ctx=8192,  # Important: Ollama's default 2048 may cause failures
+        max_tokens=2096,
+        temperature=0.5,
+    )
+else:
+    print("Ollama not available - falling back to InferenceClientModel")
+    # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
+    # model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
+    model = InferenceClientModel(
+        max_tokens=2096,
+        temperature=0.5,
+        model_id=HF_MODEL_ID,  # it is possible that this model may be overloaded
+        custom_role_conversions=None,
+    )
 
 with open("prompts.yaml", 'r') as stream:
     prompt_templates = yaml.safe_load(stream)
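
Whichever backend the selection above picks, the rest of app.py (unchanged by this commit) presumably wires it into the agent the same way. A hedged, illustrative sketch of that wiring, using `InferenceClientModel` as a stand-in; the tool names and `max_steps` value here are assumptions, not part of the diff:

```python
# Illustrative only: how a selected model object typically plugs into a smolagents CodeAgent.
from smolagents import CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, InferenceClientModel

# Stand-in for whichever backend the new selection logic chose (LiteLLMModel or InferenceClientModel).
model = InferenceClientModel(
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    max_tokens=2096,
    temperature=0.5,
)
agent = CodeAgent(
    tools=[DuckDuckGoSearchTool(), FinalAnswerTool()],
    model=model,
    max_steps=6,
)
print(agent.run("What time is it in UTC right now?"))
```

The `num_ctx=8192` passed to `LiteLLMModel` matters on the Ollama path because Ollama's default context window (2048 tokens) is easily exceeded by the code agent's system prompt, which is what the inline comment in the diff warns about.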
requirements.txt CHANGED
@@ -1,6 +1,8 @@
+ddgs
+duckduckgo_search
 markdownify
+python-dotenv
+requests
 smolagents
-#requests
-duckduckgo_search
-ddgs
 smolagents[gradio]
+smolagents[litellm]