AIencoder committed
Commit 964120b (verified)
1 Parent(s): 816c4d5

Create forgekit/ai_advisor.py

Files changed (1)
  1. forgekit/ai_advisor.py +224 -0
forgekit/ai_advisor.py ADDED
@@ -0,0 +1,224 @@
+"""AI-powered merge advisor using HuggingFace Inference API."""
+
+import json
+import requests
+from typing import Optional
+
+HF_INFERENCE_URL = "https://api-inference.huggingface.co/models"
+DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
+
+
+def _query_llm(
+    prompt: str,
+    system: str = "",
+    model: str = DEFAULT_MODEL,
+    token: Optional[str] = None,
+    max_tokens: int = 800,
+) -> str:
+    """Query an LLM via HF Inference API.
+
+    Args:
+        prompt: User message
+        system: System prompt
+        model: HF model ID for inference
+        token: HF API token (recommended for higher rate limits)
+        max_tokens: Max response length
+
+    Returns:
+        Generated text response
+    """
+    headers = {"Content-Type": "application/json"}
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+
+    # Format as chat messages
+    messages = []
+    if system:
+        messages.append({"role": "system", "content": system})
+    messages.append({"role": "user", "content": prompt})
+
+    payload = {
+        "inputs": _format_chat(messages, model),
+        "parameters": {
+            "max_new_tokens": max_tokens,
+            "temperature": 0.7,
+            "do_sample": True,
+            "return_full_text": False,
+        },
+    }
+
+    try:
+        resp = requests.post(
+            f"{HF_INFERENCE_URL}/{model}",
+            headers=headers,
+            json=payload,
+            timeout=60,
+        )
+
+        if resp.status_code == 503:
+            # Model loading
+            return "⏳ The AI model is loading (this can take 1-2 minutes on first use). Please try again shortly."
+
+        if resp.status_code == 429:
+            return "⚠️ Rate limited — please wait a moment and try again, or add your HF token for higher limits."
+
+        if resp.status_code != 200:
+            return f"⚠️ AI service returned status {resp.status_code}. Try again or add an HF token."
+
+        data = resp.json()
+        if isinstance(data, list) and len(data) > 0:
+            text = data[0].get("generated_text", "")
+            # Clean up any leftover template tokens
+            for tag in ["</s>", "<|im_end|>", "<|eot_id|>", "[/INST]"]:
+                text = text.replace(tag, "")
+            return text.strip()
+
+        return "⚠️ No response generated. The model may be overloaded — try again."
+
+    except requests.exceptions.Timeout:
+        return "⚠️ Request timed out. The model may be loading — try again in a minute."
+    except Exception as e:
+        return f"⚠️ Error: {str(e)}"
+
+
+def _format_chat(messages: list[dict], model: str) -> str:
+    """Format messages into the model's expected chat template."""
+    # Mistral Instruct format
+    if "mistral" in model.lower() or "mixtral" in model.lower():
+        parts = []
+        for msg in messages:
+            if msg["role"] == "system":
+                parts.append(f"[INST] {msg['content']}\n")
+            elif msg["role"] == "user":
+                if parts:
+                    parts.append(f"{msg['content']} [/INST]")
+                else:
+                    parts.append(f"[INST] {msg['content']} [/INST]")
+        return "".join(parts)
+
+    # Generic ChatML fallback
+    parts = []
+    for msg in messages:
+        parts.append(f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>")
+    parts.append("<|im_start|>assistant\n")
+    return "\n".join(parts)
+
+
+# ===== AI FEATURES =====
+
+ADVISOR_SYSTEM = """You are ForgeKit AI, an expert assistant for merging large language models. You have deep knowledge of mergekit, model architectures, merge methods (DARE-TIES, TIES, SLERP, Linear, Task Arithmetic, Passthrough), and best practices for creating high-quality merged models.
+
+Be concise, practical, and specific. Give actionable recommendations with concrete numbers (weights, densities). Format your response with clear sections using markdown."""
+
+
+def merge_advisor(
+    models_text: str,
+    goal: str = "",
+    token: Optional[str] = None,
+) -> str:
+    """AI recommends the best merge method, weights, and configuration.
+
+    Args:
+        models_text: Newline-separated model IDs
+        goal: What the user wants the merged model to do
+        token: HF API token
+
+    Returns:
+        AI recommendation as markdown
+    """
+    models = [m.strip() for m in models_text.strip().split("\n") if m.strip()]
+    if len(models) < 2:
+        return "⚠️ Add at least 2 models to get a recommendation."
+
+    models_str = "\n".join(f"- {m}" for m in models)
+    goal_str = f"\n\nUser's goal: {goal}" if goal.strip() else ""
+
+    prompt = f"""I want to merge these models:
+{models_str}
+{goal_str}
+
+Recommend:
+1. **Best merge method** and why (DARE-TIES, SLERP, Linear, TIES, Task Arithmetic, or Passthrough)
+2. **Optimal weights** for each model (with reasoning)
+3. **Density values** if applicable
+4. **Which model to use as base** and why
+5. **Which tokenizer** to keep
+6. **Any warnings** or tips specific to these models
+
+Be specific with numbers and keep it practical."""
+
+    return _query_llm(prompt, system=ADVISOR_SYSTEM, token=token)
+
+
+def model_describer(
+    models_text: str,
+    method: str = "",
+    weights_text: str = "",
+    token: Optional[str] = None,
+) -> str:
+    """AI explains what the merged model will be good at.
+
+    Args:
+        models_text: Newline-separated model IDs
+        method: Merge method being used
+        weights_text: Comma-separated weights
+        token: HF API token
+
+    Returns:
+        AI description of expected capabilities
+    """
+    models = [m.strip() for m in models_text.strip().split("\n") if m.strip()]
+    if not models:
+        return "⚠️ Add models first."
+
+    models_str = "\n".join(f"- {m}" for m in models)
+    method_str = f" using {method}" if method else ""
+    weights_str = f"\nWeights: {weights_text}" if weights_text.strip() else ""
+
+    prompt = f"""I'm merging these models{method_str}:
+{models_str}{weights_str}
+
+Based on what each source model is known for, describe:
+1. **What the merged model will excel at** (specific tasks/benchmarks)
+2. **What it might struggle with** compared to the source models
+3. **Ideal use cases** for this merge
+4. **Expected quality** compared to each individual model
+5. **A creative name suggestion** for this merge
+
+Keep it concise and practical."""
+
+    return _query_llm(prompt, system=ADVISOR_SYSTEM, token=token)
+
+
+def config_explainer(
+    yaml_config: str,
+    token: Optional[str] = None,
+) -> str:
+    """AI explains a YAML merge config in plain English.
+
+    Args:
+        yaml_config: The YAML configuration string
+        token: HF API token
+
+    Returns:
+        Plain English explanation
+    """
+    if not yaml_config.strip() or yaml_config.startswith("# Add"):
+        return "⚠️ Generate a YAML config first."
+
+    prompt = f"""Explain this mergekit YAML configuration in plain English. Break it down so someone new to model merging can understand exactly what will happen:
+
+```yaml
+{yaml_config}
+```
+
+Explain:
+1. **What this config does** in simple terms
+2. **Why these specific settings** were chosen (method, weights, density)
+3. **What the output model will be like**
+4. **Any potential issues** to watch out for
+5. **Estimated resource requirements** (RAM, time)
+
+Be clear and beginner-friendly."""
+
+    return _query_llm(prompt, system=ADVISOR_SYSTEM, token=token)
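For context, a minimal usage sketch of the new module (not part of the commit). It assumes the repository root is on the import path, so the file is importable as forgekit.ai_advisor, and that an HF_TOKEN environment variable may hold a HuggingFace API token; the model IDs and goal string are placeholders.

# Minimal usage sketch (illustrative; not part of this commit).
# Assumes forgekit/ is importable and HF_TOKEN, if set, holds an HF token.
import os

from forgekit.ai_advisor import merge_advisor

token = os.environ.get("HF_TOKEN")  # optional, but raises rate limits

# Ask for a merge recommendation for two instruct models (placeholders).
advice = merge_advisor(
    models_text=(
        "mistralai/Mistral-7B-Instruct-v0.3\n"
        "HuggingFaceH4/zephyr-7b-beta"
    ),
    goal="a general assistant that is strong at coding",
    token=token,
)
print(advice)  # markdown recommendation, or a ⚠️/⏳ status string on failure

model_describer and config_explainer follow the same pattern: plain-string inputs in, markdown or a status string out, so a caller can surface the return value directly without extra error handling.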