#!/usr/bin/env python3
"""
Complete example of using the Qwen3-4B-toolcalling GGUF model for function calling via llama-cpp-python
"""

import json
import re
from llama_cpp import Llama

class Qwen3ToolCalling:
    def __init__(self, model_path):
        """Initialize the Qwen3 tool calling model"""
        self.llm = Llama(
            model_path=model_path,
            n_ctx=2048,
            n_threads=8,
            n_batch=512,
            verbose=False,
        )
        # Sampling parameters belong to generation, not model loading:
        # llama-cpp-python applies temperature/top_p/repeat_penalty per call,
        # so keep them here and pass them to the completion call in chat().
        self.sampling = {
            "temperature": 0.7,
            "top_p": 0.8,
            "repeat_penalty": 1.1,
        }
    
    def extract_tool_calls(self, text):
        """Extract tool calls from model response"""
        tool_calls = []
        
        # Look for JSON array structures in the response. DOTALL lets the
        # pattern span newlines; the non-greedy match stops at the first ']',
        # which is fine as long as arguments don't contain nested lists.
        json_pattern = r'\[.*?\]'
        matches = re.findall(json_pattern, text, re.DOTALL)
        
        for match in matches:
            try:
                parsed = json.loads(match)
                if isinstance(parsed, list):
                    for item in parsed:
                        if isinstance(item, dict) and 'name' in item:
                            tool_calls.append(item)
            except json.JSONDecodeError:
                continue
        
        return tool_calls
    
    def chat(self, message, system_message=None):
        """Chat with the model and extract tool calls"""
        
        # Build the prompt
        prompt_parts = []
        
        if system_message:
            prompt_parts.append(f"<|im_start|>system\n{system_message}<|im_end|>")
        
        prompt_parts.append(f"<|im_start|>user\n{message}<|im_end|>")
        prompt_parts.append("<|im_start|>assistant\n")
        
        formatted_prompt = "\n".join(prompt_parts)
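        # The joined prompt follows the ChatML layout Qwen chat models expect:
        #   <|im_start|>system\n...<|im_end|>
        #   <|im_start|>user\n...<|im_end|>
        #   <|im_start|>assistant\n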
        
        # Generate response
        response = self.llm(
            formatted_prompt,
            max_tokens=512,
            stop=["<|im_end|>", "<|im_start|>"],
            **self.sampling,
        )
        
        response_text = response['choices'][0]['text']
        tool_calls = self.extract_tool_calls(response_text)
        
        return {
            'response': response_text,
            'tool_calls': tool_calls
        }
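
# The demo below only prints extracted calls. A sketch of actually executing
# one is shown here; execute_tool_call and registry are hypothetical helpers,
# not part of the original script.
def execute_tool_call(tool_call, registry):
    """Dispatch one extracted tool call to a local Python function."""
    func = registry.get(tool_call.get('name'))
    if func is None:
        return None  # the model asked for a tool we don't implement
    args = tool_call.get('arguments', {})
    if isinstance(args, str):
        # Some fine-tunes emit arguments as a JSON string rather than an object
        args = json.loads(args)
    return func(**args)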

def main():
    """Main function to demonstrate tool calling"""
    
    # Initialize the model
    model_path = "/home/user/work/Qwen3-4B-toolcalling-gguf-codex/Qwen3-4B-Function-Calling-Pro.gguf"
    qwen = Qwen3ToolCalling(model_path)
    
    print("🚀 Qwen3-4B Tool Calling Demo")
    print("=" * 50)
    
    # Test cases
    test_cases = [
        "What's the weather like in London?",
        "Find me a hotel in Paris for next week",
        "Calculate 25 + 17",
        "Book a flight from New York to Tokyo",
        "Get the latest news about AI"
    ]
    
    for i, message in enumerate(test_cases, 1):
        print(f"\n📝 Test {i}: {message}")
        print("-" * 40)
        
        result = qwen.chat(message)
        
        print(f"Response: {result['response']}")
        
        if result['tool_calls']:
            print(f"\n🔧 Tool Calls ({len(result['tool_calls'])}):")
            for j, tool_call in enumerate(result['tool_calls'], 1):
                print(f"  {j}. {tool_call['name']}")
                print(f"     Arguments: {tool_call.get('arguments', {})}")
        else:
            print("\n❌ No tool calls detected")

if __name__ == "__main__":
    main()