#!/usr/bin/env python3
"""Complete example of using the Qwen3-4B-toolcalling model for function calling."""
import json
import re
from llama_cpp import Llama


class Qwen3ToolCalling:
    """Thin wrapper around a GGUF Qwen3 model that extracts tool calls.

    The model is prompted with the ChatML (``<|im_start|>``/``<|im_end|>``)
    format and its free-text completion is scanned for embedded JSON
    tool-call structures.
    """

    def __init__(self, model_path):
        """Load the GGUF model at *model_path* into llama.cpp.

        NOTE(review): ``temperature``/``top_p``/``repeat_penalty`` are
        per-call sampling parameters in llama-cpp-python, not constructor
        parameters — recent versions silently ignore them here. They are
        kept for compatibility; the effective values are the ones passed
        at generation time in :meth:`chat`.
        """
        self.llm = Llama(
            model_path=model_path,
            n_ctx=2048,
            n_threads=8,
            n_batch=512,
            temperature=0.7,
            top_p=0.8,
            repeat_penalty=1.1,
            verbose=False,
        )

    def extract_tool_calls(self, text):
        """Extract tool-call dicts embedded as JSON in *text*.

        Scans the response for balanced JSON values starting at ``[`` or
        ``{`` using ``json.JSONDecoder.raw_decode``, which (unlike the
        previous non-greedy regex) handles multi-line pretty-printed JSON
        and arbitrarily nested arrays/objects. Both a JSON list of calls
        and a single bare call object are accepted.

        Returns:
            list[dict]: every parsed dict that has a ``'name'`` key, in
            order of appearance. Malformed JSON is skipped silently.
        """
        tool_calls = []
        decoder = json.JSONDecoder()
        pos = 0
        while pos < len(text):
            # Find the next possible JSON start ('[' or '{'), whichever
            # comes first.
            starts = [i for i in (text.find('[', pos), text.find('{', pos)) if i != -1]
            if not starts:
                break
            start = min(starts)
            try:
                parsed, end = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                # Not valid JSON here; resume scanning one char later.
                pos = start + 1
                continue
            items = parsed if isinstance(parsed, list) else [parsed]
            for item in items:
                if isinstance(item, dict) and 'name' in item:
                    tool_calls.append(item)
            pos = end
        return tool_calls

    def chat(self, message, system_message=None):
        """Send *message* to the model and extract any tool calls.

        Args:
            message: the user turn.
            system_message: optional system prompt prepended in ChatML form.

        Returns:
            dict with keys ``'response'`` (raw completion text) and
            ``'tool_calls'`` (list of parsed tool-call dicts).
        """
        # Build a ChatML prompt, ending with an open assistant turn so the
        # model produces the completion.
        prompt_parts = []
        if system_message:
            prompt_parts.append(f"<|im_start|>system\n{system_message}<|im_end|>")
        prompt_parts.append(f"<|im_start|>user\n{message}<|im_end|>")
        prompt_parts.append("<|im_start|>assistant\n")
        formatted_prompt = "\n".join(prompt_parts)

        # Stop on ChatML delimiters so generation ends at the turn boundary.
        response = self.llm(
            formatted_prompt,
            max_tokens=512,
            stop=["<|im_end|>", "<|im_start|>"],
            temperature=0.7,
        )

        response_text = response['choices'][0]['text']
        tool_calls = self.extract_tool_calls(response_text)

        return {
            'response': response_text,
            'tool_calls': tool_calls,
        }


def main(model_path="/home/user/work/Qwen3-4B-toolcalling-gguf-codex/Qwen3-4B-Function-Calling-Pro.gguf"):
    """Run a small demo of tool-calling prompts against the model.

    Args:
        model_path: path to the GGUF model file (defaults to the original
            hard-coded location, so existing ``main()`` callers are
            unaffected).
    """
    qwen = Qwen3ToolCalling(model_path)

    print("šŸš€ Qwen3-4B Tool Calling Demo")
    print("=" * 50)

    # Representative user requests that should trigger different tools.
    test_cases = [
        "What's the weather like in London?",
        "Find me a hotel in Paris for next week",
        "Calculate 25 + 17",
        "Book a flight from New York to Tokyo",
        "Get the latest news about AI",
    ]

    for i, message in enumerate(test_cases, 1):
        print(f"\nšŸ“ Test {i}: {message}")
        print("-" * 40)

        result = qwen.chat(message)

        print(f"Response: {result['response']}")
        if result['tool_calls']:
            print(f"\nšŸ”§ Tool Calls ({len(result['tool_calls'])}):")
            for j, tool_call in enumerate(result['tool_calls'], 1):
                print(f"  {j}. {tool_call['name']}")
                print(f"     Arguments: {tool_call.get('arguments', {})}")
        else:
            print("\nāŒ No tool calls detected")


if __name__ == "__main__":
    main()