VoiceSementle / gemini_adapter.py
Sungjoon Lee
[DOCS] Edit documentation and remove debug code
48b92eb
"""
Adapter to convert between Anthropic-style API calls and the Google GenAI SDK
"""
from google import genai
from google.genai import types
import json
import os
# Lazy client initialization
_client = None
def get_client():
"""Get or create the Gemini client"""
global _client
if _client is None:
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
raise ValueError("GOOGLE_API_KEY environment variable not set")
_client = genai.Client(api_key=api_key)
return _client
def clean_schema_for_gemini(schema):
"""
Remove fields that Gemini doesn't support from JSON schema
"""
if not isinstance(schema, dict):
return schema
cleaned = {}
# Gemini doesn't support: title, default, examples, additionalProperties
skip_fields = {"title", "default", "examples", "additionalProperties"}
for key, value in schema.items():
if key in skip_fields:
continue
if isinstance(value, dict):
cleaned[key] = clean_schema_for_gemini(value)
elif isinstance(value, list):
cleaned[key] = [clean_schema_for_gemini(item) if isinstance(item, dict) else item for item in value]
else:
cleaned[key] = value
return cleaned
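# Illustrative sketch (not part of the original module): what the cleaner does to a
# hypothetical Anthropic-style input_schema; the field names below are made up.
#
#   clean_schema_for_gemini({
#       "type": "object",
#       "title": "GuessInput",            # dropped
#       "additionalProperties": False,    # dropped
#       "properties": {"word": {"type": "string", "default": ""}},  # "default" dropped
#       "required": ["word"],
#   })
#   # -> {"type": "object",
#   #     "properties": {"word": {"type": "string"}},
#   #     "required": ["word"]}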
def convert_tools_to_gemini_format(anthropic_tools):
"""
Convert Anthropic tool schema to Gemini function declarations
"""
function_declarations = []
for tool in anthropic_tools:
# Clean the input schema
cleaned_schema = clean_schema_for_gemini(tool["input_schema"])
# Convert to Gemini FunctionDeclaration
func_decl = types.FunctionDeclaration(
name=tool["name"],
description=tool["description"],
parameters=cleaned_schema
)
function_declarations.append(func_decl)
# Wrap in Tool object
if function_declarations:
return [types.Tool(function_declarations=function_declarations)]
return None
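# Illustrative sketch (hypothetical tool definition, not one shipped with the app):
# an Anthropic-format tool such as
#
#   {"name": "submit_guess",
#    "description": "Record the player's guess",
#    "input_schema": {"type": "object",
#                     "properties": {"word": {"type": "string"}},
#                     "required": ["word"]}}
#
# is converted into a single types.Tool wrapping one types.FunctionDeclaration,
# with the cleaned schema passed as its parameters.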
def convert_messages_to_gemini_format(anthropic_messages):
"""
Convert Anthropic messages format to Gemini Content format
"""
contents = []
for msg in anthropic_messages:
role = msg["role"]
content = msg["content"]
# Convert role: assistant -> model
gemini_role = "model" if role == "assistant" else "user"
parts = []
# Handle different content types
if isinstance(content, str):
# Simple text message
parts.append(types.Part(text=content))
elif isinstance(content, dict):
# Could be Gradio file format {"path": ..., "mime_type": ...}
# Skip audio/video files - they can't be sent to Gemini text API
if content.get("path") and content.get("mime_type"):
# print(f"DEBUG convert_messages: Skipping file content: {content.get('mime_type')}")
continue
# Could be text content {"type": "text", "text": "..."}
elif content.get("type") == "text":
parts.append(types.Part(text=content.get("text", "")))
elif 'Audio' in content.__class__.__name__:
# Skip Gradio Audio component objects
# print(f"DEBUG convert_messages: Skipping Gradio component: {content.__class__.__name__}")
continue
elif isinstance(content, list):
# Complex content with tool calls/results
for item in content:
if isinstance(item, dict):
if item.get("type") == "text":
parts.append(types.Part(text=item.get("text", "")))
elif item.get("type") == "tool_use":
# Gemini function call
parts.append(types.Part(
function_call=types.FunctionCall(
name=item.get("name"),
args=item.get("input", {})
)
))
elif item.get("type") == "tool_result":
# Gemini function response; Anthropic tool_result blocks carry only a
# tool_use_id, so it is used as the FunctionResponse name below
try:
response_data = json.loads(item.get("content", "{}"))
except (json.JSONDecodeError, TypeError):
response_data = {"result": item.get("content", "")}
parts.append(types.Part(
function_response=types.FunctionResponse(
name=item.get("tool_use_id", "unknown"),
response=response_data
)
))
elif hasattr(item, 'type'):
# Anthropic SDK objects
if item.type == "text":
parts.append(types.Part(text=item.text))
elif item.type == "tool_use":
parts.append(types.Part(
function_call=types.FunctionCall(
name=item.name,
args=item.input
)
))
if parts:
contents.append(types.Content(role=gemini_role, parts=parts))
return contents
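# Illustrative sketch of the conversion (assumed messages, not taken from the app):
#
#   convert_messages_to_gemini_format([
#       {"role": "user", "content": "Is the word an animal?"},
#       {"role": "assistant", "content": [{"type": "text", "text": "Yes, it is."}]},
#   ])
#
# yields two types.Content objects, one with role "user" and one with role "model",
# each holding a single text Part. File dicts and Gradio Audio objects are skipped.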
def call_gemini_with_tools(model_name, system_prompt, messages, tools, max_tokens=2048):
"""
Call the Gemini API with tools in an Anthropic-compatible way
"""
try:
# Convert tools to Gemini format
gemini_tools = convert_tools_to_gemini_format(tools) if tools else None
# Convert messages to Gemini format
contents = convert_messages_to_gemini_format(messages)
# print(f"DEBUG gemini_adapter: Converted {len(messages)} messages to {len(contents)} Contents")
# print(f"DEBUG gemini_adapter: Tools: {len(gemini_tools[0].function_declarations) if gemini_tools else 0}")
# Call API
# print(f"DEBUG gemini_adapter: Calling Gemini API...")
client = get_client()
# Generate content (no timeout parameter - not supported)
config = types.GenerateContentConfig(
system_instruction=system_prompt,
max_output_tokens=max_tokens,
temperature=1.0,
tools=gemini_tools
)
response = client.models.generate_content(
model=model_name,
contents=contents,
config=config
)
print(f"DEBUG gemini_adapter: Got response")
# print(f"DEBUG gemini_adapter: Response type: {type(response)}")
# print(f"DEBUG gemini_adapter: Response has candidates: {hasattr(response, 'candidates')}")
if hasattr(response, 'candidates') and response.candidates:
candidate = response.candidates[0]
# print(f"DEBUG gemini_adapter: First candidate type: {type(candidate)}")
# print(f"DEBUG gemini_adapter: finish_reason: {getattr(candidate, 'finish_reason', 'UNKNOWN')}")
# Log function calls if present
if hasattr(candidate, 'content') and candidate.content and hasattr(candidate.content, 'parts') and candidate.content.parts:
for i, part in enumerate(candidate.content.parts):
if hasattr(part, 'function_call') and part.function_call:
# print(f"DEBUG gemini_adapter: Part {i} has function_call: {part.function_call.name}")
print(f"DEBUG gemini_adapter: Function args: {dict(part.function_call.args) if part.function_call.args else {}}")
return response
except Exception as e:
print(f"DEBUG gemini_adapter ERROR: {type(e).__name__}: {str(e)}")
import traceback
traceback.print_exc()
raise
def extract_tool_calls_from_gemini_response(response):
"""
Extract function calls from a Gemini response
Returns a list of {name, input, id} dicts compatible with the Anthropic format
"""
tool_calls = []
# print(f"DEBUG extract_tool_calls: Response type: {type(response)}")
# print(f"DEBUG extract_tool_calls: Has candidates: {hasattr(response, 'candidates')}")
if not hasattr(response, 'candidates') or not response.candidates:
# print(f"DEBUG extract_tool_calls: No candidates found")
return tool_calls
candidate = response.candidates[0]
# print(f"DEBUG extract_tool_calls: Candidate has content: {hasattr(candidate, 'content')}")
if not hasattr(candidate, 'content') or not candidate.content:
# print(f"DEBUG extract_tool_calls: No content in candidate")
return tool_calls
# print(f"DEBUG extract_tool_calls: Content has parts: {hasattr(candidate.content, 'parts')}")
if not hasattr(candidate.content, 'parts') or not candidate.content.parts:
# print(f"DEBUG extract_tool_calls: No parts in content")
return tool_calls
for i, part in enumerate(candidate.content.parts):
# print(f"DEBUG extract_tool_calls: Part {i} has function_call: {hasattr(part, 'function_call')}")
if hasattr(part, 'function_call') and part.function_call:
fc = part.function_call
tool_calls.append({
"name": fc.name,
"input": dict(fc.args) if fc.args else {},
"id": f"call_{i}"
})
# print(f"DEBUG extract_tool_calls: Found {len(tool_calls)} tool calls")
return tool_calls
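# Illustrative sketch (hypothetical tool name): if the first candidate's only part
# is a function call equivalent to submit_guess(word="tiger"), this returns
#   [{"name": "submit_guess", "input": {"word": "tiger"}, "id": "call_0"}]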
def get_text_from_gemini_response(response):
"""Extract text content from Gemini response
Returns:
tuple: (text, error_message) where error_message is None if successful
"""
# print(f"DEBUG get_text: Response type: {type(response)}")
# print(f"DEBUG get_text: Has candidates: {hasattr(response, 'candidates')}")
if not hasattr(response, 'candidates') or not response.candidates:
# print(f"ERROR get_text: No candidates in response")
return "", "No response candidates received from AI. Please try again."
candidate = response.candidates[0]
finish_reason = getattr(candidate, 'finish_reason', 'UNKNOWN')
# print(f"DEBUG get_text: Candidate has content: {hasattr(candidate, 'content')}")
# print(f"DEBUG get_text: finish_reason: {finish_reason}")
if not hasattr(candidate, 'content') or not candidate.content:
# print(f"WARNING get_text: No content in candidate")
# print(f"WARNING get_text: safety_ratings: {getattr(candidate, 'safety_ratings', 'N/A')}")
# Provide specific error messages based on finish_reason
if finish_reason == 'SAFETY':
return "", "Response blocked by safety filters. Please try different phrasing."
elif finish_reason == 'RECITATION':
return "", "Response contained copied content. Please try again."
elif finish_reason == 'MAX_TOKENS':
return "", "Response too long. Please try again."
elif finish_reason != 'STOP':
return "", f"AI response incomplete (reason: {finish_reason}). Please try again."
else:
return "", "AI returned empty response. Please try again."
# print(f"DEBUG get_text: Content has parts: {hasattr(candidate.content, 'parts') if candidate.content else False}")
if not candidate.content or not hasattr(candidate.content, 'parts') or not candidate.content.parts:
# print(f"WARNING get_text: No parts in content")
return "", "AI response had no content. Please try again."
text_parts = []
has_function_call = False
for i, part in enumerate(candidate.content.parts):
# print(f"DEBUG get_text: Part {i} has text: {hasattr(part, 'text')}")
if hasattr(part, 'text') and part.text:
text_parts.append(part.text)
if hasattr(part, 'function_call') and part.function_call:
has_function_call = True
result = " ".join(text_parts)
# print(f"DEBUG get_text: Extracted text length: {len(result)}, has_function_call: {has_function_call}")
# Empty text is OK if there's a function call (tool-only response)
if (not result or result.strip() == "") and not has_function_call:
# print(f"WARNING get_text: Empty text extracted and no function call")
return "", "AI returned empty text. Please try again."
return result, None
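# Illustrative sketch of the contract (hypothetical responses): a normal text reply
# yields ("Yes, it is.", None), while a safety-blocked candidate with no content
# yields ("", "Response blocked by safety filters. Please try different phrasing.").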
def chat_with_gemini_and_tools(system_prompt, messages, tools=None, max_tokens=1024, temperature=1.0, model_name="gemini-2.5-flash"):
"""
Chat function with optional tool calling support
Args:
system_prompt: System instruction for the AI
messages: List of message dicts with 'role' and 'content' keys
tools: Optional list of tool definitions in Anthropic format
max_tokens: Maximum tokens in response (default: 1024)
temperature: Randomness in response (0.0-2.0, default: 1.0)
model_name: Gemini model to use
Returns:
tuple: (response_text, tool_calls, error_message)
- response_text: Text response from AI (or empty if tool call only)
- tool_calls: List of tool call dicts [{name, input, id}] or None
- error_message: Error string or None if successful
"""
try:
# Convert messages to Gemini format
contents = convert_messages_to_gemini_format(messages)
# Convert tools to Gemini format if provided
gemini_tools = convert_tools_to_gemini_format(tools) if tools else None
# print(f"DEBUG chat_with_gemini_and_tools: {len(messages)} messages, {len(tools) if tools else 0} tools")
# Get client
client = get_client()
# Configure generation
# Use tool_config to encourage tool usage when tools are available
tool_config = None
if gemini_tools:
# AUTO mode: Gemini decides when to call functions
# This helps ensure it actually calls the tool when appropriate
tool_config = types.ToolConfig(
function_calling_config=types.FunctionCallingConfig(
mode="AUTO"
)
)
config = types.GenerateContentConfig(
system_instruction=system_prompt,
max_output_tokens=max_tokens,
temperature=temperature,
tools=gemini_tools,
tool_config=tool_config
)
# Call API
response = client.models.generate_content(
model=model_name,
contents=contents,
config=config
)
# print(f"DEBUG chat_with_gemini_and_tools: Got response")
# Check for tool calls first
tool_calls = extract_tool_calls_from_gemini_response(response)
# Extract text (may be empty if tool call only)
text, error = get_text_from_gemini_response(response)
# If there's an error but we have tool calls, that's OK (tool-only response)
if error and not tool_calls:
# print(f"ERROR chat_with_gemini_and_tools: {error}")
return "", None, error
elif error and tool_calls:
# print(f"DEBUG chat_with_gemini_and_tools: Error '{error}' but have {len(tool_calls)} tool calls, proceeding")
text = "" # Clear any error text
# print(f"DEBUG chat_with_gemini_and_tools: text={len(text)} chars, tool_calls={len(tool_calls) if tool_calls else 0}")
return text, tool_calls if tool_calls else None, None
except Exception as e:
error_msg = f"Error calling Gemini: {type(e).__name__}: {str(e)}"
print(f"ERROR chat_with_gemini_and_tools: {error_msg}")
import traceback
traceback.print_exc()
return "", None, error_msg
def chat_with_gemini(system_prompt, messages, max_tokens=1024, temperature=1.0, model_name="gemini-2.5-flash"):
"""
Simple chat function for conversational AI without tool calling
Args:
system_prompt: System instruction for the AI
messages: List of message dicts with 'role' and 'content' keys
max_tokens: Maximum tokens in response (default: 1024)
temperature: Randomness in response (0.0-2.0, default: 1.0)
model_name: Gemini model to use
Returns:
tuple: (response_text, error_message) where error_message is None if successful
"""
try:
# Convert messages to Gemini format
contents = convert_messages_to_gemini_format(messages)
# print(f"DEBUG chat_with_gemini: Converted {len(messages)} messages to {len(contents)} Contents")
# print(f"DEBUG chat_with_gemini: Calling Gemini API with model {model_name}")
# Get client
client = get_client()
# Configure generation
config = types.GenerateContentConfig(
system_instruction=system_prompt,
max_output_tokens=max_tokens,
temperature=temperature
)
# Call API
response = client.models.generate_content(
model=model_name,
contents=contents,
config=config
)
# print(f"DEBUG chat_with_gemini: Got response")
# Extract text
text, error = get_text_from_gemini_response(response)
if error:
# print(f"ERROR chat_with_gemini: {error}")
return "", error
# print(f"DEBUG chat_with_gemini: Successfully extracted text ({len(text)} chars)")
return text, None
except Exception as e:
error_msg = f"Error calling Gemini: {type(e).__name__}: {str(e)}"
print(f"ERROR chat_with_gemini: {error_msg}")
import traceback
traceback.print_exc()
return "", error_msg