VoiceSementle / gemini_adapter.py
Sungjoon Lee
[DOCS] Edit documentation and remove debug code
48b92eb
"""
Adapter to convert between Anthropic-style API calls and the Google GenAI SDK
"""
from google import genai
from google.genai import types
import json
import os
# Lazy client initialization
_client = None
def get_client():
"""Get or create the Gemini client"""
global _client
if _client is None:
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
raise ValueError("GOOGLE_API_KEY environment variable not set")
_client = genai.Client(api_key=api_key)
return _client
def clean_schema_for_gemini(schema):
"""
Remove fields that Gemini doesn't support from JSON schema
"""
if not isinstance(schema, dict):
return schema
cleaned = {}
# Gemini doesn't support: title, default, examples, additionalProperties
skip_fields = {"title", "default", "examples", "additionalProperties"}
for key, value in schema.items():
if key in skip_fields:
continue
if isinstance(value, dict):
cleaned[key] = clean_schema_for_gemini(value)
elif isinstance(value, list):
cleaned[key] = [clean_schema_for_gemini(item) if isinstance(item, dict) else item for item in value]
else:
cleaned[key] = value
return cleaned
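# Illustrative sketch (not part of the original module): what the cleaner does to a
# hypothetical Anthropic-style input_schema; the field names below are made up.
#
#   clean_schema_for_gemini({
#       "type": "object",
#       "title": "GuessInput",            # dropped
#       "additionalProperties": False,    # dropped
#       "properties": {"word": {"type": "string", "default": ""}},  # "default" dropped
#       "required": ["word"],
#   })
#   # -> {"type": "object",
#   #     "properties": {"word": {"type": "string"}},
#   #     "required": ["word"]}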
def convert_tools_to_gemini_format(anthropic_tools):
"""
Convert Anthropic tool schema to Gemini function declarations
"""
function_declarations = []
for tool in anthropic_tools:
# Clean the input schema
cleaned_schema = clean_schema_for_gemini(tool["input_schema"])
# Convert to Gemini FunctionDeclaration
func_decl = types.FunctionDeclaration(
name=tool["name"],
description=tool["description"],
parameters=cleaned_schema
)
function_declarations.append(func_decl)
# Wrap in Tool object
if function_declarations:
return [types.Tool(function_declarations=function_declarations)]
return None
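# Illustrative sketch (hypothetical tool definition, not one shipped with the app):
# an Anthropic-format tool such as
#
#   {"name": "submit_guess",
#    "description": "Record the player's guess",
#    "input_schema": {"type": "object",
#                     "properties": {"word": {"type": "string"}},
#                     "required": ["word"]}}
#
# is converted into a single types.Tool wrapping one types.FunctionDeclaration,
# with the cleaned schema passed as its parameters.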
def convert_messages_to_gemini_format(anthropic_messages):
"""
Convert Anthropic messages format to Gemini Content format
"""
contents = []
for msg in anthropic_messages:
role = msg["role"]
content = msg["content"]
# Convert role: assistant -> model
gemini_role = "model" if role == "assistant" else "user"
parts = []
# Handle different content types
if isinstance(content, str):
# Simple text message
parts.append(types.Part(text=content))
elif isinstance(content, dict):
# Could be Gradio file format {"path": ..., "mime_type": ...}
# Skip audio/video files - they can't be sent to Gemini text API
if content.get("path") and content.get("mime_type"):
# print(f"DEBUG convert_messages: Skipping file content: {content.get('mime_type')}")
continue
# Could be text content {"type": "text", "text": "..."}
elif content.get("type") == "text":
parts.append(types.Part(text=content.get("text", "")))
elif 'Audio' in content.__class__.__name__:
# Skip Gradio Audio component objects
# print(f"DEBUG convert_messages: Skipping Gradio component: {content.__class__.__name__}")
continue
elif isinstance(content, list):
# Complex content with tool calls/results
for item in content:
if isinstance(item, dict):
if item.get("type") == "text":
parts.append(types.Part(text=item.get("text", "")))
elif item.get("type") == "tool_use":
# Gemini function call
parts.append(types.Part(
function_call=types.FunctionCall(
name=item.get("name"),
args=item.get("input", {})
)
))
elif item.get("type") == "tool_result":
# Gemini function response; Anthropic tool_result blocks carry only a
# tool_use_id, so it is used as the FunctionResponse name below
try:
response_data = json.loads(item.get("content", "{}"))
except (json.JSONDecodeError, TypeError):
response_data = {"result": item.get("content", "")}
parts.append(types.Part(
function_response=types.FunctionResponse(
name=item.get("tool_use_id", "unknown"),
response=response_data
)
))
elif hasattr(item, 'type'):
# Anthropic SDK objects
if item.type == "text":
parts.append(types.Part(text=item.text))
elif item.type == "tool_use":
parts.append(types.Part(
function_call=types.FunctionCall(
name=item.name,
args=item.input
)
))
if parts:
contents.append(types.Content(role=gemini_role, parts=parts))
return contents
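# Illustrative sketch of the conversion (assumed messages, not taken from the app):
#
#   convert_messages_to_gemini_format([
#       {"role": "user", "content": "Is the word an animal?"},
#       {"role": "assistant", "content": [{"type": "text", "text": "Yes, it is."}]},
#   ])
#
# yields two types.Content objects, one with role "user" and one with role "model",
# each holding a single text Part. File dicts and Gradio Audio objects are skipped.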
def call_gemini_with_tools(model_name, system_prompt, messages, tools, max_tokens=2048):
"""
Call the Gemini API with tools in an Anthropic-compatible way
"""
try:
# Convert tools to Gemini format
gemini_tools = convert_tools_to_gemini_format(tools) if tools else None
# Convert messages to Gemini format
contents = convert_messages_to_gemini_format(messages)
# print(f"DEBUG gemini_adapter: Converted {len(messages)} messages to {len(contents)} Contents")
# print(f"DEBUG gemini_adapter: Tools: {len(gemini_tools[0].function_declarations) if gemini_tools else 0}")
# Call API
# print(f"DEBUG gemini_adapter: Calling Gemini API...")
client = get_client()
# Generate content (no timeout parameter - not supported)
config = types.GenerateContentConfig(
system_instruction=system_prompt,
max_output_tokens=max_tokens,
temperature=1.0,
tools=gemini_tools
)
response = client.models.generate_content(
model=model_name,
contents=contents,
config=config
)
print(f"DEBUG gemini_adapter: Got response")
# print(f"DEBUG gemini_adapter: Response type: {type(response)}")
# print(f"DEBUG gemini_adapter: Response has candidates: {hasattr(response, 'candidates')}")
if hasattr(response, 'candidates') and response.candidates:
candidate = response.candidates[0]
# print(f"DEBUG gemini_adapter: First candidate type: {type(candidate)}")
# print(f"DEBUG gemini_adapter: finish_reason: {getattr(candidate, 'finish_reason', 'UNKNOWN')}")
# Log function calls if present
if hasattr(candidate, 'content') and candidate.content and hasattr(candidate.content, 'parts') and candidate.content.parts:
for i, part in enumerate(candidate.content.parts):
if hasattr(part, 'function_call') and part.function_call:
# print(f"DEBUG gemini_adapter: Part {i} has function_call: {part.function_call.name}")
print(f"DEBUG gemini_adapter: Function args: {dict(part.function_call.args) if part.function_call.args else {}}")
return response
except Exception as e:
print(f"DEBUG gemini_adapter ERROR: {type(e).__name__}: {str(e)}")
import traceback
traceback.print_exc()
raise
def extract_tool_calls_from_gemini_response(response):
"""
Extract function calls from a Gemini response
Returns a list of {name, input, id} dicts compatible with the Anthropic format
"""
tool_calls = []
# print(f"DEBUG extract_tool_calls: Response type: {type(response)}")
# print(f"DEBUG extract_tool_calls: Has candidates: {hasattr(response, 'candidates')}")
if not hasattr(response, 'candidates') or not response.candidates:
# print(f"DEBUG extract_tool_calls: No candidates found")
return tool_calls
candidate = response.candidates[0]
# print(f"DEBUG extract_tool_calls: Candidate has content: {hasattr(candidate, 'content')}")
if not hasattr(candidate, 'content') or not candidate.content:
# print(f"DEBUG extract_tool_calls: No content in candidate")
return tool_calls
# print(f"DEBUG extract_tool_calls: Content has parts: {hasattr(candidate.content, 'parts')}")
if not hasattr(candidate.content, 'parts') or not candidate.content.parts:
# print(f"DEBUG extract_tool_calls: No parts in content")
return tool_calls
for i, part in enumerate(candidate.content.parts):
# print(f"DEBUG extract_tool_calls: Part {i} has function_call: {hasattr(part, 'function_call')}")
if hasattr(part, 'function_call') and part.function_call:
fc = part.function_call
tool_calls.append({
"name": fc.name,
"input": dict(fc.args) if fc.args else {},
"id": f"call_{i}"
})
# print(f"DEBUG extract_tool_calls: Found {len(tool_calls)} tool calls")
return tool_calls
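# Illustrative sketch (hypothetical tool name): if the first candidate's only part
# is a function call equivalent to submit_guess(word="tiger"), this returns
#   [{"name": "submit_guess", "input": {"word": "tiger"}, "id": "call_0"}]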
def get_text_from_gemini_response(response):
"""Extract text content from Gemini response
Returns:
tuple: (text, error_message) where error_message is None if successful
"""
# print(f"DEBUG get_text: Response type: {type(response)}")
# print(f"DEBUG get_text: Has candidates: {hasattr(response, 'candidates')}")
if not hasattr(response, 'candidates') or not response.candidates:
# print(f"ERROR get_text: No candidates in response")
return "", "No response candidates received from AI. Please try again."
candidate = response.candidates[0]
finish_reason = getattr(candidate, 'finish_reason', 'UNKNOWN')
# print(f"DEBUG get_text: Candidate has content: {hasattr(candidate, 'content')}")
# print(f"DEBUG get_text: finish_reason: {finish_reason}")
if not hasattr(candidate, 'content') or not candidate.content:
# print(f"WARNING get_text: No content in candidate")
# print(f"WARNING get_text: safety_ratings: {getattr(candidate, 'safety_ratings', 'N/A')}")
# Provide specific error messages based on finish_reason
if finish_reason == 'SAFETY':
return "", "Response blocked by safety filters. Please try different phrasing."
elif finish_reason == 'RECITATION':
return "", "Response contained copied content. Please try again."
elif finish_reason == 'MAX_TOKENS':
return "", "Response too long. Please try again."
elif finish_reason != 'STOP':
return "", f"AI response incomplete (reason: {finish_reason}). Please try again."
else:
return "", "AI returned empty response. Please try again."
# print(f"DEBUG get_text: Content has parts: {hasattr(candidate.content, 'parts') if candidate.content else False}")
if not candidate.content or not hasattr(candidate.content, 'parts') or not candidate.content.parts:
# print(f"WARNING get_text: No parts in content")
return "", "AI response had no content. Please try again."
text_parts = []
has_function_call = False
for i, part in enumerate(candidate.content.parts):
# print(f"DEBUG get_text: Part {i} has text: {hasattr(part, 'text')}")
if hasattr(part, 'text') and part.text:
text_parts.append(part.text)
if hasattr(part, 'function_call') and part.function_call:
has_function_call = True
result = " ".join(text_parts)
# print(f"DEBUG get_text: Extracted text length: {len(result)}, has_function_call: {has_function_call}")
# Empty text is OK if there's a function call (tool-only response)
if (not result or result.strip() == "") and not has_function_call:
# print(f"WARNING get_text: Empty text extracted and no function call")
return "", "AI returned empty text. Please try again."
return result, None
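# Illustrative sketch of the contract (hypothetical responses): a normal text reply
# yields ("Yes, it is.", None), while a safety-blocked candidate with no content
# yields ("", "Response blocked by safety filters. Please try different phrasing.").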
def chat_with_gemini_and_tools(system_prompt, messages, tools=None, max_tokens=1024, temperature=1.0, model_name="gemini-2.5-flash"):
"""
Chat function with optional tool calling support
Args:
system_prompt: System instruction for the AI
messages: List of message dicts with 'role' and 'content' keys
tools: Optional list of tool definitions in Anthropic format
max_tokens: Maximum tokens in response (default: 1024)
temperature: Randomness in response (0.0-2.0, default: 1.0)
model_name: Gemini model to use
Returns:
tuple: (response_text, tool_calls, error_message)
- response_text: Text response from AI (or empty if tool call only)
- tool_calls: List of tool call dicts [{name, input, id}] or None
- error_message: Error string or None if successful
"""
try:
# Convert messages to Gemini format
contents = convert_messages_to_gemini_format(messages)
# Convert tools to Gemini format if provided
gemini_tools = convert_tools_to_gemini_format(tools) if tools else None
# print(f"DEBUG chat_with_gemini_and_tools: {len(messages)} messages, {len(tools) if tools else 0} tools")
# Get client
client = get_client()
# Configure generation
# Use tool_config to encourage tool usage when tools are available
tool_config = None
if gemini_tools:
# AUTO mode: Gemini decides when to call functions
# This helps ensure it actually calls the tool when appropriate
tool_config = types.ToolConfig(
function_calling_config=types.FunctionCallingConfig(
mode="AUTO"
)
)
config = types.GenerateContentConfig(
system_instruction=system_prompt,
max_output_tokens=max_tokens,
temperature=temperature,
tools=gemini_tools,
tool_config=tool_config
)
# Call API
response = client.models.generate_content(
model=model_name,
contents=contents,
config=config
)
# print(f"DEBUG chat_with_gemini_and_tools: Got response")
# Check for tool calls first
tool_calls = extract_tool_calls_from_gemini_response(response)
# Extract text (may be empty if tool call only)
text, error = get_text_from_gemini_response(response)
# If there's an error but we have tool calls, that's OK (tool-only response)
if error and not tool_calls:
# print(f"ERROR chat_with_gemini_and_tools: {error}")
return "", None, error
elif error and tool_calls:
# print(f"DEBUG chat_with_gemini_and_tools: Error '{error}' but have {len(tool_calls)} tool calls, proceeding")
text = "" # Clear any error text
# print(f"DEBUG chat_with_gemini_and_tools: text={len(text)} chars, tool_calls={len(tool_calls) if tool_calls else 0}")
return text, tool_calls if tool_calls else None, None
except Exception as e:
error_msg = f"Error calling Gemini: {type(e).__name__}: {str(e)}"
print(f"ERROR chat_with_gemini_and_tools: {error_msg}")
import traceback
traceback.print_exc()
return "", None, error_msg
def chat_with_gemini(system_prompt, messages, max_tokens=1024, temperature=1.0, model_name="gemini-2.5-flash"):
"""
Simple chat function for conversational AI without tool calling
Args:
system_prompt: System instruction for the AI
messages: List of message dicts with 'role' and 'content' keys
max_tokens: Maximum tokens in response (default: 1024)
temperature: Randomness in response (0.0-2.0, default: 1.0)
model_name: Gemini model to use
Returns:
tuple: (response_text, error_message) where error_message is None if successful
"""
try:
# Convert messages to Gemini format
contents = convert_messages_to_gemini_format(messages)
# print(f"DEBUG chat_with_gemini: Converted {len(messages)} messages to {len(contents)} Contents")
# print(f"DEBUG chat_with_gemini: Calling Gemini API with model {model_name}")
# Get client
client = get_client()
# Configure generation
config = types.GenerateContentConfig(
system_instruction=system_prompt,
max_output_tokens=max_tokens,
temperature=temperature
)
# Call API
response = client.models.generate_content(
model=model_name,
contents=contents,
config=config
)
# print(f"DEBUG chat_with_gemini: Got response")
# Extract text
text, error = get_text_from_gemini_response(response)
if error:
# print(f"ERROR chat_with_gemini: {error}")
return "", error
# print(f"DEBUG chat_with_gemini: Successfully extracted text ({len(text)} chars)")
return text, None
except Exception as e:
error_msg = f"Error calling Gemini: {type(e).__name__}: {str(e)}"
print(f"ERROR chat_with_gemini: {error_msg}")
import traceback
traceback.print_exc()
return "", error_msg