Spaces:
Sleeping
Sleeping
| """ | |
| CoJournalist Data - Swiss Parliamentary Data Chatbot | |
| Powered by Llama-3.1-8B-Instruct and OpenParlData MCP | |
| """ | |
| import os | |
| import json | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| from dotenv import load_dotenv | |
| from mcp_integration import execute_mcp_query, OpenParlDataClient | |
| import asyncio | |
| from usage_tracker import UsageTracker | |
| # Load environment variables | |
| load_dotenv() | |
| # Initialize Hugging Face Inference Client | |
| HF_TOKEN = os.getenv("HF_TOKEN") | |
| if not HF_TOKEN: | |
| print("Warning: HF_TOKEN not found. Please set it in .env file or Hugging Face Space secrets.") | |
| client = InferenceClient(token=HF_TOKEN) | |
| # Initialize usage tracker with 50 requests per day limit | |
| tracker = UsageTracker(daily_limit=50) | |
| # Available languages | |
| LANGUAGES = { | |
| "English": "en", | |
| "Deutsch": "de", | |
| "Français": "fr", | |
| "Italiano": "it" | |
| } | |
# System prompt for Llama-3.1-8B-Instruct.
# Instructs the model to translate user questions into OpenParlData MCP tool
# calls, answering with a strict JSON object ({"tool", "arguments",
# "explanation"}) or a plain {"response": ...} fallback. The reply is parsed
# downstream by query_model_async, hence the JSON-only rules below.
SYSTEM_PROMPT = """You are a helpful assistant that helps users query Swiss parliamentary data.
You have access to the following tools from the OpenParlData MCP server:
1. **openparldata_search_parliamentarians** - Search for Swiss parliamentarians
Parameters: query (name/party), canton (2-letter code), party, active_only, language, limit
2. **openparldata_get_parliamentarian** - Get detailed info about a specific parliamentarian
Parameters: person_id, include_votes, include_motions, language
3. **openparldata_search_votes** - Search parliamentary votes
Parameters:
- query (title/description)
- date_from (YYYY-MM-DD format, e.g., "2024-01-01")
- date_to (YYYY-MM-DD format, e.g., "2024-12-31" - NEVER use "now", always use actual date)
- vote_type (must be "final", "detail", or "overall")
- language, limit
4. **openparldata_get_vote_details** - Get detailed vote information
Parameters: vote_id, include_individual_votes, language
5. **openparldata_search_motions** - Search motions and proposals
Parameters: query, status, date_from (YYYY-MM-DD), date_to (YYYY-MM-DD), submitter_id, language, limit
6. **openparldata_search_debates** - Search debate transcripts
Parameters: query, date_from (YYYY-MM-DD), date_to (YYYY-MM-DD), speaker_id, language, limit
CRITICAL RULES:
- All dates MUST be in YYYY-MM-DD format (e.g., "2024-12-31")
- NEVER use "now", "today", or relative dates - always use actual YYYY-MM-DD dates
- For "latest" queries, use date_from with a recent date like "2024-01-01" and NO date_to parameter
- vote_type must ONLY be "final", "detail", or "overall" - no other values
- Your response MUST be valid JSON only
- Do NOT include explanatory text or markdown formatting
When a user asks a question about Swiss parliamentary data:
1. Analyze what information they need
2. Determine which tool(s) to use
3. Extract the relevant parameters from their question
4. Respond with ONLY a JSON object containing the tool call
Your response should be in this exact format:
{
"tool": "tool_name",
"arguments": {
"param1": "value1",
"param2": "value2"
},
"explanation": "Brief explanation of what you're searching for"
}
If the user's question is not about Swiss parliamentary data or you cannot determine the right tool, respond with:
{
"response": "Your natural language response here"
}
Example:
User: "Who are the parliamentarians from Zurich?"
Assistant:
{
"tool": "openparldata_search_parliamentarians",
"arguments": {
"canton": "ZH",
"language": "en",
"limit": 20
},
"explanation": "Searching for active parliamentarians from Canton Zurich"
}
"""
# Example queries, keyed by ISO 639-1 language code.
# Shown in the examples dropdown; swapped when the user changes language.
EXAMPLES = {
    "en": [
        "Who are the parliamentarians from Zurich?",
        "Show me recent votes about climate policy",
        "What motions were submitted about healthcare in 2024?",
        "Find debates about immigration reform"
    ],
    "de": [
        "Wer sind die Parlamentarier aus Zürich?",
        "Zeige mir aktuelle Abstimmungen zur Klimapolitik",
        "Welche Anträge zum Gesundheitswesen wurden 2024 eingereicht?",
        "Finde Debatten über Migrationsreform"
    ],
    "fr": [
        "Qui sont les parlementaires de Zurich?",
        "Montrez-moi les votes récents sur la politique climatique",
        "Quelles motions sur la santé ont été soumises en 2024?",
        "Trouvez les débats sur la réforme de l'immigration"
    ],
    "it": [
        "Chi sono i parlamentari di Zurigo?",
        "Mostrami i voti recenti sulla politica climatica",
        "Quali mozioni sulla sanità sono state presentate nel 2024?",
        "Trova i dibattiti sulla riforma dell'immigrazione"
    ]
}
def _extract_json(text: str) -> dict:
    """Parse a model reply that should contain a JSON object.

    Strips markdown code fences and any explanatory prose before the first
    '{' or '[' — small models sometimes disobey the JSON-only instruction.

    Args:
        text: Raw assistant message content.

    Returns:
        The parsed JSON value.

    Raises:
        json.JSONDecodeError: if no valid JSON can be recovered.
    """
    clean = text.strip()
    # Remove markdown code fences such as ```json ... ```
    if clean.startswith("```json"):
        clean = clean[7:]
    if clean.startswith("```"):
        clean = clean[3:]
    if clean.endswith("```"):
        clean = clean[:-3]
    clean = clean.strip()
    # Skip any leading prose before the first '{' or '[' (start of JSON).
    # If neither is present, json.loads will raise and the caller falls
    # back to treating the text as a natural-language response.
    starts = [i for i in (clean.find("{"), clean.find("[")) if i != -1]
    if starts:
        clean = clean[min(starts):]
    return json.loads(clean)


async def query_model_async(message: str, language: str = "en") -> dict:
    """Query Llama-3.1-8B via HF Inference Providers to interpret user intent.

    Args:
        message: The user's natural-language question.
        language: ISO 639-1 code of the desired response language.

    Returns:
        A dict with either a tool call ({"tool", "arguments", ...}),
        a direct answer ({"response": ...}), or an {"error": ...} entry.
    """
    try:
        # Create messages for chat completion
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"Language: {language}\nQuestion: {message}"}
        ]
        # NOTE(review): chat_completion is a blocking call, so this coroutine
        # never actually yields; the async signature is kept for API symmetry
        # with the MCP tool execution path.
        response = client.chat_completion(
            model="meta-llama/Llama-3.1-8B-Instruct",
            messages=messages,
            max_tokens=500,
            temperature=0.3
        )
        assistant_message = response.choices[0].message.content
        try:
            return _extract_json(assistant_message)
        except json.JSONDecodeError:
            # Not valid JSON: treat as a natural language response
            return {"response": assistant_message}
    except Exception as e:
        # Boundary handler: surface any model/API failure to the UI as text
        return {"error": f"Error querying model: {str(e)}"}
def query_model(message: str, language: str = "en") -> dict:
    """Synchronous wrapper for async model query.

    Uses asyncio.run, so it must not be called from inside a running
    event loop.
    """
    return asyncio.run(query_model_async(message, language))
async def execute_tool_async(tool_name: str, arguments: dict, show_debug: bool) -> tuple:
    """Execute MCP tool asynchronously.

    Delegates to execute_mcp_query with an empty query string; returns a
    (response, debug_info) tuple as produced by the MCP integration layer.
    """
    return await execute_mcp_query("", tool_name, arguments, show_debug)
def chat_response(message: str, history: list, language: str, show_debug: bool) -> str:
    """
    Main chat response function.

    Args:
        message: User's message
        history: Chat history (currently unused; each message is interpreted
            independently by the model)
        language: Selected language display name (a key of LANGUAGES)
        show_debug: Whether to show debug information

    Returns:
        Markdown-formatted response string
    """
    try:
        # Map display name to ISO code; default to English
        lang_code = LANGUAGES.get(language, "en")
        # Query the Llama model to interpret intent
        # (the original comment said "Phi-3" — the model is Llama-3.1-8B)
        model_response = query_model(message, lang_code)
        # Direct natural-language answer (no tool call needed)
        if "response" in model_response:
            return model_response["response"]
        # Model/API failure surfaced by query_model
        if "error" in model_response:
            return f"❌ {model_response['error']}"
        # Tool-call path
        if "tool" in model_response and "arguments" in model_response:
            tool_name = model_response["tool"]
            arguments = model_response["arguments"]
            explanation = model_response.get("explanation", "")
            # Ensure the response language is propagated to the tool
            arguments.setdefault("language", lang_code)
            try:
                response, debug_info = asyncio.run(
                    execute_tool_async(tool_name, arguments, show_debug)
                )
                # Assemble the markdown reply from optional sections
                parts = []
                if explanation:
                    parts.append(f"*{explanation}*\n\n")
                if show_debug and debug_info:
                    parts.append(f"### 🔧 Debug Information\n{debug_info}\n\n---\n\n")
                parts.append(f"### 📊 Results\n{response}")
                return "".join(parts)
            except Exception as e:
                return f"❌ Error executing tool '{tool_name}': {str(e)}"
        # Fallback: the model returned JSON we don't recognize
        return "I couldn't determine how to process your request. Please try rephrasing your question."
    except Exception as e:
        # Top-level boundary: never let an exception escape into the UI
        return f"❌ An error occurred: {str(e)}"
# Custom CSS injected into the Gradio app: sets the base font and styles
# the .chatbot-header banner used in the header Markdown below.
custom_css = """
.gradio-container {
    font-family: 'Inter', sans-serif;
}
.chatbot-header {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    border-radius: 10px;
    margin-bottom: 20px;
}
"""
# Build Gradio interface
with gr.Blocks(css=custom_css, title="CoJournalist Data") as demo:
    # Header banner (styled by .chatbot-header in custom_css)
    gr.Markdown(
        """
<div class="chatbot-header">
<h1>🏛️ CoJournalist Data</h1>
<p>Ask questions about Swiss parliamentary data in natural language</p>
</div>
"""
    )
    with gr.Row():
        # Left column: the chat area
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                height=500,
                label="Chat with CoJournalist",
                show_label=False
            )
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="Ask a question about Swiss parliamentary data...",
                    show_label=False,
                    scale=4
                )
                submit = gr.Button("Send", variant="primary", scale=1)
        # Right column: settings and example questions
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Settings")
            language = gr.Radio(
                choices=list(LANGUAGES.keys()),
                value="English",
                label="Language",
                info="Select response language"
            )
            show_debug = gr.Checkbox(
                label="Show debug info",
                value=False,
                info="Display tool calls and parameters"
            )
            gr.Markdown("### 💡 Example Questions")

            # Dynamic examples based on language
            def update_examples(lang):
                """Return updated example choices for the selected language."""
                lang_code = LANGUAGES.get(lang, "en")
                return gr.update(
                    choices=EXAMPLES.get(lang_code, EXAMPLES["en"])
                )

            examples_dropdown = gr.Dropdown(
                choices=EXAMPLES["en"],
                label="Try these:",
                show_label=False
            )
            language.change(
                fn=update_examples,
                inputs=[language],
                outputs=[examples_dropdown]
            )

    # Handle message submission
    def respond(message, chat_history, language, show_debug, request: gr.Request):
        """Validate, rate-limit, and answer a single user message.

        Returns ("", updated_history) so the textbox is cleared after send.
        """
        if not message.strip():
            return "", chat_history
        # Identify the user by client IP for per-user rate limiting.
        user_id = request.client.host if request and hasattr(request, 'client') else "unknown"
        # NOTE(review): assumes check_limit also records this request —
        # confirm against usage_tracker.
        if not tracker.check_limit(user_id):
            # Fix: a previously computed-but-unused `remaining` local was
            # removed; the message intentionally states the fixed daily cap.
            bot_message = (
                "⚠️ Daily request limit reached. You have used all 50 requests for today. "
                "Please try again tomorrow.\n\n"
                "This limit helps us keep the service free and available for everyone."
            )
            chat_history.append((message, bot_message))
            return "", chat_history
        # Get bot response and append the (user, bot) pair to the history
        bot_message = chat_response(message, chat_history, language, show_debug)
        chat_history.append((message, bot_message))
        return "", chat_history

    # Handle example selection: copy the chosen example into the textbox
    def use_example(example):
        return example

    # Wire events: Enter key and Send button both submit; dropdown fills textbox
    msg.submit(respond, [msg, chatbot, language, show_debug], [msg, chatbot])
    submit.click(respond, [msg, chatbot, language, show_debug], [msg, chatbot])
    examples_dropdown.change(use_example, [examples_dropdown], [msg])

    # Footer notes
    gr.Markdown(
        """
---
**Note:** This app uses the OpenParlData MCP server to access Swiss parliamentary data.
Currently returning mock data while the OpenParlData API is in development.
**Rate Limit:** 50 requests per day per user to keep the service affordable and accessible.
Powered by [Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) via HF Inference Providers and [Model Context Protocol (MCP)](https://modelcontextprotocol.io/)
"""
    )
# Launch the app only when run as a script (not when imported, e.g. by tests)
if __name__ == "__main__":
    demo.launch()