""" Gradio + OpenAI Responses API + Remote MCP Server (HTTP) CSV-based MLOps Agent with streaming final answer & MCP tools """ import json import os import shutil import gradio as gr from openai import OpenAI # ------------------------- # Config # ------------------------- MCP_SERVER_URL = "https://mcp-1st-birthday-auto-deployer.hf.space/gradio_api/mcp/" OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") MODEL = "gpt-5-mini" # you can swap to gpt-5 for final answers if you want client = OpenAI(api_key=OPENAI_API_KEY) MCP_TOOLS = [ { "type": "mcp", "server_label": "auto-deployer", "server_url": MCP_SERVER_URL, "require_approval": "never", } ] # ------------------------- # Long prompts # ------------------------- GENERAL_SYSTEM_PROMPT = """ You are a helpful, concise MLOps assistant living inside a web app. Context: - The app can analyze CSV datasets, train models, and deploy them on cloud. - In this mode, you are doing *general chat only* (no tools are being run). You will receive: - A short transcript of the previous conversation (if any). - The user's latest message. Your job: - Answer the latest user message directly. - Use the previous conversation only when it is clearly relevant. - If the user is asking conceptual questions (MLOps, ML, data, deployment), explain clearly. - If they refer to the app’s capabilities, you may describe what the app can do (e.g. “upload a CSV, then ask me to analyze/train/deploy”), but do not fabricate specific model IDs or endpoints. - If something is ambiguous, make a reasonable assumption and move forward. Style: - Be clear, friendly, and pragmatic. - Use Markdown (headings, bullet points, code blocks when helpful). - Prefer short, high-signal answers over long explanations, unless the user asks for more detail. """ MAIN_SYSTEM_PROMPT = """ You are an MLOps assistant. You can internally use tools for CSV analysis, model training, evaluation, deployment, and end-to-end MLOps. Your reply must be: - Clean and user-facing - Structured with headings, sub-headings, and bullet points - High-signal and concise Always reply in this Markdown structure, omitting sections that are not relevant: ### Data Analysis (only if a dataset was analyzed) - Please provide a detailed data analysis report in bullet style, highlighting key insights. ### Model Performance (only if a model was trained) - 1–3 bullets with key metrics (e.g. Accuracy, F1, ROC-AUC). - Display the Model id ### Deployment Status (only if deployment was requested/attempted) - 1–3 bullets summarizing whether deployment is available for inference or failed. - Use clear, non-technical language (no stack traces). - Display the Model id and endpoint URL ### Example Usage (only if a model is deployed) Provide example code **outside** of any collapsible block: - One Python example in a fenced `python` code block. - One curl example in a fenced `bash` code block. These should show how to call the model or endpoint with a realistic payload, e.g.: ```python # Example – replace values with your own inputs ```` ```bash # Curl example for API endpoint `` --- After the Key Summary (including Example Usage), ALWAYS add a collapsible block for technical details:

<details>
<summary>Show Technical Details (tools, parameters, logs)</summary>

#### Tools Used
- List tool names used this turn.
- One short line on what each did.

#### Parameters Passed
- Bullet list of important parameters (e.g. target column, task type, key options).

#### Additional Logs / Raw Output (optional)
- Short JSON snippets or log fragments if useful.
- Wrap any JSON or logs in fenced code blocks.

</details>
""" # ------------------------- # Helpers # ------------------------- def history_to_text(history) -> str: """ Turn Gradio history (list of {role, content}) into a plain-text conversation transcript for the model. """ if not history: return "" lines = [] for msg in history: role = msg.get("role") content = msg.get("content", "") if role == "user": lines.append(f"User: {content}") elif role == "assistant": lines.append(f"Assistant: {content}") return "\n".join(lines) def extract_output_text(response) -> str: """ Extract text from a non-streaming Responses API call while preserving formatting. """ try: if hasattr(response, "output") and response.output and len(response.output) > 0: first = response.output[0] if getattr(first, "content", None): for content_item in first.content: if ( hasattr(content_item, "type") and content_item.type == "output_text" ): text = getattr(content_item, "text", None) if text: return text elif ( hasattr(content_item, "type") and content_item.type == "output_json" ): # If there's JSON output, format it nicely json_data = getattr(content_item, "json", None) if json_data: return f"```json\n{json.dumps(json_data, indent=2)}\n```" # Fallback return getattr(response, "output_text", None) or str(response) except Exception as e: return f"Error extracting output: {e}" def handle_upload(file_path, request: gr.Request): """ 1) Take uploaded file path (string) 2) Check file size and show warnings for large datasets 3) Copy to /tmp for a stable path 4) Build a public Gradio file URL that the MCP server can fetch via HTTP """ if not file_path: return None # Check file size and add warning if > 1.5MB url_params = "" try: file_size = os.path.getsize(file_path) file_size_mb = file_size / (1024 * 1024) # Convert to MB if file_size_mb > 1.5: # Show Gradio warning with 5-second auto-dismiss gr.Warning( f"Large dataset detected! Your file is {file_size_mb:.1f}MB.", duration=5, ) gr.Warning( "For optimal performance, training will use only the first 10,000 rows.", duration=10, ) except Exception: # If we can't check file size, continue without warnings pass local_path = file_path stable_path = os.path.join("/tmp", os.path.basename(local_path)) try: shutil.copy(local_path, stable_path) local_path = stable_path except Exception: # If copy fails, just use the original path pass base_url = str(request.base_url).rstrip("/") public_url = f"{base_url}/gradio_api/file={local_path}{url_params}" return public_url def should_use_tools(user_msg: str) -> bool: """ Simple heuristic to decide if this turn should trigger MCP tools. Only fire tools if the user is clearly asking for data / model work. """ text = user_msg.lower() keywords = [ "data", "dataset", "csv", "train", "training", "model", "deploy", "deployment", "predict", "prediction", "inference", "evaluate", "evaluation", "analyze", "analysis", ] return any(k in text for k in keywords) # ------------------------- # Main chat handler (streaming + disabling textbox) # ------------------------- def chat_send_stream(user_msg, history, file_url): """ Main Gradio streaming handler. - If the user is just chatting (e.g., "hey"), respond directly with a streaming answer (no tools, no CSV required). - If the user clearly asks for data/model operations: Call API once with MCP tools and stream the natural language results directly - Keeps full chat history so follow-ups work. - Shows status/progress messages in the UI when tools are used. - Disables the textbox during work, re-enables at the end. 
""" # UI history (what Gradio displays) if history is None: history = [] # Append the user message to the UI history history.append({"role": "user", "content": user_msg}) # Conversation before this turn (for context) convo_before = history_to_text(history[:-1]) # Decide if this message should trigger tools use_tools = should_use_tools(user_msg) # ------------------------- # BRANCH 1: No tools (normal chat, e.g. "hey") # ------------------------- if not use_tools: # Add a small status bubble then stream history.append({"role": "assistant", "content": "Generating answer..."}) # Disable textbox while generating yield ( history, gr.update(interactive=False), ) # Build input text for Responses API input_text = ( (f"Conversation so far:\n{convo_before}\n\n" if convo_before else "") + "Latest user message:\n" + user_msg ) stream = client.responses.create( model=MODEL, instructions=GENERAL_SYSTEM_PROMPT, input=input_text, reasoning={"effort": "low"}, stream=True, ) final_text = "" for event in stream: if event.type == "response.output_text.delta": final_text += event.delta history[-1]["content"] = final_text yield ( history, gr.update(interactive=False), ) elif event.type == "response.completed": break # Re-enable textbox at the end yield ( history, gr.update(interactive=True, value=""), ) return # ------------------------- # BRANCH 2: Tools needed (data / model operations) # ------------------------- # If tools are needed but no file URL, ask for CSV if not file_url: history.append( { "role": "assistant", "content": ( "To analyze, train, or deploy, please upload a CSV file first " "using the file upload control." ), } ) # Keep textbox enabled because nothing heavy is happening yield ( history, gr.update(interactive=True), ) return # User message for the model includes the CSV URL user_with_file = f"[Uploaded CSV file URL: {file_url}]\n\n{user_msg}" # Show a status message in UI history.append( { "role": "assistant", "content": "Analyzing your request and running MCP tools...", } ) # Disable textbox while tools run yield ( history, gr.update(interactive=False), ) # Build input for the tool phase (single call) tool_input = ( (f"Conversation so far:\n{convo_before}\n\n" if convo_before else "") + "Latest user request (with file URL):\n" + user_with_file ) # Single API call with tools - MCP returns natural language results stream = client.responses.create( model=MODEL, instructions=MAIN_SYSTEM_PROMPT, input=tool_input, tools=MCP_TOOLS, reasoning={"effort": "low"}, stream=True, ) # Replace status message with streaming answer history[-1] = {"role": "assistant", "content": ""} final_text = "" for event in stream: if event.type == "response.output_text.delta": final_text += event.delta history[-1]["content"] = final_text yield ( history, gr.update(interactive=False), ) elif event.type == "response.completed": break # Re-enable textbox at the end, and clear it yield ( history, gr.update(interactive=True, value=""), ) # ------------------------- # Gradio UI # ------------------------- with gr.Blocks(title="Streaming MLOps Agent") as demo: gr.Markdown( """ # 🧠 Smart MLOps Agent - 💬 Chat naturally, even just “hey” - 📂 Upload CSVs for analysis, training, and deployment - ⚡ See live tool status and streaming answers """ ) file_url_state = gr.State(value=None) uploader = gr.File( label="Upload CSV File", file_count="single", type="filepath", file_types=[".csv"], ) uploader.change( handle_upload, inputs=[uploader], outputs=[file_url_state], ) chatbot = gr.Chatbot( label="Chat", render_markdown=True, height=500, 

    chatbot = gr.Chatbot(
        label="Chat",
        type="messages",  # history entries are {"role": ..., "content": ...} dicts
        render_markdown=True,
        height=500,
        avatar_images=(
            None,
            "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png",
        ),
    )

    msg = gr.Textbox(
        label="Message",
        interactive=True,
        placeholder="Say hi, or ask me to analyze / train / deploy on your dataset...",
    )

    # Only Enter/Return sends messages; no Send button
    msg.submit(
        chat_send_stream,
        inputs=[msg, chatbot, file_url_state],
        outputs=[chatbot, msg],
    )

    gr.Examples(
        examples=[
            ["Analyze the dataset", os.path.join("data", "heart.csv")],
            ["Train the classifier with HeartDisease as target", os.path.join("data", "heart.csv")],
            ["Deploy the model using model_1764524701 model id", os.path.join("data", "heart.csv")],
            ["Auto deploy the model using MEDV as target", os.path.join("data", "housing.csv")],
        ],
        inputs=[msg, uploader],
        label="Try an example",
    )


if __name__ == "__main__":
    demo.queue().launch(
        allowed_paths=["/tmp"],
        ssr_mode=False,
        show_error=True,
        max_file_size="10mb",
    )
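
# To run locally (assuming this file is saved as app.py and OPENAI_API_KEY is set in
# the environment; the example prompts above also expect CSVs under ./data):
#   python app.py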