# Spaces: Jofthomas / Ministrals_demo
# Running on Zero
# File size: 37,894 Bytes

import base64
from http import HTTPStatus
import os
import uuid
import time
from datetime import datetime, timedelta
import torch
import gradio as gr
from gradio_client import utils as client_utils
import modelscope_studio.components.antd as antd
import modelscope_studio.components.antdx as antdx
import modelscope_studio.components.base as ms
import modelscope_studio.components.pro as pro
from config import DEFAULT_THEME, LIGHT_THEME, DEFAULT_SYS_PROMPT,DEFAULT_MODEL_SIZE, save_history, user_config, bot_config, welcome_config, markdown_config, upload_config, MINISTRAL_MODELS
from ui_components.thinking_button import ThinkingButton
import spaces

# Optional inference dependencies. If any of them cannot be imported the app
# still starts, but TRANSFORMERS_AVAILABLE stays False and a warning is printed.
TRANSFORMERS_AVAILABLE = False
try:
    from transformers import (
        Mistral3ForConditionalGeneration,
        AutoProcessor,
        AutoTokenizer,
        TextIteratorStreamer,
    )
    from huggingface_hub import hf_hub_download
    from threading import Thread
except ImportError:
    print("Warning: transformers not available. Running in demo mode only.")
else:
    TRANSFORMERS_AVAILABLE = True

# Process-wide caches, each keyed by the Hub model id:
#   MODEL_CACHE         -> (model, processor, tokenizer)
#   SYSTEM_PROMPT_CACHE -> raw (unformatted) system prompt template
#   PROCESSOR_CACHE     -> (processor, tokenizer)
MODEL_CACHE = {}
SYSTEM_PROMPT_CACHE = {}
PROCESSOR_CACHE = {}

# Startup banner, one line per entry.
print(
    "=" * 50,
    "🚀 Ministral Demo Starting",
    "   Model cache initialized (empty)",
    "   Processor cache initialized (empty)",
    "   System prompt cache initialized (empty)",
    "=" * 50,
    sep="\n",
)

def log_with_time(message: str):
    """Print *message* to stdout prefixed with the current local time.

    The prefix has the form ``[HH:MM:SS.mmm]`` (milliseconds, truncated).
    """
    stamp = datetime.now().time().isoformat(timespec="milliseconds")
    print("[{}] {}".format(stamp, message))

def log_cache_status():
    """Log how many models, processors and system prompts are currently cached."""
    n_models = len(MODEL_CACHE)
    n_processors = len(PROCESSOR_CACHE)
    n_prompts = len(SYSTEM_PROMPT_CACHE)
    log_with_time(
        f"📦 Cache status: {n_models} models, {n_processors} processors, {n_prompts} prompts cached"
    )

def load_system_prompt(model_id: str) -> str:
    """Return the system prompt for *model_id* with its placeholders filled in.

    The raw template is read from ``SYSTEM_PROMPT.txt`` in the model's Hub
    repository on first use and stored in ``SYSTEM_PROMPT_CACHE``. The
    ``{name}``, ``{today}`` and ``{yesterday}`` placeholders are rendered on
    every call, so cached templates always get the current dates.

    Args:
        model_id: Hub repository id; the part after the last ``/`` is used as
            the ``{name}`` placeholder value.

    Returns:
        The rendered prompt, or ``DEFAULT_SYS_PROMPT`` when the template cannot
        be downloaded, read or rendered. Cache hits and cache misses fall back
        the same way.
    """
    try:
        model_name = model_id.split("/")[-1]
        template = SYSTEM_PROMPT_CACHE.get(model_id)

        if template is not None:
            log_with_time(f"📋 System prompt cache hit for {model_name}")
        else:
            log_with_time(f"📥 Downloading system prompt for {model_name}...")
            start = time.time()
            file_path = hf_hub_download(repo_id=model_id, filename="SYSTEM_PROMPT.txt")
            # Explicit encoding: the platform default may not be UTF-8.
            with open(file_path, "r", encoding="utf-8") as file:
                template = file.read()
            SYSTEM_PROMPT_CACHE[model_id] = template
            log_with_time(f"✅ System prompt loaded in {time.time() - start:.2f}s")

        # Read the clock once so "today" and "yesterday" cannot straddle midnight.
        now = datetime.today()
        return template.format(
            name=model_name,
            today=now.strftime("%Y-%m-%d"),
            yesterday=(now - timedelta(days=1)).strftime("%Y-%m-%d"),
        )
    except Exception as e:
        # Best-effort: any download, read or formatting problem (including a
        # template with unexpected ``{...}`` fields) falls back to the default.
        log_with_time(f"⚠️ Could not load system prompt: {e}")
        return DEFAULT_SYS_PROMPT

def get_processor_and_tokenizer(model_id: str):
    """Return ``(processor, tokenizer)`` for *model_id*, loading them on first use.

    Successful loads are stored in ``PROCESSOR_CACHE``. If loading raises, the
    error is logged and ``(None, None)`` is returned instead.
    """
    try:
        cached_pair = PROCESSOR_CACHE[model_id]
    except KeyError:
        pass
    else:
        log_with_time(f"📋 Processor cache hit for {model_id.split('/')[-1]}")
        return cached_pair

    try:
        log_with_time(f"📥 Loading processor for {model_id.split('/')[-1]}...")
        began = time.time()
        pair = (
            AutoProcessor.from_pretrained(model_id),
            AutoTokenizer.from_pretrained(model_id, use_fast=True),
        )
        PROCESSOR_CACHE[model_id] = pair
        elapsed = time.time() - began
        log_with_time(f"✅ Processor loaded in {elapsed:.2f}s")
        return pair
    except Exception as e:
        log_with_time(f"❌ Error loading processor: {e}")
        return None, None

def get_model_and_processor(model_id: str, show_notification=False):
    """Return ``(model, processor, tokenizer)`` for *model_id*, loading on first use.

    Loaded bundles are stored in ``MODEL_CACHE``. When ``show_notification`` is
    true, ``gr.Info`` / ``gr.Warning`` messages report loading progress and
    failures. Returns ``(None, None, None)`` when transformers is unavailable,
    when the processor/tokenizer cannot be loaded, or when loading raises.
    """
    unavailable = (None, None, None)

    if not TRANSFORMERS_AVAILABLE:
        log_with_time("⚠️ Transformers not available")
        return unavailable

    try:
        cached_bundle = MODEL_CACHE[model_id]
    except KeyError:
        pass
    else:
        log_with_time(f"📋 Model cache hit for {model_id.split('/')[-1]}")
        return cached_bundle

    short_name = model_id.split("/")[-1]

    try:
        if show_notification:
            gr.Info(f"📥 Loading {short_name}... This may take a few minutes on first use.", duration=15)

        began = time.time()

        processor, tokenizer = get_processor_and_tokenizer(model_id)
        if processor is None or tokenizer is None:
            return unavailable

        log_with_time(f"📥 Loading model weights for {short_name}...")
        weights_began = time.time()
        model = Mistral3ForConditionalGeneration.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        model = model.eval()
        log_with_time(f"✅ Model weights loaded in {time.time() - weights_began:.2f}s")

        bundle = (model, processor, tokenizer)
        MODEL_CACHE[model_id] = bundle
        elapsed = time.time() - began
        log_with_time(f"🎉 {short_name} fully loaded in {elapsed:.2f}s (cached for future use)")

        if show_notification:
            gr.Info(f"✅ {short_name} loaded and ready!", duration=3)

        return bundle
    except Exception as e:
        log_with_time(f"❌ Error loading model {model_id}: {e}")
        if show_notification:
            gr.Warning(f"❌ Failed to load model: {str(e)}", duration=10)
        return unavailable

def encode_file_to_base64(file_path):
    """Return the file at *file_path* as a ``data:<mime>;base64,<payload>`` URI."""
    with open(file_path, "rb") as handle:
        mime_type = client_utils.get_mimetype(file_path)
        payload = base64.b64encode(handle.read()).decode("utf-8")
    return "data:{};base64,{}".format(mime_type, payload)

def format_history_for_transformers(history, model_id):
    """Convert the UI chat history into a chat-template message list.

    The result starts with a system message built from
    ``load_system_prompt(model_id)``. Each user entry contributes its text
    (``content[1]``) followed by one image part per attached file
    (``content[0]``) that is either an ``http`` URL or an existing local file
    with an image MIME type. Each assistant entry contributes the space-joined
    text of its ``"text"`` parts, and is skipped when it has none. Entries with
    any other role are ignored.
    """
    messages = [{
        "role": "system",
        "content": [{"type": "text", "text": load_system_prompt(model_id)}],
    }]

    for entry in history:
        role = entry["role"]

        if role == "user":
            file_part, text_part = entry["content"][0], entry["content"][1]
            parts = [{"type": "text", "text": text_part["content"]}]
            for path in file_part["content"]:
                if path.startswith("http"):
                    keep = True
                else:
                    keep = os.path.exists(path) and client_utils.get_mimetype(path).startswith("image")
                if keep:
                    parts.append({"type": "image", "url": path})
            messages.append({"role": "user", "content": parts})
            continue

        if role != "assistant":
            continue

        texts = [part["content"] for part in entry["content"] if part["type"] == "text"]
        if not texts:
            continue
        messages.append({
            "role": "assistant",
            "content": [{"type": "text", "text": " ".join(texts)}],
        })

    return messages

def prepare_inputs(processor, messages):
    """Tokenize *messages* with the processor's chat template.

    Returns the processor output as PyTorch tensors with a generation prompt
    appended; a ``token_type_ids`` entry, if present, is removed.
    """
    log_with_time("📋 Preparing inputs...")

    template_options = {
        "add_generation_prompt": True,
        "tokenize": True,
        "return_dict": True,
        "return_tensors": "pt",
    }
    encoded = processor.apply_chat_template(messages, **template_options)

    if 'token_type_ids' in encoded:
        del encoded['token_type_ids']

    return encoded

@spaces.GPU(duration=180)
def generate_streaming(model, processor, tokenizer, messages):
    """Generate a reply for *messages* and stream it piece by piece.

    ``model.generate`` runs in a background thread that feeds a
    ``TextIteratorStreamer``; this generator drains the streamer.

    Args:
        model: Loaded model providing ``generate`` and ``device``.
        processor: Processor used by ``prepare_inputs`` to tokenize *messages*.
        tokenizer: Tokenizer the streamer uses to decode generated tokens.
        messages: Chat messages in the format produced by
            ``format_history_for_transformers``.

    Yields:
        ``{"token": str, "full_text": str, "done": bool}``: one dict per decoded
        piece with ``done=False``, then a final dict with ``token=""`` and
        ``done=True``.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here once the streamer has been drained.
    """
    log_with_time("🚀 Starting streaming generation...")

    inputs = prepare_inputs(processor, messages)

    log_with_time("📤 Moving tensors to GPU...")
    # Floating-point tensors are cast to bfloat16 to match the model weights;
    # all other tensors (e.g. token ids) keep their dtype.
    inputs = {
        k: (v.to(model.device, dtype=torch.bfloat16) if v.is_floating_point() else v.to(model.device))
        for k, v in inputs.items()
    }

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=2048,
        temperature=0.15,
        do_sample=True,
    )

    worker_errors = []

    def _run_generation():
        # An exception raised in a thread is otherwise lost, and the streamer
        # would never receive its stop signal, leaving the consumer loop below
        # blocked forever. Record the error and end the stream explicitly.
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:
            worker_errors.append(exc)
            streamer.end()

    log_with_time("🧵 Starting generation thread...")
    thread = Thread(target=_run_generation)
    thread.start()

    log_with_time("📝 Streaming tokens...")
    generated_text = ""
    for token in streamer:
        generated_text += token
        yield {"token": token, "full_text": generated_text, "done": False}

    thread.join()

    if worker_errors:
        # Surface the failure so the caller's error handling (timeout / OOM
        # messages) can react to it.
        raise worker_errors[0]

    log_with_time(f"✅ Generation complete: {len(generated_text)} chars")
    yield {"token": "", "full_text": generated_text, "done": True}


class Gradio_Events:
    """Event handlers for the chat UI, grouped as static methods.

    Handlers read and mutate ``state_value``, a dict with the keys
    ``conversation_id`` (active conversation, ``""`` when none),
    ``conversations`` (sidebar items with ``label`` / ``key``) and
    ``conversation_contexts`` (per conversation: ``history``,
    ``enable_thinking``, ``model_size``). They return or yield ``gr.update``
    payloads.

    The component objects used as dict keys (``chatbot``, ``state``, ``input``,
    ``conversations``, ``add_conversation_btn``, ``clear_btn``,
    ``conversation_delete_menu_item``) are module-level names defined outside
    this class. Note that ``input`` shadows the builtin of the same name.
    """

    @staticmethod
    def submit(state_value):
        """Generate the assistant reply for the active conversation.

        Appends a pending assistant entry to the history, then yields
        ``{chatbot: ..., state: ...}`` updates as the reply streams in. Falls
        back to a canned, character-by-character demo answer when no model
        could be loaded. Errors are rendered into the assistant entry rather
        than raised.
        """
        import html  # local import: only used to escape error text below

        context = state_value["conversation_contexts"][state_value["conversation_id"]]
        history = context["history"]
        enable_thinking = context["enable_thinking"]
        model_size = context.get("model_size", "14B")

        model_type = "reasoning" if enable_thinking else "instruct"
        model_id = MINISTRAL_MODELS[model_size][model_type]

        log_cache_status()

        # Build the model input before the placeholder assistant entry is added,
        # so the placeholder never becomes part of the prompt.
        log_with_time(f"📝 Formatting {len(history)} messages for {model_id.split('/')[-1]}")
        messages = format_history_for_transformers(history, model_id)
        log_with_time(f"📨 {len(messages)} messages prepared (including system prompt)")

        history.append({
            "role": "assistant",
            "content": [],
            "key": str(uuid.uuid4()),
            "loading": True,
            "header": f"Ministral-3-{model_size}",
            "status": "pending"
        })

        yield {
            chatbot: gr.update(value=history),
            state: gr.update(value=state_value),
        }
        try:
            start_time = time.time()
            answer_content = ""

            if TRANSFORMERS_AVAILABLE:
                # Only show loading notifications when the model is not cached yet.
                needs_download = model_id not in MODEL_CACHE
                model, processor, tokenizer = get_model_and_processor(model_id, show_notification=needs_download)
            else:
                model, processor, tokenizer = None, None, None

            if model is not None and processor is not None and tokenizer is not None:
                log_with_time(f"🚀 Starting inference with {model_id.split('/')[-1]}")

                try:
                    inference_start = time.time()

                    history[-1]["content"] = [{
                        "type": "text",
                        "content": "",
                    }]
                    history[-1]["loading"] = False

                    for chunk in generate_streaming(model, processor, tokenizer, messages):
                        if chunk.get("token"):
                            answer_content = chunk["full_text"]
                            history[-1]["content"][0]["content"] = answer_content
                            yield {
                                chatbot: gr.update(value=history),
                                state: gr.update(value=state_value)
                            }

                        if chunk.get("done"):
                            answer_content = chunk["full_text"]
                            break

                    inference_time = time.time() - inference_start
                    log_with_time(f"✅ Streaming generation completed in {inference_time:.2f}s")

                except Exception as e:
                    log_with_time(f"❌ Model inference error: {e}")
                    error_str = str(e)

                    # Map common failure modes to a friendlier message.
                    if "timeout" in error_str.lower() or "aborted" in error_str.lower():
                        answer_content = "⏱️ GPU timeout: The request took too long to process. Please try:\n\n1. Using a shorter prompt\n2. Reducing image sizes\n3. Trying again in a moment"
                    elif "memory" in error_str.lower() or "oom" in error_str.lower():
                        answer_content = "💾 GPU out of memory. Try using a smaller model or reducing input size."
                    else:
                        answer_content = f"❌ Model inference failed: {error_str}\n\nPlease try again or check the console for more details."

                    history[-1]["content"] = [{
                        "type": "text",
                        "content": answer_content,
                    }]
                    history[-1]["loading"] = False
                    yield {
                        chatbot: gr.update(value=history),
                        state: gr.update(value=state_value)
                    }

            else:
                log_with_time(f"⚠️ Using demo mode for: {model_id}")
                demo_answer = f"This is a demo response from {model_id}. The application is running in demo mode.\n\nTo use real models, install transformers: `pip install transformers torch`\n\nYour message: {messages[-1]['content'][0]['text'] if messages and messages[-1]['content'] else 'N/A'}"

                history[-1]["content"] = [{
                    "type": "text",
                    "content": "",
                }]

                # Emit the canned answer one character at a time to mimic streaming.
                for char in demo_answer:
                    answer_content += char
                    history[-1]["content"][0]["content"] = answer_content
                    history[-1]["loading"] = False
                    yield {
                        chatbot: gr.update(value=history),
                        state: gr.update(value=state_value)
                    }
                    time.sleep(0.01)

            log_with_time(f"📊 Response generated: {len(answer_content)} chars")
            history[-1]["status"] = "done"
            cost_time = "{:.2f}".format(time.time() - start_time)
            log_with_time(f"⏱️ Total request time: {cost_time}s")
            history[-1]["footer"] = f"{cost_time}s"
            yield {
                chatbot: gr.update(value=history),
                state: gr.update(value=state_value),
            }
        except Exception as e:
            log_with_time(f"❌ Request failed for {model_id.split('/')[-1]}: {e}")
            history[-1]["loading"] = False
            history[-1]["status"] = "done"
            if not history[-1]["content"]:
                history[-1]["content"] = []
            # The exception text is embedded in HTML, so escape it to keep
            # characters such as "<" from being interpreted as markup.
            history[-1]["content"].append({
                "type": "text",
                "content": f'<span style="color: var(--color-red-500)">Error: {html.escape(str(e))}</span>'
            })
            yield {
                chatbot: gr.update(value=history),
                state: gr.update(value=state_value)
            }

    @staticmethod
    def add_message(input_value, thinking_btn_state_value, model_selector_state_value, state_value):
        """Append the user's message to the active conversation and generate a reply.

        Creates a new conversation (labelled with the message text) when none
        is active, stores the current thinking/model-size settings on the
        conversation, then yields the pre-submit UI update, every update from
        ``submit`` and finally the post-submit UI update.
        """
        text = input_value["text"]
        files = input_value["files"]
        if not state_value["conversation_id"]:
            random_id = str(uuid.uuid4())
            history = []
            state_value["conversation_id"] = random_id
            state_value["conversation_contexts"][state_value["conversation_id"]] = {"history": history}
            state_value["conversations"].append({"label": text, "key": random_id})

        history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]

        state_value["conversation_contexts"][state_value["conversation_id"]] = {
            "history": history,
            "enable_thinking": thinking_btn_state_value["enable_thinking"],
            "model_size": model_selector_state_value["model_size"]
        }

        history.append({
            "key": str(uuid.uuid4()),
            "role": "user",
            "content": [{"type": "file", "content": list(files)}, {"type": "text", "content": text}]
        })
        yield Gradio_Events.preprocess_submit(clear_input=True)(state_value)

        try:
            for chunk in Gradio_Events.submit(state_value):
                yield chunk
        finally:
            # Always re-enable the controls, even if generation failed.
            yield Gradio_Events.postprocess_submit(state_value)

    @staticmethod
    def preprocess_submit(clear_input=True):
        """Return a handler that locks the UI while a reply is being generated.

        The handler disables the other conversations, the new-chat, clear and
        delete controls and the chatbot's edit/retry/delete actions. When
        ``clear_input`` is true it also clears the input and marks it as
        loading; otherwise the input component is left out of the update.
        """
        def preprocess_submit_handler(state_value):
            active_id = state_value["conversation_id"]
            history = state_value["conversation_contexts"][active_id]["history"]

            updates = {}
            if clear_input:
                updates[input] = gr.update(value=None, loading=True)
            updates.update({
                conversations: gr.update(
                    active_key=active_id,
                    # Every conversation except the active one is disabled.
                    items=[{**item, "disabled": item["key"] != active_id} for item in state_value["conversations"]],
                ),
                add_conversation_btn: gr.update(disabled=True),
                clear_btn: gr.update(disabled=True),
                conversation_delete_menu_item: gr.update(disabled=True),
                chatbot: gr.update(value=history, bot_config=bot_config(disabled_actions=['edit', 'retry', 'delete']), user_config=user_config(disabled_actions=['edit', 'delete'])),
                state: gr.update(value=state_value),
            })
            return updates
        return preprocess_submit_handler

    @staticmethod
    def postprocess_submit(state_value):
        """Return the UI update that unlocks the controls after generation."""
        history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
        return {
            input: gr.update(loading=False),
            conversation_delete_menu_item: gr.update(disabled=False),
            clear_btn: gr.update(disabled=False),
            conversations: gr.update(items=state_value["conversations"]),
            add_conversation_btn: gr.update(disabled=False),
            chatbot: gr.update(value=history, bot_config=bot_config(), user_config=user_config()),
            state: gr.update(value=state_value),
        }

    @staticmethod
    def cancel(state_value):
        """Mark the latest history entry as stopped and unlock the UI."""
        history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
        if history:  # nothing to mark when the history is empty
            history[-1]["loading"] = False
            history[-1]["status"] = "done"
            history[-1]["footer"] = "Chat completion paused"
        return Gradio_Events.postprocess_submit(state_value)

    @staticmethod
    def delete_message(state_value, e: gr.EventData):
        """Remove the history entry at the index carried by the event payload."""
        index = e._data["payload"][0]["index"]
        history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
        history = history[:index] + history[index + 1:]
        state_value["conversation_contexts"][state_value["conversation_id"]]["history"] = history
        return gr.update(value=state_value)

    @staticmethod
    def edit_message(state_value, chatbot_value, e: gr.EventData):
        """Copy an edited message from the chatbot value into the history.

        On the first edit of an entry, sets its ``edited`` flag and appends
        ``"Edited"`` to its footer.
        """
        index = e._data["payload"][0]["index"]
        history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
        history[index]["content"] = chatbot_value[index]["content"]
        if not history[index].get("edited"):
            history[index]["edited"] = True
            history[index]["footer"] = ((history[index]["footer"]) + " " if history[index].get("footer") else "") + "Edited"
        return gr.update(value=state_value), gr.update(value=history)

    @staticmethod
    def regenerate_message(thinking_btn_state_value, model_selector_state_value, state_value, e: gr.EventData):
        """Drop the history from the event's index onward and generate a new reply.

        Stores the current thinking/model-size settings on the conversation
        before resubmitting. Yields the same sequence of updates as
        ``add_message``.
        """
        index = e._data["payload"][0]["index"]
        history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"]
        history = history[:index]

        state_value["conversation_contexts"][state_value["conversation_id"]] = {
            "history": history,
            "enable_thinking": thinking_btn_state_value["enable_thinking"],
            "model_size": model_selector_state_value["model_size"]
        }

        yield Gradio_Events.preprocess_submit()(state_value)
        try:
            for chunk in Gradio_Events.submit(state_value):
                yield chunk
        finally:
            # Always re-enable the controls, even if generation failed.
            yield Gradio_Events.postprocess_submit(state_value)

    @staticmethod
    def apply_prompt(e: gr.EventData, input_value):
        """Fill the input with the description and URLs of the selected prompt."""
        input_value["text"] = e._data["payload"][0]["value"]["description"]
        input_value["files"] = e._data["payload"][0]["value"]["urls"]
        return gr.update(value=input_value)

    @staticmethod
    def new_chat(thinking_btn_state, model_selector_state, state_value):
        """Deselect the active conversation and reset thinking/model-size defaults.

        Does nothing (``gr.skip()``) when no conversation is active.
        """
        if not state_value["conversation_id"]:
            return gr.skip()
        state_value["conversation_id"] = ""
        thinking_btn_state["enable_thinking"] = True
        model_selector_state["model_size"] = "14B"
        return gr.update(active_key=state_value["conversation_id"]), gr.update(value=None), gr.update(value=thinking_btn_state), gr.update(value="14B"), gr.update(value=state_value)

    @staticmethod
    def select_conversation(thinking_btn_state_value, model_selector_state_value, state_value, e: gr.EventData):
        """Switch to the conversation named in the event payload.

        Restores that conversation's history, thinking flag and model size.
        Does nothing (``gr.skip()``) when it is already active or unknown.
        """
        active_key = e._data["payload"][0]
        if state_value["conversation_id"] == active_key or (active_key not in state_value["conversation_contexts"]):
            return gr.skip()
        state_value["conversation_id"] = active_key
        thinking_btn_state_value["enable_thinking"] = state_value["conversation_contexts"][active_key].get("enable_thinking", False)
        model_size = state_value["conversation_contexts"][active_key].get("model_size", "14B")
        model_selector_state_value["model_size"] = model_size
        return gr.update(active_key=active_key), gr.update(value=state_value["conversation_contexts"][active_key]["history"]), gr.update(value=thinking_btn_state_value), gr.update(value=model_size), gr.update(value=state_value)

    @staticmethod
    def click_conversation_menu(state_value, e: gr.EventData):
        """Handle a conversation context-menu click; only ``"delete"`` is acted on.

        Deleting removes the conversation and its context; if it was the
        active conversation, the active selection is cleared as well.
        """
        conversation_id = e._data["payload"][0]["key"]
        operation = e._data["payload"][1]["key"]
        if operation == "delete":
            # pop() instead of del: a listed conversation may have no stored
            # context, which must not abort the deletion.
            state_value["conversation_contexts"].pop(conversation_id, None)
            state_value["conversations"] = [item for item in state_value["conversations"] if item["key"] != conversation_id]
            if state_value["conversation_id"] == conversation_id:
                state_value["conversation_id"] = ""
                return gr.update(items=state_value["conversations"], active_key=state_value["conversation_id"]), gr.update(value=None), gr.update(value=state_value)
            else:
                return gr.update(items=state_value["conversations"]), gr.skip(), gr.update(value=state_value)
        return gr.skip()

    @staticmethod
    def clear_conversation_history(state_value):
        """Empty the active conversation's history; no-op when none is active."""
        if not state_value["conversation_id"]:
            return gr.skip()
        state_value["conversation_contexts"][state_value["conversation_id"]]["history"] = []
        return gr.update(value=None), gr.update(value=state_value)

    @staticmethod
    def update_browser_state(state_value):
        """Return an update carrying the conversations and their contexts."""
        return gr.update(value=dict(conversations=state_value["conversations"], conversation_contexts=state_value["conversation_contexts"]))

    @staticmethod
    def apply_browser_state(browser_state_value, state_value):
        """Load conversations and contexts from *browser_state_value* into the state."""
        state_value["conversations"] = browser_state_value["conversations"]
        state_value["conversation_contexts"] = browser_state_value["conversation_contexts"]
        return gr.update(items=browser_state_value["conversations"]), gr.update(value=state_value)


# Custom stylesheet passed to ``gr.Blocks(css=...)``. It overrides Gradio and
# modelscope-studio (``ms-gr-*``) styles for the page, the chatbot, the
# conversations sidebar and the welcome prompts; most rules use ``!important``.
css = """
body, html {
  background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important;
  color: var(--ms-gr-ant-color-text, #000000) !important;
}

.gradio-container, .gradio-container.dark {
  padding: 0 !important;
  background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important;
  color: var(--ms-gr-ant-color-text, #000000) !important;
}
.gradio-container > main.fillable {
  padding: 0 !important;
  background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important;
}

#chatbot .ms-gr-ant-col,
#chatbot .ms-gr-antd-col,
#chatbot [class*="ms-gr-ant-col"] {
  padding-left: 0 !important;
  padding-right: 0 !important;
  background-color: transparent !important;
}

#chatbot {
  height: calc(100vh - 21px - 16px);
  max-height: 1500px;
  background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important;
}
#chatbot .chatbot-conversations {
  height: 100vh;
  background-color: var(--ms-gr-ant-color-bg-container, #FFF0C3) !important;
  padding-left: 4px;
  padding-right: 4px;
}
#chatbot .chatbot-conversations .chatbot-conversations-list {
  padding-left: 0;
  padding-right: 0;
}
#chatbot .chatbot-chat {
  padding: 32px;
  padding-bottom: 0;
  height: 100%;
  background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important;
  color: var(--ms-gr-ant-color-text, #000000) !important;
}
@media (max-width: 768px) {
  #chatbot .chatbot-chat {
      padding: 10px;
  }
}
#chatbot .chatbot-chat .chatbot-chat-messages {
  flex: 1;
  background-color: transparent !important;
}

.gradio-container .contain {
  background-color: transparent !important;
  padding: 0 !important;
}

.user-message-content {
  background-color: #ffffff !important;
  background: #ffffff !important;
  border-radius: 16px !important;
  padding: 14px 18px !important;
  border: 1px solid #E9E2CB !important;
  color: #1E1E1E !important;
  box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06) !important;
  max-width: fit-content;
}

[class*="chatbot"] [class*="user"] [class*="content"],
[class*="chatbot"] [class*="user"] [class*="bubble"],
[class*="chatbot"] [class*="user"] [class*="message"],
[class*="pro-chatbot"] [class*="user"] {
  background-color: transparent !important;
  background: transparent !important;
}

.user-message-content,
.user-message-content *:not(code):not(pre) {
  background-color: inherit !important;
}

.chatbot-welcome-prompts {
  background-color: var(--ms-gr-ant-color-bg-container, #FFF0C3) !important;
}

[class*="welcome"] [class*="prompt"],
[class*="welcome"] [class*="prompts"],
[class*="prompts"] [class*="item"],
[class*="prompts"] [class*="card"],
.ms-gr-antdx-prompts-item,
.ms-gr-pro-chatbot-welcome-prompts {
  color: #1E1E1E !important;
}

[class*="welcome"] span,
[class*="welcome"] p,
[class*="welcome"] div,
[class*="prompts"] span,
[class*="prompts"] p,
[class*="prompts"] div,
[class*="prompts"] [class*="title"],
[class*="prompts"] [class*="description"],
.ms-gr-antdx-prompts-item span,
.ms-gr-antdx-prompts-item p,
.ms-gr-antdx-prompts-item div {
  color: #1E1E1E !important;
}

[class*="prompts"] [class*="item"] {
  background-color: #FFFAEB !important;
  border: 1px solid #E9E2CB !important;
}

.chatbot-conversations {
  background-color: var(--ms-gr-ant-color-bg-container, #FFF0C3) !important;
}

.chatbot-conversations .ms-gr-ant-typography {
  color: var(--ms-gr-ant-color-text, #000000) !important;
}

.chatbot-conversations .ms-gr-ant-btn-color-primary.ms-gr-ant-btn-variant-filled {
  background-color: var(--ms-gr-ant-color-primary, #FF8205) !important;
  color: #ffffff !important;
  border: none !important;
}

.chatbot-conversations .ms-gr-ant-btn-color-primary.ms-gr-ant-btn-variant-filled:hover {
  background-color: #FA500F !important;
  transform: translateY(-1px);
}

.chatbot-conversations .ms-gr-ant-conversations {
  color: var(--ms-gr-ant-color-text, #000000) !important;
}

.chatbot-conversations .ms-gr-ant-conversations-item {
  color: var(--ms-gr-ant-color-text, #000000) !important;
  background-color: transparent !important;
}

.chatbot-conversations .ms-gr-ant-conversations-item:hover {
  background-color: var(--ms-gr-ant-color-bg-elevated, #E9E2CB) !important;
}

.ant-typography {
  color: var(--ms-gr-ant-color-text, #000000) !important;
}

.ant-flex {
  color: var(--ms-gr-ant-color-text, #000000) !important;
}

#chatbot > .ant-col {
  background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important;
}

h1, h2, h3, h4, h5, h6, p, span {
  color: var(--ms-gr-ant-color-text);
}

.ms-gr-pro-chatbot-bot,
.ms-gr-pro-chatbot-bot *,
.ms-gr-pro-chatbot-bot .ms-gr-pro-chatbot-message-content,
.ms-gr-pro-chatbot-bot [class*="content"],
[class*="chatbot"] [class*="bot"],
[class*="chatbot"] [class*="bot"] * {
  color: #1E1E1E !important;
}

.ms-gr-pro-chatbot-bot h1,
.ms-gr-pro-chatbot-bot h2,
.ms-gr-pro-chatbot-bot h3,
.ms-gr-pro-chatbot-bot h4,
.ms-gr-pro-chatbot-bot h5,
.ms-gr-pro-chatbot-bot h6,
.ms-gr-pro-chatbot-bot strong,
.ms-gr-pro-chatbot-bot b,
.ms-gr-pro-chatbot-bot em,
.ms-gr-pro-chatbot-bot i,
.ms-gr-pro-chatbot-bot p,
.ms-gr-pro-chatbot-bot span,
.ms-gr-pro-chatbot-bot li,
.ms-gr-pro-chatbot-bot ul,
.ms-gr-pro-chatbot-bot ol,
.ms-gr-pro-chatbot-bot a,
[class*="chatbot"] [class*="bot"] h1,
[class*="chatbot"] [class*="bot"] h2,
[class*="chatbot"] [class*="bot"] h3,
[class*="chatbot"] [class*="bot"] strong,
[class*="chatbot"] [class*="bot"] b,
[class*="chatbot"] [class*="bot"] p,
[class*="chatbot"] [class*="bot"] span,
[class*="chatbot"] [class*="bot"] li {
  color: #1E1E1E !important;
}

.ms-gr-pro-chatbot-bot [style*="color"],
[class*="chatbot"] [class*="bot"] [style*="color"],
[class*="chatbot"] [class*="bot"] [style] {
  color: #1E1E1E !important;
}

.ms-gr-pro-chatbot-bot pre,
.ms-gr-pro-chatbot-bot pre code {
  background-color: #E9E2CB !important;
  color: #1E1E1E !important;
}

.ms-gr-pro-chatbot-bot code:not(pre code) {
  background-color: #E9E2CB !important;
  color: #1E1E1E !important;
  padding: 2px 6px;
  border-radius: 4px;
}

footer {
  display: none !important;
}
.footer {
  display: none !important;
}

*, *::before, *::after {
  transition: none !important;
}
"""

# Root of the Gradio app: shared state holders plus the two-column chat layout.
with gr.Blocks(
    fill_width=True,
    css=css,
    theme=gr.themes.Default(primary_hue="orange", secondary_hue="gray", neutral_hue="gray"),
) as demo:

    # Conversation bookkeeping passed to every Gradio_Events handler.
    state = gr.State({"conversation_contexts": {}, "conversations": [], "conversation_id": "", "oss_cache": {}})
    # Whether the "Thinking" toggle is on; starts disabled.
    thinking_btn_state = gr.State({"enable_thinking": False})
    # Seeded from DEFAULT_MODEL_SIZE (not a literal "14B") so the state agrees
    # with the initial value shown by the model selector below.
    model_selector_state = gr.State({"model_size": DEFAULT_MODEL_SIZE})

    with ms.Application(), antdx.XProvider(theme=LIGHT_THEME), ms.AutoLoading():
        with antd.Row(gutter=[20, 20], wrap=False, elem_id="chatbot"):
            # Left column: logo/title, "New Conversation" button and the conversation list.
            # NOTE(review): span=0 with an md override presumably hides this column on
            # narrow screens -- confirm against the antd grid documentation.
            with antd.Col(md=dict(flex="0 0 260px", span=24, order=0), span=0, order=1, elem_style=dict(width=0)):
                with ms.Div(elem_classes="chatbot-conversations"):
                    with antd.Flex(vertical=True, gap="small", elem_style=dict(height="100%")):
                        with antd.Flex(vertical=True, gap="small", align="center", elem_style=dict(padding=8)):
                            with antd.Typography.Title(level=1, elem_style=dict(fontSize=24, margin=0)):
                                with antd.Flex(align="center", gap="small", justify="center"):
                                    antd.Image('./assets/m-boxed-rainbow.png', preview=False, alt="logo", width=24, height=24)
                                    ms.Span("Ministrals Demo")

                        with antd.Button(value=None, color="primary", variant="filled", block=True) as add_conversation_btn:
                            ms.Text("New Conversation")
                            with ms.Slot("icon"):
                                antd.Icon("PlusOutlined")

                        with antdx.Conversations(elem_classes="chatbot-conversations-list") as conversations:
                            # Per-conversation context menu with a single "Delete" entry.
                            with ms.Slot('menu.items'):
                                with antd.Menu.Item(label="Delete", key="delete", danger=True) as conversation_delete_menu_item:
                                    with ms.Slot("icon"):
                                        antd.Icon("DeleteOutlined")

            # Right column: header, message list and the multimodal input box.
            with antd.Col(flex=1, elem_style=dict(height="100%")):
                with antd.Flex(vertical=True, gap="small", elem_classes="chatbot-chat"):
                    with antd.Flex(align="center", gap="large", elem_style=dict(paddingBottom=10)):
                        antd.Typography.Title("Hello, I'm Ministral", level=3, elem_style=dict(margin=0))
                        with antd.Flex(align="center", gap="small"):
                            ms.Span("currently using:", elem_style=dict(fontSize=12))
                            # Initial label mirrors what update_model_display computes for
                            # the default size with thinking disabled ("instruct" variant),
                            # instead of a hard-coded 14B repo id.
                            model_display = antd.Typography.Text(
                                value=MINISTRAL_MODELS[DEFAULT_MODEL_SIZE]["instruct"],
                                copyable=True, code=True,
                                elem_style=dict(fontSize=12, color="var(--ms-gr-ant-color-text-secondary)")
                            )

                    chatbot = pro.Chatbot(elem_classes="chatbot-chat-messages", height=0,
                                          markdown_config=markdown_config(), welcome_config=welcome_config(),
                                          user_config=user_config(), bot_config=bot_config())

                    # NOTE: the name `input` shadows the builtin; it is kept because other
                    # parts of the file may reference this module-level component by name.
                    with pro.MultimodalInput(placeholder="How can I help you today?", upload_config=upload_config()) as input:
                        # Controls rendered in the input's "prefix" slot:
                        # clear button, model-size selector and the thinking toggle.
                        with ms.Slot("prefix"):
                            with antd.Flex(gap=4, wrap=True, elem_style=dict(maxWidth='40vw', display="inline-flex")):
                                with antd.Button(value=None, type="text") as clear_btn:
                                    with ms.Slot("icon"):
                                        antd.Icon("ClearOutlined")

                                model_selector = antd.Select(
                                    value=DEFAULT_MODEL_SIZE, default_value=DEFAULT_MODEL_SIZE,
                                    options=[{"label": "Ministral-3-14B", "value": "14B"}, {"label": "Ministral-3-8B", "value": "8B"}, {"label": "Ministral-3-3B", "value": "3B"}],
                                    elem_style=dict(width=180)
                                )

                                with antd.Button("Thinking", shape="round", color="primary") as thinking_btn:
                                    with ms.Slot("icon"):
                                        antd.Icon("SunOutlined")

    def toggle_thinking(state_value):
        """Flip the ``enable_thinking`` flag and return the updated state.

        The dict is modified in place and the same object is handed back
        wrapped in ``gr.update``.
        """
        was_enabled = state_value["enable_thinking"]
        state_value["enable_thinking"] = not was_enabled
        return gr.update(value=state_value)

    def apply_thinking_style(state_value):
        """Return a button update whose variant reflects the thinking flag.

        ``"solid"`` is used while thinking is enabled; an empty string otherwise.
        """
        if state_value["enable_thinking"]:
            button_variant = "solid"
        else:
            button_variant = ""
        return gr.update(variant=button_variant)

    def update_model_size(value, state_value):
        """Store the selected model size in the state dict and return it.

        ``value`` is the selector's current value; it is written under the
        ``"model_size"`` key of ``state_value`` (modified in place).
        """
        state_value.update({"model_size": value})
        return gr.update(value=state_value)
    
    def update_model_display(thinking_state, model_state):
        """Return an update carrying the model name for the current selection.

        The name is looked up in ``MINISTRAL_MODELS`` by model size (falling
        back to ``"14B"`` when the key is absent) and by variant:
        ``"reasoning"`` when thinking is enabled, ``"instruct"`` otherwise.
        """
        if thinking_state.get("enable_thinking", False):
            variant_key = "reasoning"
        else:
            variant_key = "instruct"
        size_key = model_state.get("model_size", "14B")
        return gr.update(value=MINISTRAL_MODELS[size_key][variant_key])

    # A change to the thinking state restyles the button and refreshes the model label.
    thinking_btn_state.change(
        fn=apply_thinking_style,
        inputs=[thinking_btn_state],
        outputs=[thinking_btn],
    )
    thinking_btn_state.change(
        fn=update_model_display,
        inputs=[thinking_btn_state, model_selector_state],
        outputs=[model_display],
    )
    # Clicking the button only flips the state; the listeners above do the rest.
    thinking_btn.click(
        fn=toggle_thinking,
        inputs=[thinking_btn_state],
        outputs=[thinking_btn_state],
    )

    # The selector writes into its state, which in turn refreshes the model label.
    model_selector.change(
        fn=update_model_size,
        inputs=[model_selector, model_selector_state],
        outputs=[model_selector_state],
    )
    model_selector_state.change(
        fn=update_model_display,
        inputs=[thinking_btn_state, model_selector_state],
        outputs=[model_display],
    )

    # Optional persistence: mirror `state` into browser storage and apply the
    # stored value when the page loads.
    if save_history:
        browser_state = gr.BrowserState(
            {"conversation_contexts": {}, "conversations": []},
            storage_key="ministral_demo_storage",
        )
        state.change(
            fn=Gradio_Events.update_browser_state,
            inputs=[state],
            outputs=[browser_state],
        )
        demo.load(
            fn=Gradio_Events.apply_browser_state,
            inputs=[browser_state, state],
            outputs=[conversations, state],
        )

    # --- Conversation list events ---
    add_conversation_btn.click(
        fn=Gradio_Events.new_chat,
        inputs=[thinking_btn_state, model_selector_state, state],
        outputs=[conversations, chatbot, thinking_btn_state, model_selector, state],
    )
    conversations.active_change(
        fn=Gradio_Events.select_conversation,
        inputs=[thinking_btn_state, model_selector_state, state],
        outputs=[conversations, chatbot, thinking_btn_state, model_selector, state],
    )
    conversations.menu_click(
        fn=Gradio_Events.click_conversation_menu,
        inputs=[state],
        outputs=[conversations, chatbot, state],
    )

    # --- Chatbot message events ---
    chatbot.welcome_prompt_select(
        fn=Gradio_Events.apply_prompt,
        inputs=[input],
        outputs=[input],
    )
    chatbot.delete(
        fn=Gradio_Events.delete_message,
        inputs=[state],
        outputs=[state],
    )
    chatbot.edit(
        fn=Gradio_Events.edit_message,
        inputs=[state, chatbot],
        outputs=[state, chatbot],
    )

    # --- Generation events; handles are kept so that cancel can abort them ---
    regenerating_event = chatbot.retry(
        fn=Gradio_Events.regenerate_message,
        inputs=[thinking_btn_state, model_selector_state, state],
        outputs=[input, clear_btn, conversation_delete_menu_item, add_conversation_btn, conversations, chatbot, state],
    )

    submit_event = input.submit(
        fn=Gradio_Events.add_message,
        inputs=[input, thinking_btn_state, model_selector_state, state],
        outputs=[input, clear_btn, conversation_delete_menu_item, add_conversation_btn, conversations, chatbot, state],
    )
    # NOTE(review): the outputs here are listed in a different order than for
    # submit/retry; presumably the handler returns a component-keyed dict so
    # order is irrelevant -- verify against Gradio_Events.cancel.
    input.cancel(
        fn=Gradio_Events.cancel,
        inputs=[state],
        outputs=[input, conversation_delete_menu_item, clear_btn, conversations, add_conversation_btn, chatbot, state],
        cancels=[submit_event, regenerating_event],
        queue=False,
    )

    clear_btn.click(
        fn=Gradio_Events.clear_conversation_history,
        inputs=[state],
        outputs=[chatbot, state],
    )

if __name__ == "__main__":
    # Configure the request queue first, then start the server on the queued app.
    queued_demo = demo.queue(default_concurrency_limit=100, max_size=100)
    queued_demo.launch(ssr_mode=False, max_threads=100, show_api=False)