import base64 from http import HTTPStatus import os import uuid import time from datetime import datetime, timedelta import torch import gradio as gr from gradio_client import utils as client_utils import modelscope_studio.components.antd as antd import modelscope_studio.components.antdx as antdx import modelscope_studio.components.base as ms import modelscope_studio.components.pro as pro from config import DEFAULT_THEME, LIGHT_THEME, DEFAULT_SYS_PROMPT,DEFAULT_MODEL_SIZE, save_history, user_config, bot_config, welcome_config, markdown_config, upload_config, MINISTRAL_MODELS from ui_components.thinking_button import ThinkingButton import spaces try: from transformers import Mistral3ForConditionalGeneration, AutoProcessor, AutoTokenizer, TextIteratorStreamer from huggingface_hub import hf_hub_download from threading import Thread TRANSFORMERS_AVAILABLE = True except ImportError: TRANSFORMERS_AVAILABLE = False print("Warning: transformers not available. Running in demo mode only.") MODEL_CACHE = {} SYSTEM_PROMPT_CACHE = {} PROCESSOR_CACHE = {} print("=" * 50) print("๐Ÿš€ Ministral Demo Starting") print(f" Model cache initialized (empty)") print(f" Processor cache initialized (empty)") print(f" System prompt cache initialized (empty)") print("=" * 50) def log_with_time(message: str): timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3] print(f"[{timestamp}] {message}") def log_cache_status(): log_with_time(f"๐Ÿ“ฆ Cache status: {len(MODEL_CACHE)} models, {len(PROCESSOR_CACHE)} processors, {len(SYSTEM_PROMPT_CACHE)} prompts cached") def load_system_prompt(model_id: str) -> str: cache_key = model_id if cache_key in SYSTEM_PROMPT_CACHE: log_with_time(f"๐Ÿ“‹ System prompt cache hit for {model_id.split('/')[-1]}") cached_prompt = SYSTEM_PROMPT_CACHE[cache_key] today = datetime.today().strftime("%Y-%m-%d") yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d") model_name = model_id.split("/")[-1] return cached_prompt.format(name=model_name, today=today, yesterday=yesterday) try: log_with_time(f"๐Ÿ“ฅ Downloading system prompt for {model_id.split('/')[-1]}...") start = time.time() file_path = hf_hub_download(repo_id=model_id, filename="SYSTEM_PROMPT.txt") with open(file_path, "r") as file: system_prompt = file.read() SYSTEM_PROMPT_CACHE[cache_key] = system_prompt today = datetime.today().strftime("%Y-%m-%d") yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d") model_name = model_id.split("/")[-1] log_with_time(f"โœ… System prompt loaded in {time.time() - start:.2f}s") return system_prompt.format(name=model_name, today=today, yesterday=yesterday) except Exception as e: log_with_time(f"โš ๏ธ Could not load system prompt: {e}") return DEFAULT_SYS_PROMPT def get_processor_and_tokenizer(model_id: str): if model_id in PROCESSOR_CACHE: log_with_time(f"๐Ÿ“‹ Processor cache hit for {model_id.split('/')[-1]}") return PROCESSOR_CACHE[model_id] try: log_with_time(f"๐Ÿ“ฅ Loading processor for {model_id.split('/')[-1]}...") start = time.time() processor = AutoProcessor.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True) PROCESSOR_CACHE[model_id] = (processor, tokenizer) log_with_time(f"โœ… Processor loaded in {time.time() - start:.2f}s") return processor, tokenizer except Exception as e: log_with_time(f"โŒ Error loading processor: {e}") return None, None def get_model_and_processor(model_id: str, show_notification=False): if not TRANSFORMERS_AVAILABLE: log_with_time("โš ๏ธ Transformers not available") return None, None, None if model_id in MODEL_CACHE: log_with_time(f"๐Ÿ“‹ Model cache hit for {model_id.split('/')[-1]}") return MODEL_CACHE[model_id] model_name = model_id.split("/")[-1] try: if show_notification: gr.Info(f"๐Ÿ“ฅ Loading {model_name}... This may take a few minutes on first use.", duration=15) total_start = time.time() processor, tokenizer = get_processor_and_tokenizer(model_id) if processor is None or tokenizer is None: return None, None, None log_with_time(f"๐Ÿ“ฅ Loading model weights for {model_name}...") model_start = time.time() model = Mistral3ForConditionalGeneration.from_pretrained( model_id, torch_dtype=torch.bfloat16, device_map="auto" ).eval() log_with_time(f"โœ… Model weights loaded in {time.time() - model_start:.2f}s") MODEL_CACHE[model_id] = (model, processor, tokenizer) total_time = time.time() - total_start log_with_time(f"๐ŸŽ‰ {model_name} fully loaded in {total_time:.2f}s (cached for future use)") if show_notification: gr.Info(f"โœ… {model_name} loaded and ready!", duration=3) return model, processor, tokenizer except Exception as e: log_with_time(f"โŒ Error loading model {model_id}: {e}") if show_notification: gr.Warning(f"โŒ Failed to load model: {str(e)}", duration=10) return None, None, None def encode_file_to_base64(file_path): with open(file_path, "rb") as file: mime_type = client_utils.get_mimetype(file_path) bae64_data = base64.b64encode(file.read()).decode("utf-8") return f"data:{mime_type};base64,{bae64_data}" def format_history_for_transformers(history, model_id): system_prompt = load_system_prompt(model_id) messages = [{ "role": "system", "content": [{"type": "text", "text": system_prompt}] }] for item in history: if item["role"] == "user": content = [] text_content = item["content"][1]["content"] content.append({"type": "text", "text": text_content}) for file_path in item["content"][0]["content"]: if file_path.startswith("http"): content.append({"type": "image", "url": file_path}) elif os.path.exists(file_path): mime_type = client_utils.get_mimetype(file_path) if mime_type.startswith("image"): content.append({"type": "image", "url": file_path}) messages.append({ "role": "user", "content": content }) elif item["role"] == "assistant": text_contents = [content["content"] for content in item["content"] if content["type"] == "text"] if text_contents: messages.append({ "role": "assistant", "content": [{"type": "text", "text": " ".join(text_contents)}] }) return messages def prepare_inputs(processor, messages): log_with_time("๐Ÿ“‹ Preparing inputs...") inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt" ) if 'token_type_ids' in inputs: del inputs['token_type_ids'] return inputs @spaces.GPU(duration=180) def generate_streaming(model, processor, tokenizer, messages): log_with_time("๐Ÿš€ Starting streaming generation...") inputs = prepare_inputs(processor, messages) log_with_time("๐Ÿ“ค Moving tensors to GPU...") inputs = { k: (v.to(model.device, dtype=torch.bfloat16) if v.is_floating_point() else v.to(model.device)) for k, v in inputs.items() } streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) generation_kwargs = dict( **inputs, streamer=streamer, max_new_tokens=2048, temperature=0.15, do_sample=True, ) log_with_time("๐Ÿงต Starting generation thread...") thread = Thread(target=model.generate, kwargs=generation_kwargs) thread.start() log_with_time("๐Ÿ“ Streaming tokens...") generated_text = "" for token in streamer: generated_text += token yield {"token": token, "full_text": generated_text, "done": False} thread.join() log_with_time(f"โœ… Generation complete: {len(generated_text)} chars") yield {"token": "", "full_text": generated_text, "done": True} class Gradio_Events: @staticmethod def submit(state_value): history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"] enable_thinking = state_value["conversation_contexts"][state_value["conversation_id"]]["enable_thinking"] model_size = state_value["conversation_contexts"][state_value["conversation_id"]].get("model_size", "14B") model_type = "reasoning" if enable_thinking else "instruct" model_id = MINISTRAL_MODELS[model_size][model_type] log_cache_status() log_with_time(f"๐Ÿ“ Formatting {len(history)} messages for {model_id.split('/')[-1]}") messages = format_history_for_transformers(history, model_id) log_with_time(f"๐Ÿ“จ {len(messages)} messages prepared (including system prompt)") history.append({ "role": "assistant", "content": [], "key": str(uuid.uuid4()), "loading": True, "header": f"Ministral-3-{model_size}", "status": "pending" }) yield { chatbot: gr.update(value=history), state: gr.update(value=state_value), } try: start_time = time.time() answer_content = "" if TRANSFORMERS_AVAILABLE: needs_download = model_id not in MODEL_CACHE model, processor, tokenizer = get_model_and_processor(model_id, show_notification=needs_download) else: model, processor, tokenizer = None, None, None if model is not None and processor is not None and tokenizer is not None: log_with_time(f"๐Ÿš€ Starting inference with {model_id.split('/')[-1]}") try: inference_start = time.time() history[-1]["content"] = [{ "type": "text", "content": "", }] history[-1]["loading"] = False for chunk in generate_streaming(model, processor, tokenizer, messages): if chunk.get("token"): answer_content = chunk["full_text"] history[-1]["content"][0]["content"] = answer_content yield { chatbot: gr.update(value=history), state: gr.update(value=state_value) } if chunk.get("done"): answer_content = chunk["full_text"] break inference_time = time.time() - inference_start log_with_time(f"โœ… Streaming generation completed in {inference_time:.2f}s") except Exception as e: log_with_time(f"โŒ Model inference error: {e}") error_str = str(e) if "timeout" in error_str.lower() or "aborted" in error_str.lower(): answer_content = "โฑ๏ธ GPU timeout: The request took too long to process. Please try:\n\n1. Using a shorter prompt\n2. Reducing image sizes\n3. Trying again in a moment" elif "memory" in error_str.lower() or "oom" in error_str.lower(): answer_content = "๐Ÿ’พ GPU out of memory. Try using a smaller model or reducing input size." else: answer_content = f"โŒ Model inference failed: {error_str}\n\nPlease try again or check the console for more details." history[-1]["content"] = [{ "type": "text", "content": answer_content, }] history[-1]["loading"] = False yield { chatbot: gr.update(value=history), state: gr.update(value=state_value) } else: log_with_time(f"โš ๏ธ Using demo mode for: {model_id}") demo_answer = f"This is a demo response from {model_id}. The application is running in demo mode.\n\nTo use real models, install transformers: `pip install transformers torch`\n\nYour message: {messages[-1]['content'][0]['text'] if messages and messages[-1]['content'] else 'N/A'}" history[-1]["content"] = [{ "type": "text", "content": "", }] for char in demo_answer: answer_content += char history[-1]["content"][0]["content"] = answer_content history[-1]["loading"] = False yield { chatbot: gr.update(value=history), state: gr.update(value=state_value) } time.sleep(0.01) log_with_time(f"๐Ÿ“Š Response generated: {len(answer_content)} chars") history[-1]["status"] = "done" cost_time = "{:.2f}".format(time.time() - start_time) log_with_time(f"โฑ๏ธ Total request time: {cost_time}s") history[-1]["footer"] = f"{cost_time}s" yield { chatbot: gr.update(value=history), state: gr.update(value=state_value), } except Exception as e: log_with_time(f"โŒ Request failed for {model_id.split('/')[-1]}: {e}") history[-1]["loading"] = False history[-1]["status"] = "done" if not history[-1]["content"]: history[-1]["content"] = [] history[-1]["content"].append({ "type": "text", "content": f'Error: {str(e)}' }) yield { chatbot: gr.update(value=history), state: gr.update(value=state_value) } @staticmethod def add_message(input_value, thinking_btn_state_value, model_selector_state_value, state_value): text = input_value["text"] files = input_value["files"] if not state_value["conversation_id"]: random_id = str(uuid.uuid4()) history = [] state_value["conversation_id"] = random_id state_value["conversation_contexts"][state_value["conversation_id"]] = {"history": history} state_value["conversations"].append({"label": text, "key": random_id}) history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"] state_value["conversation_contexts"][state_value["conversation_id"]] = { "history": history, "enable_thinking": thinking_btn_state_value["enable_thinking"], "model_size": model_selector_state_value["model_size"] } history.append({ "key": str(uuid.uuid4()), "role": "user", "content": [{"type": "file", "content": [f for f in files]}, {"type": "text", "content": text}] }) yield Gradio_Events.preprocess_submit(clear_input=True)(state_value) try: for chunk in Gradio_Events.submit(state_value): yield chunk except Exception as e: raise e finally: yield Gradio_Events.postprocess_submit(state_value) @staticmethod def preprocess_submit(clear_input=True): def preprocess_submit_handler(state_value): history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"] return { **({input: gr.update(value=None, loading=True) if clear_input else gr.update(loading=True)} if clear_input else {}), conversations: gr.update(active_key=state_value["conversation_id"], items=list(map(lambda item: {**item, "disabled": True if item["key"] != state_value["conversation_id"] else False}, state_value["conversations"]))), add_conversation_btn: gr.update(disabled=True), clear_btn: gr.update(disabled=True), conversation_delete_menu_item: gr.update(disabled=True), chatbot: gr.update(value=history, bot_config=bot_config(disabled_actions=['edit', 'retry', 'delete']), user_config=user_config(disabled_actions=['edit', 'delete'])), state: gr.update(value=state_value), } return preprocess_submit_handler @staticmethod def postprocess_submit(state_value): history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"] return { input: gr.update(loading=False), conversation_delete_menu_item: gr.update(disabled=False), clear_btn: gr.update(disabled=False), conversations: gr.update(items=state_value["conversations"]), add_conversation_btn: gr.update(disabled=False), chatbot: gr.update(value=history, bot_config=bot_config(), user_config=user_config()), state: gr.update(value=state_value), } @staticmethod def cancel(state_value): history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"] history[-1]["loading"] = False history[-1]["status"] = "done" history[-1]["footer"] = "Chat completion paused" return Gradio_Events.postprocess_submit(state_value) @staticmethod def delete_message(state_value, e: gr.EventData): index = e._data["payload"][0]["index"] history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"] history = history[:index] + history[index + 1:] state_value["conversation_contexts"][state_value["conversation_id"]]["history"] = history return gr.update(value=state_value) @staticmethod def edit_message(state_value, chatbot_value, e: gr.EventData): index = e._data["payload"][0]["index"] history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"] history[index]["content"] = chatbot_value[index]["content"] if not history[index].get("edited"): history[index]["edited"] = True history[index]["footer"] = ((history[index]["footer"]) + " " if history[index].get("footer") else "") + "Edited" return gr.update(value=state_value), gr.update(value=history) @staticmethod def regenerate_message(thinking_btn_state_value, model_selector_state_value, state_value, e: gr.EventData): index = e._data["payload"][0]["index"] history = state_value["conversation_contexts"][state_value["conversation_id"]]["history"] history = history[:index] state_value["conversation_contexts"][state_value["conversation_id"]] = { "history": history, "enable_thinking": thinking_btn_state_value["enable_thinking"], "model_size": model_selector_state_value["model_size"] } yield Gradio_Events.preprocess_submit()(state_value) try: for chunk in Gradio_Events.submit(state_value): yield chunk except Exception as e: raise e finally: yield Gradio_Events.postprocess_submit(state_value) @staticmethod def apply_prompt(e: gr.EventData, input_value): input_value["text"] = e._data["payload"][0]["value"]["description"] input_value["files"] = e._data["payload"][0]["value"]["urls"] return gr.update(value=input_value) @staticmethod def new_chat(thinking_btn_state, model_selector_state, state_value): if not state_value["conversation_id"]: return gr.skip() state_value["conversation_id"] = "" thinking_btn_state["enable_thinking"] = True model_selector_state["model_size"] = "14B" return gr.update(active_key=state_value["conversation_id"]), gr.update(value=None), gr.update(value=thinking_btn_state), gr.update(value="14B"), gr.update(value=state_value) @staticmethod def select_conversation(thinking_btn_state_value, model_selector_state_value, state_value, e: gr.EventData): active_key = e._data["payload"][0] if state_value["conversation_id"] == active_key or (active_key not in state_value["conversation_contexts"]): return gr.skip() state_value["conversation_id"] = active_key thinking_btn_state_value["enable_thinking"] = state_value["conversation_contexts"][active_key].get("enable_thinking", False) model_size = state_value["conversation_contexts"][active_key].get("model_size", "14B") model_selector_state_value["model_size"] = model_size return gr.update(active_key=active_key), gr.update(value=state_value["conversation_contexts"][active_key]["history"]), gr.update(value=thinking_btn_state_value), gr.update(value=model_size), gr.update(value=state_value) @staticmethod def click_conversation_menu(state_value, e: gr.EventData): conversation_id = e._data["payload"][0]["key"] operation = e._data["payload"][1]["key"] if operation == "delete": del state_value["conversation_contexts"][conversation_id] state_value["conversations"] = [item for item in state_value["conversations"] if item["key"] != conversation_id] if state_value["conversation_id"] == conversation_id: state_value["conversation_id"] = "" return gr.update(items=state_value["conversations"], active_key=state_value["conversation_id"]), gr.update(value=None), gr.update(value=state_value) else: return gr.update(items=state_value["conversations"]), gr.skip(), gr.update(value=state_value) return gr.skip() @staticmethod def clear_conversation_history(state_value): if not state_value["conversation_id"]: return gr.skip() state_value["conversation_contexts"][state_value["conversation_id"]]["history"] = [] return gr.update(value=None), gr.update(value=state_value) @staticmethod def update_browser_state(state_value): return gr.update(value=dict(conversations=state_value["conversations"], conversation_contexts=state_value["conversation_contexts"])) @staticmethod def apply_browser_state(browser_state_value, state_value): state_value["conversations"] = browser_state_value["conversations"] state_value["conversation_contexts"] = browser_state_value["conversation_contexts"] return gr.update(items=browser_state_value["conversations"]), gr.update(value=state_value) css = """ body, html { background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important; color: var(--ms-gr-ant-color-text, #000000) !important; } .gradio-container, .gradio-container.dark { padding: 0 !important; background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important; color: var(--ms-gr-ant-color-text, #000000) !important; } .gradio-container > main.fillable { padding: 0 !important; background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important; } #chatbot .ms-gr-ant-col, #chatbot .ms-gr-antd-col, #chatbot [class*="ms-gr-ant-col"] { padding-left: 0 !important; padding-right: 0 !important; background-color: transparent !important; } #chatbot { height: calc(100vh - 21px - 16px); max-height: 1500px; background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important; } #chatbot .chatbot-conversations { height: 100vh; background-color: var(--ms-gr-ant-color-bg-container, #FFF0C3) !important; padding-left: 4px; padding-right: 4px; } #chatbot .chatbot-conversations .chatbot-conversations-list { padding-left: 0; padding-right: 0; } #chatbot .chatbot-chat { padding: 32px; padding-bottom: 0; height: 100%; background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important; color: var(--ms-gr-ant-color-text, #000000) !important; } @media (max-width: 768px) { #chatbot .chatbot-chat { padding: 10px; } } #chatbot .chatbot-chat .chatbot-chat-messages { flex: 1; background-color: transparent !important; } .gradio-container .contain { background-color: transparent !important; padding: 0 !important; } .user-message-content { background-color: #ffffff !important; background: #ffffff !important; border-radius: 16px !important; padding: 14px 18px !important; border: 1px solid #E9E2CB !important; color: #1E1E1E !important; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06) !important; max-width: fit-content; } [class*="chatbot"] [class*="user"] [class*="content"], [class*="chatbot"] [class*="user"] [class*="bubble"], [class*="chatbot"] [class*="user"] [class*="message"], [class*="pro-chatbot"] [class*="user"] { background-color: transparent !important; background: transparent !important; } .user-message-content, .user-message-content *:not(code):not(pre) { background-color: inherit !important; } .chatbot-welcome-prompts { background-color: var(--ms-gr-ant-color-bg-container, #FFF0C3) !important; } [class*="welcome"] [class*="prompt"], [class*="welcome"] [class*="prompts"], [class*="prompts"] [class*="item"], [class*="prompts"] [class*="card"], .ms-gr-antdx-prompts-item, .ms-gr-pro-chatbot-welcome-prompts { color: #1E1E1E !important; } [class*="welcome"] span, [class*="welcome"] p, [class*="welcome"] div, [class*="prompts"] span, [class*="prompts"] p, [class*="prompts"] div, [class*="prompts"] [class*="title"], [class*="prompts"] [class*="description"], .ms-gr-antdx-prompts-item span, .ms-gr-antdx-prompts-item p, .ms-gr-antdx-prompts-item div { color: #1E1E1E !important; } [class*="prompts"] [class*="item"] { background-color: #FFFAEB !important; border: 1px solid #E9E2CB !important; } .chatbot-conversations { background-color: var(--ms-gr-ant-color-bg-container, #FFF0C3) !important; } .chatbot-conversations .ms-gr-ant-typography { color: var(--ms-gr-ant-color-text, #000000) !important; } .chatbot-conversations .ms-gr-ant-btn-color-primary.ms-gr-ant-btn-variant-filled { background-color: var(--ms-gr-ant-color-primary, #FF8205) !important; color: #ffffff !important; border: none !important; } .chatbot-conversations .ms-gr-ant-btn-color-primary.ms-gr-ant-btn-variant-filled:hover { background-color: #FA500F !important; transform: translateY(-1px); } .chatbot-conversations .ms-gr-ant-conversations { color: var(--ms-gr-ant-color-text, #000000) !important; } .chatbot-conversations .ms-gr-ant-conversations-item { color: var(--ms-gr-ant-color-text, #000000) !important; background-color: transparent !important; } .chatbot-conversations .ms-gr-ant-conversations-item:hover { background-color: var(--ms-gr-ant-color-bg-elevated, #E9E2CB) !important; } .ant-typography { color: var(--ms-gr-ant-color-text, #000000) !important; } .ant-flex { color: var(--ms-gr-ant-color-text, #000000) !important; } #chatbot > .ant-col { background-color: var(--ms-gr-ant-color-bg-layout, #FFFAEB) !important; } h1, h2, h3, h4, h5, h6, p, span { color: var(--ms-gr-ant-color-text); } .ms-gr-pro-chatbot-bot, .ms-gr-pro-chatbot-bot *, .ms-gr-pro-chatbot-bot .ms-gr-pro-chatbot-message-content, .ms-gr-pro-chatbot-bot [class*="content"], [class*="chatbot"] [class*="bot"], [class*="chatbot"] [class*="bot"] * { color: #1E1E1E !important; } .ms-gr-pro-chatbot-bot h1, .ms-gr-pro-chatbot-bot h2, .ms-gr-pro-chatbot-bot h3, .ms-gr-pro-chatbot-bot h4, .ms-gr-pro-chatbot-bot h5, .ms-gr-pro-chatbot-bot h6, .ms-gr-pro-chatbot-bot strong, .ms-gr-pro-chatbot-bot b, .ms-gr-pro-chatbot-bot em, .ms-gr-pro-chatbot-bot i, .ms-gr-pro-chatbot-bot p, .ms-gr-pro-chatbot-bot span, .ms-gr-pro-chatbot-bot li, .ms-gr-pro-chatbot-bot ul, .ms-gr-pro-chatbot-bot ol, .ms-gr-pro-chatbot-bot a, [class*="chatbot"] [class*="bot"] h1, [class*="chatbot"] [class*="bot"] h2, [class*="chatbot"] [class*="bot"] h3, [class*="chatbot"] [class*="bot"] strong, [class*="chatbot"] [class*="bot"] b, [class*="chatbot"] [class*="bot"] p, [class*="chatbot"] [class*="bot"] span, [class*="chatbot"] [class*="bot"] li { color: #1E1E1E !important; } .ms-gr-pro-chatbot-bot [style*="color"], [class*="chatbot"] [class*="bot"] [style*="color"], [class*="chatbot"] [class*="bot"] [style] { color: #1E1E1E !important; } .ms-gr-pro-chatbot-bot pre, .ms-gr-pro-chatbot-bot pre code { background-color: #E9E2CB !important; color: #1E1E1E !important; } .ms-gr-pro-chatbot-bot code:not(pre code) { background-color: #E9E2CB !important; color: #1E1E1E !important; padding: 2px 6px; border-radius: 4px; } footer { display: none !important; } .footer { display: none !important; } *, *::before, *::after { transition: none !important; } """ with gr.Blocks( fill_width=True, css=css, theme=gr.themes.Default(primary_hue="orange", secondary_hue="gray", neutral_hue="gray") ) as demo: state = gr.State({"conversation_contexts": {}, "conversations": [], "conversation_id": "", "oss_cache": {}}) thinking_btn_state = gr.State({"enable_thinking": False}) model_selector_state = gr.State({"model_size": "14B"}) with ms.Application(), antdx.XProvider(theme=LIGHT_THEME), ms.AutoLoading(): with antd.Row(gutter=[20, 20], wrap=False, elem_id="chatbot"): with antd.Col(md=dict(flex="0 0 260px", span=24, order=0), span=0, order=1, elem_style=dict(width=0)): with ms.Div(elem_classes="chatbot-conversations"): with antd.Flex(vertical=True, gap="small", elem_style=dict(height="100%")): with antd.Flex(vertical=True, gap="small", align="center", elem_style=dict(padding=8)): with antd.Typography.Title(level=1, elem_style=dict(fontSize=24, margin=0)): with antd.Flex(align="center", gap="small", justify="center"): antd.Image('./assets/m-boxed-rainbow.png', preview=False, alt="logo", width=24, height=24) ms.Span("Ministrals Demo") with antd.Button(value=None, color="primary", variant="filled", block=True) as add_conversation_btn: ms.Text("New Conversation") with ms.Slot("icon"): antd.Icon("PlusOutlined") with antdx.Conversations(elem_classes="chatbot-conversations-list") as conversations: with ms.Slot('menu.items'): with antd.Menu.Item(label="Delete", key="delete", danger=True) as conversation_delete_menu_item: with ms.Slot("icon"): antd.Icon("DeleteOutlined") with antd.Col(flex=1, elem_style=dict(height="100%")): with antd.Flex(vertical=True, gap="small", elem_classes="chatbot-chat"): with antd.Flex(align="center", gap="large", elem_style=dict(paddingBottom=10)): antd.Typography.Title("Hello, I'm Ministral", level=3, elem_style=dict(margin=0)) with antd.Flex(align="center", gap="small"): ms.Span("currently using:", elem_style=dict(fontSize=12)) model_display = antd.Typography.Text( value="mistralai/Ministral-3-14B-Instruct-2512", copyable=True, code=True, elem_style=dict(fontSize=12, color="var(--ms-gr-ant-color-text-secondary)") ) chatbot = pro.Chatbot(elem_classes="chatbot-chat-messages", height=0, markdown_config=markdown_config(), welcome_config=welcome_config(), user_config=user_config(), bot_config=bot_config()) with pro.MultimodalInput(placeholder="How can I help you today?", upload_config=upload_config()) as input: with ms.Slot("prefix"): with antd.Flex(gap=4, wrap=True, elem_style=dict(maxWidth='40vw', display="inline-flex")): with antd.Button(value=None, type="text") as clear_btn: with ms.Slot("icon"): antd.Icon("ClearOutlined") model_selector = antd.Select( value=DEFAULT_MODEL_SIZE, default_value=DEFAULT_MODEL_SIZE, options=[{"label": "Ministral-3-14B", "value": "14B"}, {"label": "Ministral-3-8B", "value": "8B"}, {"label": "Ministral-3-3B", "value": "3B"}], elem_style=dict(width=180) ) with antd.Button("Thinking", shape="round", color="primary") as thinking_btn: with ms.Slot("icon"): antd.Icon("SunOutlined") def toggle_thinking(state_value): state_value["enable_thinking"] = not state_value["enable_thinking"] return gr.update(value=state_value) def apply_thinking_style(state_value): return gr.update(variant="solid" if state_value["enable_thinking"] else "") def update_model_size(value, state_value): state_value["model_size"] = value return gr.update(value=state_value) def update_model_display(thinking_state, model_state): model_size = model_state.get("model_size", "14B") model_type = "reasoning" if thinking_state.get("enable_thinking", False) else "instruct" model_name = MINISTRAL_MODELS[model_size][model_type] return gr.update(value=model_name) thinking_btn_state.change(fn=apply_thinking_style, inputs=[thinking_btn_state], outputs=[thinking_btn]) thinking_btn_state.change(fn=update_model_display, inputs=[thinking_btn_state, model_selector_state], outputs=[model_display]) thinking_btn.click(fn=toggle_thinking, inputs=[thinking_btn_state], outputs=[thinking_btn_state]) model_selector.change(fn=update_model_size, inputs=[model_selector, model_selector_state], outputs=[model_selector_state]) model_selector_state.change(fn=update_model_display, inputs=[thinking_btn_state, model_selector_state], outputs=[model_display]) if save_history: browser_state = gr.BrowserState({"conversation_contexts": {}, "conversations": []}, storage_key="ministral_demo_storage") state.change(fn=Gradio_Events.update_browser_state, inputs=[state], outputs=[browser_state]) demo.load(fn=Gradio_Events.apply_browser_state, inputs=[browser_state, state], outputs=[conversations, state]) add_conversation_btn.click(fn=Gradio_Events.new_chat, inputs=[thinking_btn_state, model_selector_state, state], outputs=[conversations, chatbot, thinking_btn_state, model_selector, state]) conversations.active_change(fn=Gradio_Events.select_conversation, inputs=[thinking_btn_state, model_selector_state, state], outputs=[conversations, chatbot, thinking_btn_state, model_selector, state]) conversations.menu_click(fn=Gradio_Events.click_conversation_menu, inputs=[state], outputs=[conversations, chatbot, state]) chatbot.welcome_prompt_select(fn=Gradio_Events.apply_prompt, inputs=[input], outputs=[input]) chatbot.delete(fn=Gradio_Events.delete_message, inputs=[state], outputs=[state]) chatbot.edit(fn=Gradio_Events.edit_message, inputs=[state, chatbot], outputs=[state, chatbot]) regenerating_event = chatbot.retry(fn=Gradio_Events.regenerate_message, inputs=[thinking_btn_state, model_selector_state, state], outputs=[input, clear_btn, conversation_delete_menu_item, add_conversation_btn, conversations, chatbot, state]) submit_event = input.submit(fn=Gradio_Events.add_message, inputs=[input, thinking_btn_state, model_selector_state, state], outputs=[input, clear_btn, conversation_delete_menu_item, add_conversation_btn, conversations, chatbot, state]) input.cancel(fn=Gradio_Events.cancel, inputs=[state], outputs=[input, conversation_delete_menu_item, clear_btn, conversations, add_conversation_btn, chatbot, state], cancels=[submit_event, regenerating_event], queue=False) clear_btn.click(fn=Gradio_Events.clear_conversation_history, inputs=[state], outputs=[chatbot, state]) if __name__ == "__main__": demo.queue(default_concurrency_limit=100, max_size=100).launch(ssr_mode=False, max_threads=100, show_api=False)