import gradio as gr
import torch
from threading import Thread
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextIteratorStreamer,
)

# ZeroGPU support for Hugging Face Spaces (effect-free if not in Spaces)
try:
    import spaces
    GPU_DECORATOR = spaces.GPU(duration=30)
    print("[SUCCESS] ZeroGPU support enabled")
except ImportError:
    # Fallback for local execution: a no-op decorator
    def GPU_DECORATOR(func):
        return func
    print("[WARNING] Running without ZeroGPU (local mode)")
# Language-specific system prompts (extracted from the training set)
LANGUAGE_SYSTEM_PROMPTS = {
    # Germanic languages
    "en": "You are a helpful AI assistant.",
    "de": "Sie sind ein hilfreicher KI-Assistent.",
    "nl": "Je bent een behulpzame AI-assistent.",
    "da": "Du er en hjælpsom AI-assistent.",
    "sv": "Du är en hjälpsam AI-assistent.",
    "no": "Du er en hjelpsom AI-assistent.",
    # Romance languages
    "fr": "Vous êtes un assistant IA utile.",
    "es": "Eres un asistente de IA útil.",
    "it": "Sei un assistente AI utile.",
    "pt": "Você é um assistente de IA prestativo.",
    "ro": "Ești un asistent AI de ajutor.",
    # Slavic languages
    "pl": "Jesteś pomocnym asystentem AI.",
    "cs": "Jste užitečný AI asistent.",
    "sk": "Ste užitočný AI asistent.",
    "bg": "Вие сте полезен AI асистент.",
    "hr": "Vi ste korisni AI asistent.",
    "sl": "Vi ste koristen AI asistent.",
    # Baltic states (note: Estonian is Finnic, not Baltic)
    "lv": "Tu esi izpalīdzīgs mākslīgā intelekta asistents.",
    "lt": "Jūs esate naudingas dirbtinio intelekto asistentas.",
    "et": "Olete abivalmis tehisintellekti assistent.",
    # Other European languages
    "fi": "Olet avulias tekoälyavustaja.",
    "hu": "Ön egy segítőkész AI asszisztens.",
    "el": "Είστε ένας χρήσιμος βοηθός AI.",
    "mt": "Inti assistent tal-AI utli.",
    # Eastern European
    "ru": "Вы полезный ИИ-ассистент.",
    "uk": "Ви корисний AI-асистент.",
    # Asian languages (if needed)
    "zh": "你是一个有用的AI助手。",
    "ja": "あなたは役に立つAIアシスタントです。",
    "ko": "당신은 유용한 AI 어시스턴트입니다.",
    "hi": "आप एक उपयोगी एआई सहायक हैं।",
}
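# Illustrative check (added for clarity, not in the original Space): unknown
# language codes fall back to the English prompt via dict.get in chat() below.
assert LANGUAGE_SYSTEM_PROMPTS.get("xx", LANGUAGE_SYSTEM_PROMPTS["en"]) == LANGUAGE_SYSTEM_PROMPTS["en"]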
# Model loading
MODEL_PATH = "martinsu/tildeopen-30b-mu-instruct"
print(f"Loading model from {MODEL_PATH}...")

# Configure 4-bit quantization for memory efficiency
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,  # Nested quantization for extra memory savings
)
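# Rough memory budget (back-of-envelope, not from the original file): 30B
# parameters at 4 bits is about 15 GB of weights, so the model fits on a
# single 24-48 GB GPU with headroom for activations and the KV cache.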
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=quantization_config,
    device_map="auto",
)
print("[SUCCESS] Model loaded!")
@GPU_DECORATOR
def chat(message, history, temperature, language):
    """Generate a response with configurable temperature and language.

    Decorated with @spaces.GPU for dynamic GPU allocation on Hugging Face
    Spaces (30-second duration); the decorator is a no-op locally.
    """
    # Gradio ChatInterface passes history entries whose content is either a
    # plain string or a list of content dicts; extract the text for
    # apply_chat_template.
    def extract_text(content):
        if isinstance(content, str):
            return content
        elif isinstance(content, list):
            # List of content items: [{"type": "text", "text": "..."}]
            texts = [
                item.get("text", "")
                for item in content
                if isinstance(item, dict) and item.get("type") == "text"
            ]
            return " ".join(texts)
        return ""
    # Get the system prompt for the selected language (fall back to English)
    system_prompt = LANGUAGE_SYSTEM_PROMPTS.get(language, LANGUAGE_SYSTEM_PROMPTS["en"])

    # Build messages with the system prompt and text-only content
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(
        {"role": msg["role"], "content": extract_text(msg["content"])} for msg in history
    )
    messages.append({"role": "user", "content": extract_text(message)})

    # Apply the chat template (the tokenizer handles the formatting)
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
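    # The `messages` list passed to the template above looks like (illustrative):
    #   [{"role": "system", "content": "..."},
    #    {"role": "user", "content": "..."},
    #    ...]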
    # Tokenize
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate with streaming: run generate() on a background thread and
    # yield tokens from the streamer as they arrive
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=512,
        temperature=temperature,
        top_p=0.9,
        do_sample=True,
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
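# Quick local smoke test (illustrative; assumes enough GPU memory):
#   for chunk in chat("Sveiki!", [], temperature=0.7, language="lv"):
#       print(chunk)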
# Prepare language options (ordered by priority)
language_options = [
    ("English", "en"),
    ("Latviešu (Latvian)", "lv"),
    ("Deutsch (German)", "de"),
    ("Français (French)", "fr"),
    ("Español (Spanish)", "es"),
    ("Italiano (Italian)", "it"),
    ("Polski (Polish)", "pl"),
    ("Português (Portuguese)", "pt"),
    ("Nederlands (Dutch)", "nl"),
    ("Svenska (Swedish)", "sv"),
    ("Čeština (Czech)", "cs"),
    ("Română (Romanian)", "ro"),
    ("Dansk (Danish)", "da"),
    ("Suomi (Finnish)", "fi"),
    ("Magyar (Hungarian)", "hu"),
    ("Ελληνικά (Greek)", "el"),
    ("Български (Bulgarian)", "bg"),
    ("Lietuvių (Lithuanian)", "lt"),
    ("Eesti (Estonian)", "et"),
    ("Русский (Russian)", "ru"),
]
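# Consistency check (added for clarity, not in the original Space): every
# dropdown code must have a matching system prompt.
assert all(code in LANGUAGE_SYSTEM_PROMPTS for _, code in language_options)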
# Chat interface with additional inputs always visible
demo = gr.ChatInterface(
    chat,
    type="messages",  # history arrives as [{"role": ..., "content": ...}] dicts
    title="TildeOpen-30B Chat",
    description=(
        "Multilingual chat model supporting European languages (4-bit quantized). "
        "Select the system-prompt language that matches your conversation "
        "(e.g. Swedish, Latvian, Estonian; default: English); this helps the "
        "model predict more desirable tokens."
    ),
    additional_inputs=[
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
        gr.Dropdown(choices=language_options, value="en", label="Language / System Prompt"),
    ],
    additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=True),
    submit_btn=True,
    stop_btn=True,
    autofocus=True,
)
if __name__ == "__main__":
    demo.launch()
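# Launch options (assumption, not in the original file): on Spaces the
# defaults are fine; when running in a container locally you may want
# demo.launch(server_name="0.0.0.0") to expose the app.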