VladBoyko committed on
Commit 41ff223 · verified · 1 Parent(s): c1535fc

Create app.py

Files changed (1)
app.py +198 -0
app.py ADDED
@@ -0,0 +1,198 @@
+ import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+ import spaces
+
+ class VibeThinker:
+     def __init__(self, model_path="WeiboAI/VibeThinker-1.5B"):
+         self.model_path = model_path
+         print("Loading model... This may take a minute.")
+
+         self.model = AutoModelForCausalLM.from_pretrained(
+             self.model_path,
+             low_cpu_mem_usage=True,
+             torch_dtype=torch.bfloat16,
+             device_map="auto",
+             trust_remote_code=True
+         )
+
+         self.tokenizer = AutoTokenizer.from_pretrained(
+             self.model_path,
+             trust_remote_code=True
+         )
+
+         print("Model loaded successfully!")
+         print(f"Using device: {self.model.device}")
+         if torch.cuda.is_available():
+             print(f"CUDA device: {torch.cuda.get_device_name(0)}")
+
+     @spaces.GPU  # This decorator allocates a GPU when the function is called (for ZeroGPU Spaces)
+     def infer_text(self, prompt, temperature=0.6, max_tokens=40960, top_p=0.95):
+         """
+         Generate a response for a given prompt.
+
+         Args:
+             prompt: The input question (preferably in English)
+             temperature: Controls randomness (0.6 or 1.0 recommended)
+             max_tokens: Maximum number of tokens to generate
+             top_p: Nucleus sampling parameter
+         """
+         messages = [
+             {"role": "user", "content": prompt}
+         ]
+
+         text = self.tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,
+             add_generation_prompt=True
+         )
+
+         model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
+
+         generation_config = dict(
+             max_new_tokens=max_tokens,
+             do_sample=True,
+             temperature=temperature,
+             top_p=top_p,
+             top_k=None  # Set to -1 in vLLM/SGLang
+         )
+
+         print(f"Generating response with temperature={temperature}, max_tokens={max_tokens}...")
+
+         generated_ids = self.model.generate(
+             **model_inputs,
+             generation_config=GenerationConfig(**generation_config)
+         )
+
+         # Keep only the newly generated tokens, dropping the prompt
+         generated_ids = [
+             output_ids[len(input_ids):]
+             for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+         ]
+
+         response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+         return response
+
+
+ # Initialize model
+ print("Initializing VibeThinker-1.5B...")
+ model = VibeThinker()
+
+ # Create Gradio interface
+ def generate_response(prompt, temperature, max_tokens, top_p):
+     if not prompt.strip():
+         return "Please enter a question."
+
+     try:
+         response = model.infer_text(
+             prompt=prompt,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             top_p=top_p
+         )
+         return response
+     except Exception as e:
+         return f"Error: {str(e)}"
+
+
+ # Gradio UI
+ with gr.Blocks(title="VibeThinker-1.5B Math & Code Reasoning") as demo:
+     gr.Markdown("""
+     # 🧠 VibeThinker-1.5B: Reasoning Model
+
+     **Optimized for**: Competitive math problems and algorithmic coding challenges
+
+     **Note**: This model works best with questions in English. It is specifically trained for
+     mathematical reasoning and competitive programming tasks.
+
+     ### Example Prompts:
+     - "Solve: Find all solutions to x^3 - 3x^2 + 4 = 0"
+     - "Write a Python function to find the longest palindromic substring in O(n^2) time"
+     - "Prove that the sum of angles in a triangle equals 180 degrees"
+
+     [GitHub](https://github.com/WeiboAI/VibeThinker) | [HuggingFace Model](https://huggingface.co/WeiboAI/VibeThinker-1.5B) | [Paper](https://huggingface.co/papers/2511.06221)
+     """)
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             prompt_input = gr.Textbox(
+                 label="Your Question",
+                 placeholder="Enter your math problem or coding challenge here (in English)...",
+                 lines=5
+             )
+
+             with gr.Accordion("Advanced Settings", open=False):
+                 temperature_slider = gr.Slider(
+                     minimum=0.1,
+                     maximum=1.5,
+                     value=0.6,
+                     step=0.1,
+                     label="Temperature (0.6 or 1.0 recommended)"
+                 )
+
+                 max_tokens_slider = gr.Slider(
+                     minimum=512,
+                     maximum=40960,
+                     value=8192,
+                     step=512,
+                     label="Max Tokens (model supports up to 40,960)"
+                 )
+
+                 top_p_slider = gr.Slider(
+                     minimum=0.1,
+                     maximum=1.0,
+                     value=0.95,
+                     step=0.05,
+                     label="Top P"
+                 )
+
+             submit_btn = gr.Button("🚀 Generate Solution", variant="primary")
+             clear_btn = gr.Button("🗑️ Clear")
+
+         with gr.Column(scale=1):
+             output_text = gr.Textbox(
+                 label="Model Response",
+                 lines=20,
+                 show_copy_button=True
+             )
+
+     # Example questions
+     gr.Examples(
+         examples=[
+             ["Find the number of positive integers n ≤ 1000 such that n^2 + n + 41 is prime.", 0.6, 8192, 0.95],
+             ["Write an efficient algorithm to solve the 0-1 knapsack problem using dynamic programming.", 0.6, 8192, 0.95],
+             ["Prove that √2 is irrational using proof by contradiction.", 0.6, 8192, 0.95],
+             ["A tank can be filled by pipe A in 3 hours and pipe B in 5 hours. If both pipes are opened together, how long will it take to fill the tank?", 0.6, 8192, 0.95],
+         ],
+         inputs=[prompt_input, temperature_slider, max_tokens_slider, top_p_slider],
+         label="Example Problems"
+     )
+
+     # Event handlers
+     submit_btn.click(
+         fn=generate_response,
+         inputs=[prompt_input, temperature_slider, max_tokens_slider, top_p_slider],
+         outputs=output_text
+     )
+
+     clear_btn.click(
+         fn=lambda: ("", ""),
+         inputs=[],
+         outputs=[prompt_input, output_text]
+     )
+
+     gr.Markdown("""
+     ---
+     ### 📊 Model Performance Highlights:
+     - **AIME24**: 80.3 (vs DeepSeek R1: 79.8)
+     - **AIME25**: 74.4 (vs DeepSeek R1: 70.0)
+     - **LiveCodeBench v6**: 51.1
+     - **Parameters**: Only 1.5B (400x smaller than DeepSeek R1!)
+
+     **Training Cost**: $7,800 USD | **License**: MIT
+     """)
+
+ # Launch the app
+ if __name__ == "__main__":
+     demo.queue()  # Enable queuing for better UX
+     demo.launch()
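
For a quick sanity check, the file above can also be exercised without the UI. The following is a minimal sketch, not part of the commit: it assumes the Space's dependencies (gradio, torch, transformers, spaces) are installed, that app.py sits in the working directory, and that enough GPU memory is available for the bfloat16 weights; outside a ZeroGPU Space the @spaces.GPU decorator should simply be inert, though that depends on the installed spaces package version.

# Hypothetical smoke test; not part of the commit above.
# Importing app runs its module-level `model = VibeThinker()`,
# so we reuse that instance rather than loading the weights twice.
from app import model

answer = model.infer_text(
    "Prove that √2 is irrational using proof by contradiction.",
    temperature=0.6,   # one of the two recommended settings
    max_tokens=4096,   # smaller cap than the UI default, for a faster run
)
print(answer)

Launching the full app is just `python app.py`; the `demo.queue()` call makes concurrent requests wait in line rather than fail.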