Spaces:

zhijie3
/

D2F-LLaDA-Instruct-8B

Sleeping

App Files Files Community

UnhurriedDawn committed on Aug 8

Commit

952fb23

1 Parent(s): e041d24

init

Browse files

Files changed (1) hide show

app.py +295 -89

app.py CHANGED Viewed

@@ -90,22 +90,127 @@ class DreamLoRAInference:
     # CSS is exactly the same as your original script
     CSS = """
     /* Enhanced modern styling */
-    .main-container { max-width: 1400px; margin: 0 auto; padding: 20px; }
-    .output-text-container { background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%); border: 2px solid #e2e8f0; border-radius: 12px; padding: 20px; margin: 15px 0; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); }
-    .output-text-container textarea { background: white !important; border: 1px solid #cbd5e1 !important; border-radius: 8px !important; font-family: 'Inter', 'Segoe UI', sans-serif !important; font-size: 14px !important; line-height: 1.6 !important; padding: 16px !important; box-shadow: inset 0 2px 4px 0 rgba(0, 0, 0, 0.06) !important; }
-    .stats-card { background: linear-gradient(135deg, #ecfdf5 0%, #f0fdf4 100%); border: 2px solid #10b981; border-radius: 12px; padding: 20px; margin: 15px 0; box-shadow: 0 4px 6px -1px rgba(16, 185, 129, 0.1); }
-    .stats-card h3 { color: #065f46; margin-top: 0; margin-bottom: 15px; font-weight: 600; }
-    .stats-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 12px; margin-top: 10px; }
-    .stat-item { background: white; padding: 12px 16px; border-radius: 8px; border-left: 4px solid #10b981; box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1); }
-    .stat-label { font-size: 12px; color: #6b7280; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 4px; }
-    .stat-value { font-size: 18px; font-weight: 600; color: #065f46; font-family: 'Monaco', 'Menlo', monospace; }
-    .viz-container { background: linear-gradient(135deg, #fefefe 0%, #f9fafb 100%); border: 2px solid #e5e7eb; border-radius: 12px; padding: 20px; margin: 15px 0; height: 600px; overflow-y: auto; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); position: relative; }
-    .viz-header { background: linear-gradient(135deg, #3b82f6 0%, #2563eb 100%); color: white; padding: 12px 20px; margin: -20px -20px 20px -20px; border-radius: 12px 12px 0 0; font-weight: 600; font-size: 16px; display: flex; align-items: center; gap: 8px; }
-    .viz-header::before { content: "🎬"; font-size: 18px; }
-    .block-container { display: inline-block; border: 2px solid transparent; border-radius: 10px; padding: 8px; margin: 6px 2px; transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1); background: rgba(255, 255, 255, 0.8); backdrop-filter: blur(10px); }
-    .block-updating { border-color: #ff4500 !important; box-shadow: 0 0 20px rgba(255, 69, 0, 0.4); transform: scale(1.02); background: rgba(255, 245, 238, 0.9) !important; }
-    .token { padding: 4px 8px; margin: 2px; border-radius: 6px; display: inline-block; line-height: 1.5; font-family: 'Monaco', 'Menlo', monospace; font-size: 13px; font-weight: 500; transition: all 0.2s ease; }
-    .token:hover { transform: translateY(-1px); box-shadow: 0 2px 4px rgba(0, 0, 0, 0.15); }
     .token.prompt { background: linear-gradient(135deg, #f1f5f9 0%, #e2e8f0 100%); color: #475569; border: 1px solid #cbd5e1; }
     .token.gen-0 { background: linear-gradient(135deg, #dbeafe 0%, #bfdbfe 100%); color: #1e40af; border: 1px solid #60a5fa; }
     .token.gen-1 { background: linear-gradient(135deg, #d1fae5 0%, #a7f3d0 100%); color: #065f46; border: 1px solid #34d399; }
@@ -113,21 +218,96 @@ class DreamLoRAInference:
     .token.gen-3 { background: linear-gradient(135deg, #fee2e2 0%, #fecaca 100%); color: #991b1b; border: 1px solid #f87171; }
     .token.gen-4 { background: linear-gradient(135deg, #e0e7ff 0%, #c7d2fe 100%); color: #3730a3; border: 1px solid #818cf8; }
     .token.gen-5 { background: linear-gradient(135deg, #f3e8ff 0%, #e9d5ff 100%); color: #6b21a8; border: 1px solid #c084fc; }
-    .token.mask { background: linear-gradient(135deg, #f9fafb 0%, #f3f4f6 100%); color: #9ca3af; border: 2px dashed #d1d5db; animation: pulse 2s infinite; }
-    @keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: 0.6; } }
-    .control-button { background: linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%) !important; border: none !important; color: white !important; padding: 12px 24px !important; border-radius: 10px !important; font-weight: 600 !important; font-size: 14px !important; box-shadow: 0 4px 6px -1px rgba(139, 92, 246, 0.3) !important; transition: all 0.3s ease !important; display: flex !important; align-items: center !important; gap: 8px !important; margin: 10px 0 !important; }
-    .control-button:hover { transform: translateY(-2px) !important; box-shadow: 0 8px 15px -3px rgba(139, 92, 246, 0.4) !important; }
-    .control-button:active { transform: translateY(0) !important; }
-    .control-button::before { content: "🎮"; font-size: 16px; }
-    .param-card { background: white; border: 1px solid #e5e7eb; border-radius: 10px; padding: 16px; margin: 8px 0; box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1); }
-    .viz-container::-webkit-scrollbar { width: 12px; }
-    .viz-container::-webkit-scrollbar-track { background: #f1f5f9; border-radius: 6px; }
-    .viz-container::-webkit-scrollbar-thumb { background: linear-gradient(135deg, #94a3b8 0%, #64748b 100%); border-radius: 6px; border: 2px solid #f1f5f9; }
-    .viz-container::-webkit-scrollbar-thumb:hover { background: linear-gradient(135deg, #64748b 0%, #475569 100%); }
-    .generating-indicator { display: inline-flex; align-items: center; gap: 8px; color: #6366f1; font-weight: 500; }
-    .generating-indicator::after { content: ""; width: 12px; height: 12px; border: 2px solid #6366f1; border-top: 2px solid transparent; border-radius: 50%; animation: spin 1s linear infinite; }
-    @keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }
-    @media (max-width: 768px) { .main-container { padding: 10px; } .stats-grid { grid-template-columns: 1fr; } .viz-container { height: 400px; } }
     """
     def __init__(self, **kwargs):
@@ -184,35 +364,60 @@ class DreamLoRAInference:
                         block_states[next_block_id]['is_complete'] = True
     def _render_visualization_html(self, step: int, x_t: torch.Tensor, block_states: Dict, cache_length: int, updated_block_ids: Set[int]) -> str:
         timestamp = int(time.time() * 1000)
-        html_parts = [f'<div class="viz-header">Slow-Motion Generation Process</div>']
         for block_id in sorted(k for k in block_states.keys() if k > 0):
             state = block_states[block_id]
-            container_classes = ["block-container", "block-updating" if block_id in updated_block_ids else ""]
             html_parts.append(f'<div class="{" ".join(container_classes)}" id="block-{block_id}-{timestamp}">')
             block_tokens = x_t[0, state['start_pos']:state['end_pos']]
             for token_id in block_tokens:
                 token_id_int = token_id.item()
                 token_classes = ["token"]
                 if token_id_int == self.mask_token_id:
-                    token_str, _ = '░', token_classes.append("mask")
                 else:
                     token_str = self.tokenizer.decode([token_id_int], skip_special_tokens=False)
                     token_str = token_str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
                     token_classes.append(f"gen-{(block_id - 1) % 6}")
                 html_parts.append(f'<span class="{" ".join(token_classes)}">{token_str}</span>')
             html_parts.append('</div>')
         html_parts.append(f'<div class="scroll-anchor" id="viz-anchor-{timestamp}"></div>')
-        return f'<div class="viz-content" id="viz-content-{timestamp}">{"".join(html_parts)}</div>'
     @spaces.GPU
     @torch.inference_mode()
     def stream_and_capture_for_gradio(
-        self, prompt_text: str, max_new_tokens: int, block_size: int, block_add_threshold: float,
-        decoded_token_threshold: float, skip_threshold: float
     ) -> Iterator[Tuple[str, List[str], str, bool]]:
-        self._ensure_model_loaded() # Lazy load the model
         start_time = time.time()
         captured_frames: List[str] = []
@@ -295,12 +500,14 @@ class DreamLoRAInference:
             current_viz_html = self._render_visualization_html(step, x_t, block_states, cache_length, updated_block_ids)
             captured_frames.append(current_viz_html)
             yield live_text, captured_frames, f'<div class="generating-indicator">Generating... Step {step}</div>', False
         total_time = time.time() - start_time
         final_generated_ids = x_t[0, prompt_length:]
         eos_positions = (final_generated_ids == self.tokenizer.eos_token_id).nonzero()
-        if eos_positions.numel() > 0: final_generated_ids = final_generated_ids[:eos_positions[0, 0] + 1]
         final_text = self.tokenizer.decode(final_generated_ids, skip_special_tokens=True)
         final_viz_html = self._render_visualization_html(step, x_t, block_states, cache_length, set())
@@ -313,10 +520,22 @@ class DreamLoRAInference:
         <div class="stats-card">
             <h3>✅ Generation Complete!</h3>
             <div class="stats-grid">
-                <div class="stat-item"><div class="stat-label">Total Time</div><div class="stat-value">{total_time:.2f}s</div></div>
-                <div class="stat-item"><div class="stat-label">Tokens (incl. EOS)</div><div class="stat-value">{tokens_incl_eos}</div></div>
-                <div class="stat-item"><div class="stat-label">Tokens (excl. EOS)</div><div class="stat-value">{tokens_excl_eos}</div></div>
-                <div class="stat-item"><div class="stat-label">Tokens/Second</div><div class="stat-value">{(tokens_incl_eos / total_time):.1f}</div></div>
             </div>
         </div>
         """
@@ -344,46 +563,29 @@ if __name__ == "__main__":
             yield frame
             time.sleep(delay)
-    # Global auto-scroll JavaScript from your original code
     auto_scroll_js = """
     <script>
-    // This script contains functions to auto-scroll the live text output and visualization panels.
-    // It's injected into the page and listeners are set up to trigger on content change.
-    // The implementation details are complex and are omitted here for brevity,
-    // but the functionality is to keep the latest content in view.
-    // We can use a simpler version for robustness in HF Spaces.
-    function scrollElementToBottom(elementId) {
-        const element = document.getElementById(elementId);
-        if (element) {
-            const child = element.querySelector('textarea, .viz-content');
-            if(child) child.scrollTop = child.scrollHeight;
-            else element.scrollTop = element.scrollHeight;
-        }
     }
-    function setupObserver(containerId) {
-        const targetNode = document.getElementById(containerId);
-        if (!targetNode) return;
-        const config = { childList: true, subtree: true, characterData: true };
-        const callback = function(mutationsList, observer) {
-            scrollElementToBottom(containerId);
-        };
-        const observer = new MutationObserver(callback);
-        observer.observe(targetNode, config);
-    }
-    document.addEventListener('DOMContentLoaded', function() {
         setTimeout(() => {
-            // Target the specific textarea within the component for live text
-            const liveTextComponent = document.getElementById('live-text-output');
-            if (liveTextComponent) {
-                const ta = liveTextComponent.querySelector('textarea');
-                if (ta) setupObserver(ta.id);
-            }
-             // Target the visualization container
-            setupObserver('viz-container');
-        }, 1500); // Delay to ensure Gradio renders elements
     });
     </script>
     """
@@ -392,11 +594,10 @@ if __name__ == "__main__":
         html_frames_state = gr.State([])
         generation_complete_state = gr.State(False)
-        # Inject auto-scroll JavaScript - this is a more robust way for Spaces
-        # gr.HTML(auto_scroll_js) # This can be problematic, JS in HTML render is better.
         with gr.Column(elem_classes=["main-container"]):
-            # UI structure is identical to your original script
             gr.Markdown("# ✨ D2F: Faster-than-AR Inference for Diffusion LLMs")
             gr.Markdown(
                 """
@@ -413,7 +614,13 @@ if __name__ == "__main__":
             with gr.Row():
                 with gr.Column(scale=2):
-                    prompt_input = gr.Textbox(label="🤔 Enter your question", placeholder="Ask me anything! Try: 'Explain quantum physics' or 'Write a story about...'", lines=4, elem_classes=["param-card"])
                     with gr.Accordion("⚙️ Advanced Settings", open=False):
                         with gr.Row():
                             max_new_tokens_slider = gr.Slider(minimum=64, maximum=2048, value=1024, step=64, label="Max Tokens", info="Maximum number of tokens to generate")
@@ -424,6 +631,7 @@ if __name__ == "__main__":
                         with gr.Row():
                             skip_thresh_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, step=0.01, label="Skip Threshold", info="Token selection threshold")
                             delay_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.15, step=0.05, label="Playback Speed", info="Slow-motion playback delay (seconds)")
                     generate_button = gr.Button("🚀 Generate Text", variant="primary", size="lg")
                 with gr.Column(scale=3):
@@ -435,7 +643,7 @@ if __name__ == "__main__":
             with gr.Row():
                 with gr.Column():
                     slowmo_button = gr.Button("🎬 Watch Slow-Motion Generation Process", variant="secondary", size="lg", elem_classes=["control-button"], visible=False, interactive=False)
-                    with gr.Group(elem_classes=["viz-container"], visible=False, elem_id="viz-container") as viz_group:
                         visualization_output = gr.HTML(label="")
             # Examples are identical to your original script
@@ -449,7 +657,7 @@ if __name__ == "__main__":
                 label="💡 Try these examples"
             )
-        # Event handling is identical to your original script
         def update_slowmo_button_visibility(is_complete):
             return gr.update(visible=is_complete, interactive=is_complete)
@@ -461,9 +669,10 @@ if __name__ == "__main__":
             block_add_thresh_slider, decoded_token_thresh_slider, skip_thresh_slider
         ]
         generation_event = generate_button.click(
-            fn=lambda: [gr.update(value="", interactive=False), gr.update(visible=False, interactive=False), gr.update(visible=False), gr.update(value=None)],
-            outputs=[prompt_input, slowmo_button, viz_group, stats_output]
         ).then(
             fn=inference_engine.stream_and_capture_for_gradio,
             inputs=inputs_list,
@@ -472,9 +681,6 @@ if __name__ == "__main__":
             fn=update_slowmo_button_visibility,
             inputs=[generation_complete_state],
             outputs=[slowmo_button]
-        ).then(
-            fn=lambda: gr.update(interactive=True),
-            outputs=[prompt_input]
         )
         slowmo_event = slowmo_button.click(

     # CSS is exactly the same as your original script
     CSS = """
     /* Enhanced modern styling */
+    .main-container {
+        max-width: 1400px;
+        margin: 0 auto;
+        padding: 20px;
+    }
+    .output-text-container {
+        background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%);
+        border: 2px solid #e2e8f0;
+        border-radius: 12px;
+        padding: 20px;
+        margin: 15px 0;
+        box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
+    }
+    .output-text-container textarea {
+        background: white !important;
+        border: 1px solid #cbd5e1 !important;
+        border-radius: 8px !important;
+        font-family: 'Inter', 'Segoe UI', sans-serif !important;
+        font-size: 14px !important;
+        line-height: 1.6 !important;
+        padding: 16px !important;
+        box-shadow: inset 0 2px 4px 0 rgba(0, 0, 0, 0.06) !important;
+    }
+    .stats-card {
+        background: linear-gradient(135deg, #ecfdf5 0%, #f0fdf4 100%);
+        border: 2px solid #10b981;
+        border-radius: 12px;
+        padding: 20px;
+        margin: 15px 0;
+        box-shadow: 0 4px 6px -1px rgba(16, 185, 129, 0.1);
+    }
+    .stats-card h3 {
+        color: #065f46;
+        margin-top: 0;
+        margin-bottom: 15px;
+        font-weight: 600;
+    }
+    .stats-grid {
+        display: grid;
+        grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+        gap: 12px;
+        margin-top: 10px;
+    }
+    .stat-item {
+        background: white;
+        padding: 12px 16px;
+        border-radius: 8px;
+        border-left: 4px solid #10b981;
+        box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1);
+    }
+    .stat-label {
+        font-size: 12px;
+        color: #6b7280;
+        text-transform: uppercase;
+        letter-spacing: 0.5px;
+        margin-bottom: 4px;
+    }
+    .stat-value {
+        font-size: 18px;
+        font-weight: 600;
+        color: #065f46;
+        font-family: 'Monaco', 'Menlo', monospace;
+    }
+    .viz-container {
+        background: linear-gradient(135deg, #fefefe 0%, #f9fafb 100%);
+        border: 2px solid #e5e7eb;
+        border-radius: 12px;
+        padding: 20px;
+        margin: 15px 0;
+        height: 600px;
+        overflow-y: auto;
+        box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
+        position: relative;
+    }
+    .viz-header {
+        background: linear-gradient(135deg, #3b82f6 0%, #2563eb 100%);
+        color: white;
+        padding: 12px 20px;
+        margin: -20px -20px 20px -20px;
+        border-radius: 12px 12px 0 0;
+        font-weight: 600;
+        font-size: 16px;
+        display: flex;
+        align-items: center;
+        gap: 8px;
+    }
+    .viz-header::before {
+        content: "🎬";
+        font-size: 18px;
+    }
+    .block-container {
+        display: inline-block;
+        border: 2px solid transparent;
+        border-radius: 10px;
+        padding: 8px;
+        margin: 6px 2px;
+        transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
+        background: rgba(255, 255, 255, 0.8);
+        backdrop-filter: blur(10px);
+    }
+    .block-updating {
+        border-color: #ff4500 !important;
+        box-shadow: 0 0 20px rgba(255, 69, 0, 0.4);
+        transform: scale(1.02);
+        background: rgba(255, 245, 238, 0.9) !important;
+    }
+    .token {
+        padding: 4px 8px;
+        margin: 2px;
+        border-radius: 6px;
+        display: inline-block;
+        line-height: 1.5;
+        font-family: 'Monaco', 'Menlo', monospace;
+        font-size: 13px;
+        font-weight: 500;
+        transition: all 0.2s ease;
+    }
+    .token:hover {
+        transform: translateY(-1px);
+        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.15);
+    }
     .token.prompt { background: linear-gradient(135deg, #f1f5f9 0%, #e2e8f0 100%); color: #475569; border: 1px solid #cbd5e1; }
     .token.gen-0 { background: linear-gradient(135deg, #dbeafe 0%, #bfdbfe 100%); color: #1e40af; border: 1px solid #60a5fa; }
     .token.gen-1 { background: linear-gradient(135deg, #d1fae5 0%, #a7f3d0 100%); color: #065f46; border: 1px solid #34d399; }
     .token.gen-3 { background: linear-gradient(135deg, #fee2e2 0%, #fecaca 100%); color: #991b1b; border: 1px solid #f87171; }
     .token.gen-4 { background: linear-gradient(135deg, #e0e7ff 0%, #c7d2fe 100%); color: #3730a3; border: 1px solid #818cf8; }
     .token.gen-5 { background: linear-gradient(135deg, #f3e8ff 0%, #e9d5ff 100%); color: #6b21a8; border: 1px solid #c084fc; }
+    .token.mask {
+        background: linear-gradient(135deg, #f9fafb 0%, #f3f4f6 100%);
+        color: #9ca3af;
+        border: 2px dashed #d1d5db;
+        animation: pulse 2s infinite;
+    }
+    @keyframes pulse {
+        0%, 100% { opacity: 1; }
+        50% { opacity: 0.6; }
+    }
+    .control-button {
+        background: linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%) !important;
+        border: none !important;
+        color: white !important;
+        padding: 12px 24px !important;
+        border-radius: 10px !important;
+        font-weight: 600 !important;
+        font-size: 14px !important;
+        box-shadow: 0 4px 6px -1px rgba(139, 92, 246, 0.3) !important;
+        transition: all 0.3s ease !important;
+        display: flex !important;
+        align-items: center !important;
+        gap: 8px !important;
+        margin: 10px 0 !important;
+    }
+    .control-button:hover {
+        transform: translateY(-2px) !important;
+        box-shadow: 0 8px 15px -3px rgba(139, 92, 246, 0.4) !important;
+    }
+    .control-button:active {
+        transform: translateY(0) !important;
+    }
+    .control-button::before {
+        content: "🎮";
+        font-size: 16px;
+    }
+    .param-card {
+        background: white;
+        border: 1px solid #e5e7eb;
+        border-radius: 10px;
+        padding: 16px;
+        margin: 8px 0;
+        box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1);
+    }
+    .viz-container::-webkit-scrollbar {
+        width: 12px;
+    }
+    .viz-container::-webkit-scrollbar-track {
+        background: #f1f5f9;
+        border-radius: 6px;
+    }
+    .viz-container::-webkit-scrollbar-thumb {
+        background: linear-gradient(135deg, #94a3b8 0%, #64748b 100%);
+        border-radius: 6px;
+        border: 2px solid #f1f5f9;
+    }
+    .viz-container::-webkit-scrollbar-thumb:hover {
+        background: linear-gradient(135deg, #64748b 0%, #475569 100%);
+    }
+    .generating-indicator {
+        display: inline-flex;
+        align-items: center;
+        gap: 8px;
+        color: #6366f1;
+        font-weight: 500;
+    }
+    .generating-indicator::after {
+        content: "";
+        width: 12px;
+        height: 12px;
+        border: 2px solid #6366f1;
+        border-top: 2px solid transparent;
+        border-radius: 50%;
+        animation: spin 1s linear infinite;
+    }
+    @keyframes spin {
+        0% { transform: rotate(0deg); }
+        100% { transform: rotate(360deg); }
+    }
+    @media (max-width: 768px) {
+        .main-container {
+            padding: 10px;
+        }
+        .stats-grid {
+            grid-template-columns: 1fr;
+        }
+        .viz-container {
+            height: 400px;
+        }
+    }
     """
     def __init__(self, **kwargs):
                         block_states[next_block_id]['is_complete'] = True
     def _render_visualization_html(self, step: int, x_t: torch.Tensor, block_states: Dict, cache_length: int, updated_block_ids: Set[int]) -> str:
+        # This function is identical to your original one, with HTML escaping.
         timestamp = int(time.time() * 1000)
+        html_parts = []
+        html_parts.append('<div class="viz-header">Slow-Motion Generation Process</div>')
         for block_id in sorted(k for k in block_states.keys() if k > 0):
             state = block_states[block_id]
+            container_classes = ["block-container"]
+            if block_id in updated_block_ids: container_classes.append("block-updating")
             html_parts.append(f'<div class="{" ".join(container_classes)}" id="block-{block_id}-{timestamp}">')
             block_tokens = x_t[0, state['start_pos']:state['end_pos']]
             for token_id in block_tokens:
                 token_id_int = token_id.item()
                 token_classes = ["token"]
                 if token_id_int == self.mask_token_id:
+                    token_str = '░'; token_classes.append("mask")
                 else:
                     token_str = self.tokenizer.decode([token_id_int], skip_special_tokens=False)
                     token_str = token_str.replace('&', '&').replace('<', '<').replace('>', '>')
                     token_classes.append(f"gen-{(block_id - 1) % 6}")
                 html_parts.append(f'<span class="{" ".join(token_classes)}">{token_str}</span>')
             html_parts.append('</div>')
         html_parts.append(f'<div class="scroll-anchor" id="viz-anchor-{timestamp}"></div>')
+        # Script part from original for scrolling
+        complete_html = f"""
+        <div class="viz-content" id="viz-content-{timestamp}">
+            {''.join(html_parts)}
+        </div>
+        <script>
+        (function() {{
+            const container = document.querySelector('.viz-container');
+            if (container) {{ container.scrollTop = container.scrollHeight; }}
+        }})();
+        </script>
+        """
+        return complete_html
     @spaces.GPU
     @torch.inference_mode()
     def stream_and_capture_for_gradio(
+        self,
+        prompt_text: str,
+        max_new_tokens: int,
+        block_size: int,
+        block_add_threshold: float,
+        decoded_token_threshold: float,
+        skip_threshold: float
     ) -> Iterator[Tuple[str, List[str], str, bool]]:
+        # This is the core generation algorithm, now identical to your original script
+        self._ensure_model_loaded()
         start_time = time.time()
         captured_frames: List[str] = []
             current_viz_html = self._render_visualization_html(step, x_t, block_states, cache_length, updated_block_ids)
             captured_frames.append(current_viz_html)
             yield live_text, captured_frames, f'<div class="generating-indicator">Generating... Step {step}</div>', False
         total_time = time.time() - start_time
         final_generated_ids = x_t[0, prompt_length:]
         eos_positions = (final_generated_ids == self.tokenizer.eos_token_id).nonzero()
+        if eos_positions.numel() > 0:
+            final_generated_ids = final_generated_ids[:eos_positions[0, 0] + 1]
         final_text = self.tokenizer.decode(final_generated_ids, skip_special_tokens=True)
         final_viz_html = self._render_visualization_html(step, x_t, block_states, cache_length, set())
         <div class="stats-card">
             <h3>✅ Generation Complete!</h3>
             <div class="stats-grid">
+                <div class="stat-item">
+                    <div class="stat-label">Total Time</div>
+                    <div class="stat-value">{total_time:.2f}s</div>
+                </div>
+                <div class="stat-item">
+                    <div class="stat-label">Tokens (incl. EOS)</div>
+                    <div class="stat-value">{tokens_incl_eos}</div>
+                </div>
+                <div class="stat-item">
+                    <div class="stat-label">Tokens (excl. EOS)</div>
+                    <div class="stat-value">{tokens_excl_eos}</div>
+                </div>
+                <div class="stat-item">
+                    <div class="stat-label">Tokens/Second</div>
+                    <div class="stat-value">{(tokens_incl_eos / total_time):.1f}</div>
+                </div>
             </div>
         </div>
         """
             yield frame
             time.sleep(delay)
+    # Simplified auto-scroll JS from your original script
     auto_scroll_js = """
     <script>
+    function setupAutoScroll(containerSelector, contentSelector) {
+        const container = document.querySelector(containerSelector);
+        if (!container) return;
+        const observer = new MutationObserver(() => {
+            container.scrollTop = container.scrollHeight;
+        });
+        observer.observe(container, {
+            childList: true,
+            subtree: true
+        });
     }
+    document.addEventListener('DOMContentLoaded', () => {
+        // Use a timeout to ensure Gradio elements are rendered
         setTimeout(() => {
+            setupAutoScroll('#live-text-output', 'textarea');
+            setupAutoScroll('.viz-container', '.viz-content');
+        }, 1500);
     });
     </script>
     """
         html_frames_state = gr.State([])
         generation_complete_state = gr.State(False)
+        gr.HTML(auto_scroll_js) # Keep the JS injection
+        # The entire UI layout is now identical to your original script
         with gr.Column(elem_classes=["main-container"]):
             gr.Markdown("# ✨ D2F: Faster-than-AR Inference for Diffusion LLMs")
             gr.Markdown(
                 """
             with gr.Row():
                 with gr.Column(scale=2):
+                    prompt_input = gr.Textbox(
+                        label="🤔 Enter your question",
+                        placeholder="Ask me anything! Try: 'Explain quantum physics' or 'Write a story about...'",
+                        lines=4,
+                        elem_classes=["param-card"]
+                    )
                     with gr.Accordion("⚙️ Advanced Settings", open=False):
                         with gr.Row():
                             max_new_tokens_slider = gr.Slider(minimum=64, maximum=2048, value=1024, step=64, label="Max Tokens", info="Maximum number of tokens to generate")
                         with gr.Row():
                             skip_thresh_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, step=0.01, label="Skip Threshold", info="Token selection threshold")
                             delay_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.15, step=0.05, label="Playback Speed", info="Slow-motion playback delay (seconds)")
                     generate_button = gr.Button("🚀 Generate Text", variant="primary", size="lg")
                 with gr.Column(scale=3):
             with gr.Row():
                 with gr.Column():
                     slowmo_button = gr.Button("🎬 Watch Slow-Motion Generation Process", variant="secondary", size="lg", elem_classes=["control-button"], visible=False, interactive=False)
+                    with gr.Group(elem_classes=["viz-container"], visible=False) as viz_group:
                         visualization_output = gr.HTML(label="")
             # Examples are identical to your original script
                 label="💡 Try these examples"
             )
+        # Event handling is now identical to your original, correct script
         def update_slowmo_button_visibility(is_complete):
             """Build the Gradio update that makes the slow-motion button visible and clickable exactly when ``is_complete`` is truthy."""
             button_state = dict(visible=is_complete, interactive=is_complete)
             return gr.update(**button_state)
             block_add_thresh_slider, decoded_token_thresh_slider, skip_thresh_slider
         ]
+        # This is the original, correct event chain
         generation_event = generate_button.click(
+            fn=lambda: [gr.update(visible=False, interactive=False), gr.update(visible=False), gr.update(value=None), gr.update(value="")],
+            outputs=[slowmo_button, viz_group, stats_output, live_text_output]
         ).then(
             fn=inference_engine.stream_and_capture_for_gradio,
             inputs=inputs_list,
             fn=update_slowmo_button_visibility,
             inputs=[generation_complete_state],
             outputs=[slowmo_button]
         )
         slowmo_event = slowmo_button.click(