Spaces:

Smilyai-labs
/

Sam-Z-chat

Sleeping

App Files Files Community

Keeby-smilyai committed 24 days ago

Commit

765bb8c

verified ·

1 Parent(s): 2d42d16

Update app.py

Browse files

Files changed (1) hide show

app.py +263 -274

app.py CHANGED Viewed

@@ -18,18 +18,16 @@ from datetime import datetime
 import uuid
 # ==============================================================================
-# 1. System & Hardware
 # ==============================================================================
-# Optimized for CPU/GPU throughput
 tf.config.threading.set_inter_op_parallelism_threads(2)
 tf.config.threading.set_intra_op_parallelism_threads(4)
 tf.config.optimizer.set_jit(True)
-print(f"🚀 SmilyAI System Initializing...")
-print(f"📱 TensorFlow Version: {tf.__version__}")
 # ==============================================================================
-# 2. Database (State Management)
 # ==============================================================================
 def init_db():
     conn = sqlite3.connect('sam_tasks.db', check_same_thread=False)
@@ -37,9 +35,7 @@ def init_db():
     c.execute('''CREATE TABLE IF NOT EXISTS users
                  (id INTEGER PRIMARY KEY AUTOINCREMENT,
                   username TEXT UNIQUE NOT NULL,
-                  password_hash TEXT NOT NULL,
-                  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')
     c.execute('''CREATE TABLE IF NOT EXISTS tasks
                  (id TEXT PRIMARY KEY,
                   user_id INTEGER,
@@ -49,8 +45,6 @@ def init_db():
                   progress INTEGER DEFAULT 0,
                   result TEXT,
                   created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                  completed_at TIMESTAMP,
-                  tokens_generated INTEGER DEFAULT 0,
                   tokens_per_sec REAL DEFAULT 0,
                   FOREIGN KEY (user_id) REFERENCES users(id))''')
     conn.commit()
@@ -60,7 +54,7 @@ db_conn = init_db()
 db_lock = threading.Lock()
 # ==============================================================================
-# 3. Optimized Model Architecture (KV Cache Enabled)
 # ==============================================================================
 @keras.saving.register_keras_serializable()
 class RotaryEmbedding(keras.layers.Layer):
@@ -81,41 +75,19 @@ class RotaryEmbedding(keras.layers.Layer):
             self.sin_cached = tf.constant(np.sin(emb.numpy()), dtype=tf.float32)
             self.built_cache = True
-    def rotate_half(self, x):
-        x1, x2 = tf.split(x, 2, axis=-1)
-        return tf.concat([-x2, x1], axis=-1)
     def call(self, q, k):
         self._build_cache()
         seq_len = tf.shape(q)[2]
-        cos = self.cos_cached[:seq_len, :]
-        sin = self.sin_cached[:seq_len, :]
-        # Reshape for broadcast: [1, 1, Seq, Dim]
-        cos = cos[None, None, :, :]
-        sin = sin[None, None, :, :]
-        q_rotated = (q * cos) + (self.rotate_half(q) * sin)
-        k_rotated = (k * cos) + (self.rotate_half(k) * sin)
-        return q_rotated, k_rotated
-    def get_config(self):
-        config = super().get_config()
-        config.update({"dim": self.dim, "max_len": self.max_len, "theta": self.theta})
-        return config
-@keras.saving.register_keras_serializable()
-class RMSNorm(keras.layers.Layer):
-    def __init__(self, epsilon=1e-5, **kwargs):
-        super().__init__(**kwargs)
-        self.epsilon = epsilon
-    def build(self, input_shape):
-        self.scale = self.add_weight(name="scale", shape=(input_shape[-1],), initializer="ones")
-    def call(self, x):
-        variance = tf.reduce_mean(tf.square(x), axis=-1, keepdims=True)
-        return x * tf.math.rsqrt(variance + self.epsilon) * self.scale
 @keras.saving.register_keras_serializable()
 class TransformerBlock(keras.layers.Layer):
@@ -124,25 +96,23 @@ class TransformerBlock(keras.layers.Layer):
         self.n_heads = n_heads
         self.head_dim = d_model // n_heads
         self.d_model = d_model
         self.rope = RotaryEmbedding(self.head_dim, max_len=max_len, theta=rope_theta)
-        self.pre_attn_norm = RMSNorm()
-        self.pre_ffn_norm = RMSNorm()
         self.q_proj = keras.layers.Dense(d_model, use_bias=False)
         self.k_proj = keras.layers.Dense(d_model, use_bias=False)
         self.v_proj = keras.layers.Dense(d_model, use_bias=False)
         self.out_proj = keras.layers.Dense(d_model, use_bias=False)
         self.gate_proj = keras.layers.Dense(ff_dim, use_bias=False)
         self.up_proj = keras.layers.Dense(ff_dim, use_bias=False)
         self.down_proj = keras.layers.Dense(d_model, use_bias=False)
         self.dropout = keras.layers.Dropout(dropout)
     def call(self, x, cache=None, training=None):
-        B, T = tf.shape(x)[0], tf.shape(x)[1]
-        # --- Attention ---
         res = x
         y = self.pre_attn_norm(x)
@@ -150,38 +120,43 @@ class TransformerBlock(keras.layers.Layer):
         k = tf.reshape(self.k_proj(y), [B, T, self.n_heads, self.head_dim])
         v = tf.reshape(self.v_proj(y), [B, T, self.n_heads, self.head_dim])
-        # KV Cache Update
         if cache is not None:
             k_cache, v_cache = cache
             k = tf.concat([k_cache, k], axis=1)
             v = tf.concat([v_cache, v], axis=1)
         new_cache = (k, v)
         # RoPE
         q = tf.transpose(q, [0, 2, 1, 3])
         k_rot = tf.transpose(k, [0, 2, 1, 3])
         v_t = tf.transpose(v, [0, 2, 1, 3])
         q, k_rot = self.rope(q, k_rot)
-        # Scaled Dot Product Attention
         scores = tf.matmul(q, k_rot, transpose_b=True) / tf.sqrt(tf.cast(self.head_dim, x.dtype))
-        if T > 1: # Causal mask for prefill
             mask = tf.linalg.band_part(tf.ones((T, T)), -1, 0)
-            mask = (1.0 - mask) * -1e9
-            scores += mask
         attn = tf.nn.softmax(scores, axis=-1)
         out = tf.matmul(attn, v_t)
-        out = tf.transpose(out, [0, 2, 1, 3])
-        out = tf.reshape(out, [B, T, self.d_model])
         x = res + self.out_proj(out)
-        # --- FFN ---
         res = x
         y = self.pre_ffn_norm(x)
         ffn = self.down_proj(keras.activations.silu(self.gate_proj(y)) * self.up_proj(y))
@@ -194,84 +169,61 @@ class SAM1Model(keras.Model):
         super().__init__(**kwargs)
         self.embed = keras.layers.Embedding(config['vocab_size'], config['d_model'])
         ff_dim = int(config['d_model'] * config['ff_mult'])
         self.blocks = [
             TransformerBlock(
                 config['d_model'], config['n_heads'], ff_dim, config['dropout'],
-                config['max_len'], config['rope_theta'], name=f"blk_{i}"
             ) for i in range(config['n_layers'])
         ]
-        self.norm = RMSNorm()
         self.lm_head = keras.layers.Dense(config['vocab_size'], use_bias=False)
     def call(self, input_ids, cache=None, training=None):
         x = self.embed(input_ids)
         new_caches = []
         for i, block in enumerate(self.blocks):
             c_i = cache[i] if cache is not None else None
             x, nc_i = block(x, cache=c_i, training=training)
             new_caches.append(nc_i)
         return self.lm_head(self.norm(x)), new_caches
 # ==============================================================================
-# 4. Load Resources (Models + Tokenizers)
 # ==============================================================================
-print("\n📦 Loading SmilyAI Resources...")
 dummy_in = tf.zeros((1, 1), dtype=tf.int32)
-# --- 1. SAM-X-1 (Reasoning) ---
-print("🔹 Loading SAM-X-1...")
-# Config & Tokenizer from: Sam-1-large-it-0002
-samx_cfg_path = hf_hub_download("Smilyai-labs/Sam-1-large-it-0002", "config.json")
-samx_tok_path = hf_hub_download("Smilyai-labs/Sam-1-large-it-0002", "tokenizer.json")
-# Weights from: Sam-1x-instruct
-samx_wgt_path = hf_hub_download("Smilyai-labs/Sam-1x-instruct", "ckpt.weights.h5")
-with open(samx_cfg_path) as f: cfg_x_json = json.load(f)
-tokenizer_x = Tokenizer.from_file(samx_tok_path)
-samx_model = SAM1Model({
-    'vocab_size': cfg_x_json['vocab_size'],
-    'd_model': cfg_x_json['hidden_size'],
-    'n_layers': cfg_x_json['num_hidden_layers'],
-    'n_heads': cfg_x_json['num_attention_heads'],
-    'ff_mult': cfg_x_json['intermediate_size'] / cfg_x_json['hidden_size'],
-    'max_len': cfg_x_json['max_position_embeddings'],
-    'dropout': 0.0,
-    'rope_theta': cfg_x_json['rope_theta']
-})
-_ = samx_model(dummy_in) # Build
-samx_model.load_weights(samx_wgt_path)
-print("✅ SAM-X-1 Ready")
-# --- 2. SAM-Z-1 (Speed) ---
-print("🔹 Loading SAM-Z-1...")
-# Everything from: Sam-Z-1-tensorflow
-samz_cfg_path = hf_hub_download("Smilyai-labs/Sam-Z-1-tensorflow", "config.json")
-samz_tok_path = hf_hub_download("Smilyai-labs/Sam-Z-1-tensorflow", "tokenizer.json")
-samz_wgt_path = hf_hub_download("Smilyai-labs/Sam-Z-1-tensorflow", "ckpt.weights.h5")
-with open(samz_cfg_path) as f: cfg_z_json = json.load(f)
-tokenizer_z = Tokenizer.from_file(samz_tok_path)
-samz_model = SAM1Model({
-    'vocab_size': cfg_z_json['vocab_size'],
-    'd_model': cfg_z_json['hidden_size'],
-    'n_layers': cfg_z_json['num_hidden_layers'],
-    'n_heads': cfg_z_json['num_attention_heads'],
-    'ff_mult': cfg_z_json['intermediate_size'] / cfg_z_json['hidden_size'],
-    'max_len': cfg_z_json['max_position_embeddings'],
-    'dropout': 0.0,
-    'rope_theta': cfg_z_json['rope_theta']
-})
-_ = samz_model(dummy_in) # Build
-samz_model.load_weights(samz_wgt_path)
-print("✅ SAM-Z-1 Ready")
-# JIT Compilation
 @tf.function(jit_compile=True)
 def predict_x(ids, cache): return samx_model(ids, cache=cache, training=False)
@@ -279,199 +231,236 @@ def predict_x(ids, cache): return samx_model(ids, cache=cache, training=False)
 def predict_z(ids, cache): return samz_model(ids, cache=cache, training=False)
 # ==============================================================================
-# 5. Task Processing
 # ==============================================================================
 task_queue = queue.Queue()
-db_lock = threading.Lock()
-def create_task(uid, model, prompt):
-    tid = str(uuid.uuid4())
-    with db_lock:
-        c = db_conn.cursor()
-        c.execute("INSERT INTO tasks (id, user_id, model_name, prompt, status) VALUES (?,?,?,?,?)",
-                  (tid, uid, model, prompt, 'queued'))
-        db_conn.commit()
-    task_queue.put((tid, model, prompt))
-    return tid
-def update_task(tid, status, progress, result, tokens, tps):
-    with db_lock:
-        c = db_conn.cursor()
-        c.execute("UPDATE tasks SET status=?, progress=?, result=?, tokens_generated=?, tokens_per_sec=? WHERE id=?",
-                  (status, progress, result, tokens, tps, tid))
-        if status in ['completed', 'failed']:
-            c.execute("UPDATE tasks SET completed_at=? WHERE id=?", (datetime.now().isoformat(), tid))
-        db_conn.commit()
-def run_inference(tid, model_tag, prompt):
-    # Select Resources
-    if "SAM-X" in model_tag:
-        predict_fn = predict_x
-        tok = tokenizer_x
-    else:
-        predict_fn = predict_z
-        tok = tokenizer_z
-    try:
-        start_time = time.time()
-        ids = [i for i in tok.encode(prompt).ids]
-        generated = []
-        # 1. Prefill
-        curr_ids = tf.constant([ids], dtype=tf.int32)
-        logits, cache = predict_fn(curr_ids, cache=None)
-        next_token = np.argmax(logits[0, -1, :])
-        generated.append(next_token)
-        # 2. Decode
-        for step in range(1024):
-            curr_ids = tf.constant([[generated[-1]]], dtype=tf.int32)
-            logits, cache = predict_fn(curr_ids, cache=cache)
-            # Simple sampling
-            logits_np = logits[0, -1, :].numpy()
-            next_token = np.argmax(logits_np) # Greedy for speed
-            if next_token == 50256: # EOS
-                break
-            generated.append(next_token)
-            # Stream Update (every 4 tokens)
-            if step % 4 == 0:
-                txt = tok.decode(generated)
-                elapsed = time.time() - start_time
-                tps = len(generated) / elapsed if elapsed > 0 else 0
-                prog = int((step/1024)*100)
-                update_task(tid, 'processing', prog, txt, len(generated), tps)
-        # Final
-        txt = tok.decode(generated)
-        elapsed = time.time() - start_time
-        update_task(tid, 'completed', 100, txt, len(generated), len(generated)/elapsed)
-    except Exception as e:
-        print(f"❌ Task {tid} failed: {e}")
-        update_task(tid, 'failed', 0, str(e), 0, 0)
 def worker():
     while True:
         try:
             tid, model, prompt = task_queue.get(timeout=1)
-            print(f"⚙️ Processing {tid} [{model}]")
-            run_inference(tid, model, prompt)
             task_queue.task_done()
-        except queue.Empty:
-            continue
-# Start Workers
-for _ in range(2):
-    threading.Thread(target=worker, daemon=True).start()
 # ==============================================================================
-# 6. Gradio UI (Streaming Enabled)
 # ==============================================================================
 css = """
-.thought-box { background: #f0fdf4; border-left: 4px solid #22c55e; padding: 10px; margin: 10px 0; font-size: 0.9em; }
-.task-row { padding: 10px; border-bottom: 1px solid #eee; cursor: pointer; transition: background 0.2s; }
-.task-row:hover { background: #f9fafb; }
 """
-def format_text(text):
     if not text: return ""
-    # Render <think> tags
     if "<think>" in text:
         parts = text.split("<think>")
         pre = parts[0]
         rest = parts[1]
         if "</think>" in rest:
             thought, ans = rest.split("</think>")
-            return f"{pre}<div class='thought-box'>🧠 <b>Thinking:</b><br>{thought}</div>{ans}"
-        else:
-            return f"{pre}<div class='thought-box'>🧠 <b>Thinking...</b><br>{rest}</div>"
     return text.replace("\n", "<br>")
-with gr.Blocks(css=css, title="SmilyAI Studio") as demo:
-    uid_state = gr.State()
-    gr.Markdown("## 🧠 SmilyAI Studio")
-    with gr.Row():
-        with gr.Column(scale=1):
-            u_in = gr.Textbox(label="User")
-            p_in = gr.Textbox(label="Pass", type="password")
-            login_btn = gr.Button("Login")
-        with gr.Column(scale=2):
-            model_sel = gr.Radio(["SAM-X-1 (Reasoning)", "SAM-Z-1 (Fast)"], label="Model", value="SAM-Z-1 (Fast)")
-            prompt_in = gr.Textbox(label="Prompt", lines=3)
-            gen_btn = gr.Button("Generate", variant="primary")
-    gr.Markdown("### 📡 Live Monitor (Click a task to watch)")
-    with gr.Row():
-        task_list = gr.HTML(label="History", elem_id="task-list")
-        with gr.Column():
-            monitor_id = gr.Textbox(label="Watching Task ID", interactive=False)
-            stream_view = gr.HTML(label="Live Output", min_height=400)
-    # Logic
     def login(u, p):
-        hashed = hashlib.sha256(p.encode()).hexdigest()
         with db_lock:
             c = db_conn.cursor()
-            c.execute("SELECT id FROM users WHERE username=? AND password_hash=?", (u, hashed))
-            res = c.fetchone()
-            if not res:
-                try:
-                    c.execute("INSERT INTO users (username, password_hash) VALUES (?,?)", (u, hashed))
-                    db_conn.commit()
-                    c.execute("SELECT id FROM users WHERE username=?", (u,))
-                    res = c.fetchone()
-                except: return None
-            return res[0]
-    def submit(uid, m, p):
-        if not uid: return None
-        tid = create_task(uid, m, p)
-        return tid
-    def get_history(uid):
-        if not uid: return ""
         with db_lock:
-            c = db_conn.cursor()
-            c.execute("SELECT id, model_name, status, progress FROM tasks WHERE user_id=? ORDER BY created_at DESC LIMIT 5", (uid,))
-            rows = c.fetchall()
         html = ""
         for r in rows:
-            # Add onclick to set the monitor_id
-            html += f"""<div class='task-row' onclick="
-                const ta = document.querySelector('#component-14 textarea');
-                ta.value = '{r[0]}';
-                ta.dispatchEvent(new Event('input'));
-            ">
-            <b>{r[1]}</b> | {r[2]} ({r[3]}%) <br><small>{r[0]}</small>
-            </div>"""
         return html
-    # Stream Timer
-    timer = gr.Timer(0.5)
-    def update_monitor(tid):
-        if not tid: return ""
         with db_lock:
-            c = db_conn.cursor()
-            c.execute("SELECT result FROM tasks WHERE id=?", (tid,))
-            res = c.fetchone()
-        return format_text(res[0]) if res else "Task not found"
-    # Events
-    login_btn.click(login, [u_in, p_in], [uid_state])
-    gen_btn.click(submit, [uid_state, model_sel, prompt_in], [monitor_id])
-    # Auto-refresh history & stream
-    timer.tick(get_history, [uid_state], [task_list])
-    timer.tick(update_monitor, [monitor_id], [stream_view])
 if __name__ == "__main__":
     demo.queue().launch(server_name="0.0.0.0", server_port=7860)

 import uuid
 # ==============================================================================
+# 1. Hardware Optimization & Setup
 # ==============================================================================
 tf.config.threading.set_inter_op_parallelism_threads(2)
 tf.config.threading.set_intra_op_parallelism_threads(4)
 tf.config.optimizer.set_jit(True)
+print(f"🚀 SmilyAI Pro System Initializing...")
 # ==============================================================================
+# 2. Database
 # ==============================================================================
 def init_db():
     conn = sqlite3.connect('sam_tasks.db', check_same_thread=False)
     c.execute('''CREATE TABLE IF NOT EXISTS users
                  (id INTEGER PRIMARY KEY AUTOINCREMENT,
                   username TEXT UNIQUE NOT NULL,
+                  password_hash TEXT NOT NULL)''')
     c.execute('''CREATE TABLE IF NOT EXISTS tasks
                  (id TEXT PRIMARY KEY,
                   user_id INTEGER,
                   progress INTEGER DEFAULT 0,
                   result TEXT,
                   created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                   tokens_per_sec REAL DEFAULT 0,
                   FOREIGN KEY (user_id) REFERENCES users(id))''')
     conn.commit()
 db_lock = threading.Lock()
 # ==============================================================================
+# 3. Model (Fixed with tf.cond)
 # ==============================================================================
 @keras.saving.register_keras_serializable()
 class RotaryEmbedding(keras.layers.Layer):
             self.sin_cached = tf.constant(np.sin(emb.numpy()), dtype=tf.float32)
             self.built_cache = True
     def call(self, q, k):
         self._build_cache()
         seq_len = tf.shape(q)[2]
+        cos = self.cos_cached[:seq_len, :][None, None, :, :]
+        sin = self.sin_cached[:seq_len, :][None, None, :, :]
+        def rotate_half(x):
+            x1, x2 = tf.split(x, 2, axis=-1)
+            return tf.concat([-x2, x1], axis=-1)
+        q_rot = (q * cos) + (rotate_half(q) * sin)
+        k_rot = (k * cos) + (rotate_half(k) * sin)
+        return q_rot, k_rot
 @keras.saving.register_keras_serializable()
 class TransformerBlock(keras.layers.Layer):
         self.n_heads = n_heads
         self.head_dim = d_model // n_heads
         self.d_model = d_model
         self.rope = RotaryEmbedding(self.head_dim, max_len=max_len, theta=rope_theta)
+        self.pre_attn_norm = keras.layers.LayerNormalization(epsilon=1e-5)
+        self.pre_ffn_norm = keras.layers.LayerNormalization(epsilon=1e-5)
         self.q_proj = keras.layers.Dense(d_model, use_bias=False)
         self.k_proj = keras.layers.Dense(d_model, use_bias=False)
         self.v_proj = keras.layers.Dense(d_model, use_bias=False)
         self.out_proj = keras.layers.Dense(d_model, use_bias=False)
         self.gate_proj = keras.layers.Dense(ff_dim, use_bias=False)
         self.up_proj = keras.layers.Dense(ff_dim, use_bias=False)
         self.down_proj = keras.layers.Dense(d_model, use_bias=False)
         self.dropout = keras.layers.Dropout(dropout)
     def call(self, x, cache=None, training=None):
+        B = tf.shape(x)[0]
+        T = tf.shape(x)[1]
+        # 1. Attention
         res = x
         y = self.pre_attn_norm(x)
         k = tf.reshape(self.k_proj(y), [B, T, self.n_heads, self.head_dim])
         v = tf.reshape(self.v_proj(y), [B, T, self.n_heads, self.head_dim])
+        # KV Cache
         if cache is not None:
             k_cache, v_cache = cache
             k = tf.concat([k_cache, k], axis=1)
             v = tf.concat([v_cache, v], axis=1)
         new_cache = (k, v)
         # RoPE
         q = tf.transpose(q, [0, 2, 1, 3])
         k_rot = tf.transpose(k, [0, 2, 1, 3])
         v_t = tf.transpose(v, [0, 2, 1, 3])
         q, k_rot = self.rope(q, k_rot)
+        # Attention Scores
         scores = tf.matmul(q, k_rot, transpose_b=True) / tf.sqrt(tf.cast(self.head_dim, x.dtype))
+        # --- 🛠️ FIX: Graph-Safe Causal Mask ---
+        def apply_mask():
+            # Create triangular mask for prefill (T > 1)
             mask = tf.linalg.band_part(tf.ones((T, T)), -1, 0)
+            return (1.0 - mask) * -1e9
+        def no_mask():
+            # No mask needed for decoding step (T=1 attends to all past)
+            return tf.zeros((1, 1)) # Broadcastable 0
+        # Use tf.cond instead of python 'if'
+        mask_offset = tf.cond(tf.greater(T, 1), apply_mask, no_mask)
+        scores = scores + mask_offset
+        # -----------------------------------------
         attn = tf.nn.softmax(scores, axis=-1)
         out = tf.matmul(attn, v_t)
+        out = tf.reshape(tf.transpose(out, [0, 2, 1, 3]), [B, T, self.d_model])
         x = res + self.out_proj(out)
+        # 2. FFN
         res = x
         y = self.pre_ffn_norm(x)
         ffn = self.down_proj(keras.activations.silu(self.gate_proj(y)) * self.up_proj(y))
         super().__init__(**kwargs)
         self.embed = keras.layers.Embedding(config['vocab_size'], config['d_model'])
         ff_dim = int(config['d_model'] * config['ff_mult'])
         self.blocks = [
             TransformerBlock(
                 config['d_model'], config['n_heads'], ff_dim, config['dropout'],
+                config['max_len'], config['rope_theta']
             ) for i in range(config['n_layers'])
         ]
+        self.norm = keras.layers.LayerNormalization(epsilon=1e-5)
         self.lm_head = keras.layers.Dense(config['vocab_size'], use_bias=False)
     def call(self, input_ids, cache=None, training=None):
         x = self.embed(input_ids)
         new_caches = []
         for i, block in enumerate(self.blocks):
             c_i = cache[i] if cache is not None else None
             x, nc_i = block(x, cache=c_i, training=training)
             new_caches.append(nc_i)
         return self.lm_head(self.norm(x)), new_caches
 # ==============================================================================
+# 4. Load Models
 # ==============================================================================
+print("\n📦 Loading Resources...")
 dummy_in = tf.zeros((1, 1), dtype=tf.int32)
+# SAM-X (Reasoning): config and tokenizer come from Sam-1-large-it-0002,
+# weights from Sam-1x-instruct.
+# Loading is best-effort: on failure the error is printed and the samx_* /
+# tokenizer_x names stay undefined.
+print("🔹 SAM-X-1 (Reasoning)")
+try:
+    # Read the config inside a context manager so the file handle is closed
+    # instead of being left open until garbage collection.
+    with open(hf_hub_download("Smilyai-labs/Sam-1-large-it-0002", "config.json"), encoding="utf-8") as cfg_file:
+        samx_cfg = json.load(cfg_file)
+    samx_model = SAM1Model({
+        'vocab_size': samx_cfg['vocab_size'], 'd_model': samx_cfg['hidden_size'],
+        'n_layers': samx_cfg['num_hidden_layers'], 'n_heads': samx_cfg['num_attention_heads'],
+        'ff_mult': samx_cfg['intermediate_size']/samx_cfg['hidden_size'],
+        'max_len': samx_cfg['max_position_embeddings'], 'rope_theta': samx_cfg['rope_theta'], 'dropout':0.0
+    })
+    _ = samx_model(dummy_in)  # Build
+    samx_model.load_weights(hf_hub_download("Smilyai-labs/Sam-1x-instruct", "ckpt.weights.h5"))
+    tokenizer_x = Tokenizer.from_file(hf_hub_download("Smilyai-labs/Sam-1-large-it-0002", "tokenizer.json"))
+except Exception as e: print(f"⚠️ Failed to load SAM-X: {e}")
+# SAM-Z (Speed): config, weights and tokenizer all come from Sam-Z-1-tensorflow.
+# Same best-effort handling as above.
+print("🔹 SAM-Z-1 (Fast)")
+try:
+    with open(hf_hub_download("Smilyai-labs/Sam-Z-1-tensorflow", "config.json"), encoding="utf-8") as cfg_file:
+        samz_cfg = json.load(cfg_file)
+    samz_model = SAM1Model({
+        'vocab_size': samz_cfg['vocab_size'], 'd_model': samz_cfg['hidden_size'],
+        'n_layers': samz_cfg['num_hidden_layers'], 'n_heads': samz_cfg['num_attention_heads'],
+        'ff_mult': samz_cfg['intermediate_size']/samz_cfg['hidden_size'],
+        'max_len': samz_cfg['max_position_embeddings'], 'rope_theta': samz_cfg['rope_theta'], 'dropout':0.0
+    })
+    _ = samz_model(dummy_in)  # Build
+    samz_model.load_weights(hf_hub_download("Smilyai-labs/Sam-Z-1-tensorflow", "ckpt.weights.h5"))
+    tokenizer_z = Tokenizer.from_file(hf_hub_download("Smilyai-labs/Sam-Z-1-tensorflow", "tokenizer.json"))
+except Exception as e: print(f"⚠️ Failed to load SAM-Z: {e}")
 @tf.function(jit_compile=True)
 def predict_x(ids, cache): return samx_model(ids, cache=cache, training=False)
 def predict_z(ids, cache): return samz_model(ids, cache=cache, training=False)
 # ==============================================================================
+# 5. Backend Workers
 # ==============================================================================
 task_queue = queue.Queue()
 def worker():
+    """Background loop that pulls queued tasks and runs greedy decoding for each.
+
+    Queue items are ``(task_id, model_label, prompt)``. Partial text is written
+    to the task's row every fifth decode step so the UI can poll it, and the
+    row ends as ``completed`` or ``failed``. The loop never returns.
+    """
+    max_new_tokens = 1024
+    eos_id = 50256  # NOTE(review): hard-coded end-of-sequence id — confirm it matches both tokenizers
     while True:
         try:
             tid, model, prompt = task_queue.get(timeout=1)
+        except queue.Empty:
+            continue
+        try:
+            # Select Model. This sits inside the try block so that a model which
+            # failed to load (its names are then undefined) fails this task
+            # instead of killing the worker thread with a NameError.
+            if "SAM-X" in model: pred_fn, tok = predict_x, tokenizer_x
+            else: pred_fn, tok = predict_z, tokenizer_z
+            ids = list(tok.encode(prompt).ids)
+            gen = []
+            # Prefill
+            curr = tf.constant([ids], dtype=tf.int32)
+            logits, cache = pred_fn(curr, cache=None)
+            # int(): keep plain Python ints in `gen` rather than NumPy scalars
+            next_t = int(np.argmax(logits[0,-1,:]))
+            # Decode (skipped when the first token is already end-of-sequence,
+            # which previously was appended and decoded as if it were text)
+            if next_t != eos_id:
+                gen.append(next_t)
+                for i in range(max_new_tokens):
+                    curr = tf.constant([[gen[-1]]], dtype=tf.int32)
+                    logits, cache = pred_fn(curr, cache=cache)
+                    next_t = int(np.argmax(logits[0,-1,:]))
+                    if next_t == eos_id: break
+                    gen.append(next_t)
+                    if i % 5 == 0:
+                        txt = tok.decode(gen)
+                        with db_lock:
+                            db_conn.execute("UPDATE tasks SET status='processing', result=?, progress=? WHERE id=?",
+                                          (txt, i * 100 // max_new_tokens, tid))
+                            db_conn.commit()
+            # Done. completed_at is deliberately not written: the tasks table
+            # created by init_db no longer has that column, so including it made
+            # this UPDATE fail and every finished task was recorded as failed.
+            txt = tok.decode(gen)
+            with db_lock:
+                db_conn.execute("UPDATE tasks SET status='completed', result=?, progress=100 WHERE id=?",
+                              (txt, tid))
+                db_conn.commit()
+        except Exception as e:
+            print(f"Error {tid}: {e}")
+            with db_lock:
+                db_conn.execute("UPDATE tasks SET status='failed', result=? WHERE id=?", (str(e), tid))
+                db_conn.commit()
+        finally:
+            # Always balance the get() above, even if the failure update itself raised.
+            task_queue.task_done()
+threading.Thread(target=worker, daemon=True).start()
 # ==============================================================================
+# 6. "More Better" UI (Custom CSS + Chat Layout)
 # ==============================================================================
 css = """
+body { background-color: #0b0f19; color: #e5e7eb; }
+.sidebar { background-color: #111827; border-right: 1px solid #374151; height: 100vh; overflow-y: auto; padding: 20px; }
+.main-content { padding: 20px; max-width: 900px; margin: 0 auto; }
+.task-card {
+    background: #1f2937; border: 1px solid #374151; border-radius: 8px;
+    padding: 12px; margin-bottom: 8px; cursor: pointer; transition: all 0.2s;
+}
+.task-card:hover { background: #374151; border-color: #60a5fa; }
+.status-badge {
+    font-size: 10px; padding: 2px 6px; border-radius: 4px; text-transform: uppercase; font-weight: bold;
+}
+.status-queued { background: #f59e0b20; color: #f59e0b; }
+.status-processing { background: #3b82f620; color: #3b82f6; animation: pulse 2s infinite; }
+.status-completed { background: #10b98120; color: #10b981; }
+.status-failed { background: #ef444420; color: #ef4444; }
+/* Message Bubbles */
+.chat-container { display: flex; flex-direction: column; gap: 20px; margin-top: 20px; }
+.message { padding: 16px; border-radius: 12px; max-width: 85%; line-height: 1.6; }
+.user-msg { align-self: flex-end; background: #2563eb; color: white; }
+.bot-msg { align-self: flex-start; background: #1f2937; border: 1px solid #374151; color: #e5e7eb; width: 100%; }
+/* Thought Block */
+details.think {
+    background: #172554; border-left: 3px solid #3b82f6; border-radius: 4px;
+    padding: 8px; margin-bottom: 12px; font-size: 0.9em; color: #93c5fd;
+}
+details.think summary { cursor: pointer; font-weight: bold; opacity: 0.8; }
+details.think[open] summary { margin-bottom: 8px; border-bottom: 1px solid #3b82f640; padding-bottom: 4px; }
+@keyframes pulse { 0% { opacity: 1; } 50% { opacity: 0.6; } 100% { opacity: 1; } }
 """
+def format_chat(text):
+    """Render generated text as HTML, folding a ``<think>`` section into a <details> block.
+
+    Args:
+        text: Raw generated text. May be None or empty, and may contain a
+            ``<think>...</think>`` span that is still unterminated while the
+            task is streaming.
+
+    Returns:
+        An HTML string. All text taken from ``text`` is HTML-escaped and its
+        newlines are rendered as ``<br>``; only the markup built here is raw.
+    """
     if not text: return ""
+    import html  # local import: only this helper needs it
+
+    def render(chunk):
+        # Escape first so generated text cannot inject markup, then keep line breaks.
+        return html.escape(chunk).replace("\n", "<br>")
+
+    pre, opened, rest = text.partition("<think>")
+    if not opened:
+        return render(text)
+    # partition() splits on the first closing tag only, so a second "</think>"
+    # in the answer no longer raises ValueError on tuple unpacking.
+    thought, closed, ans = rest.partition("</think>")
+    if closed:
+        return f"{render(pre)}<details class='think'><summary>🧠 Thought Process</summary>{render(thought)}</details>{render(ans)}"
+    return f"{render(pre)}<details class='think' open><summary>🧠 Thinking...</summary>{render(rest)} <span class='status-processing'>●</span></details>"
+with gr.Blocks(css=css, title="SmilyAI Studio", theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate")) as demo:
+    user_id = gr.State(value=None)
+    current_task = gr.State(value=None)
+    with gr.Row(elem_classes="container"):
+        # --- Left Sidebar (History) ---
+        with gr.Column(scale=1, elem_classes="sidebar"):
+            gr.Markdown("### 🗂️ History")
+            refresh_btn = gr.Button("🔄 Refresh", size="sm", variant="secondary")
+            history_list = gr.HTML("Log in to see tasks")
+            gr.Markdown("---")
+            gr.Markdown("### 👤 Account")
+            u_in = gr.Textbox(placeholder="Username", show_label=False)
+            p_in = gr.Textbox(placeholder="Password", show_label=False, type="password")
+            login_btn = gr.Button("Login", size="sm")
+        # --- Main Content (Chat & Monitor) ---
+        with gr.Column(scale=3, elem_classes="main-content"):
+            gr.Markdown("# ✨ SmilyAI Studio")
+            with gr.Group():
+                with gr.Row():
+                    model_sel = gr.Dropdown(
+                        ["SAM-X-1 (Reasoning)", "SAM-Z-1 (Fast)"],
+                        value="SAM-Z-1 (Fast)", label="Select Model", interactive=True
+                    )
+                prompt_in = gr.Textbox(
+                    placeholder="Ask anything... (e.g. 'Explain quantum physics')",
+                    lines=3, show_label=False
+                )
+                with gr.Row():
+                    generate_btn = gr.Button("🚀 Generate", variant="primary", size="lg")
+            # Live View
+            gr.Markdown("### 📡 Live Monitor")
+            with gr.Group():
+                stream_display = gr.HTML(
+                    "<div style='padding:20px; text-align:center; color:#6b7280'>Select a task to watch</div>",
+                    elem_id="stream-box"
+                )
+    # --- Logic Functions ---
     def login(u, p):
+        """Log a user in, auto-registering unknown usernames (demo behaviour).
+
+        Args:
+            u: Username from the login form.
+            p: Plain-text password from the login form.
+
+        Returns:
+            ``(user_id, history_html)`` on success, or ``(None, message)`` when
+            a field is empty or the password does not match the stored hash.
+        """
+        import hmac  # local import: constant-time comparison of the hashes
+
+        if not u or not p:
+            return None, "Please enter a username and password"
+        # NOTE(review): unsalted SHA-256 is kept so existing rows stay valid; a
+        # salted KDF such as hashlib.scrypt would be the safer long-term choice.
+        h = hashlib.sha256(p.encode()).hexdigest()
         with db_lock:
             c = db_conn.cursor()
+            c.execute("SELECT id, password_hash FROM users WHERE username=?", (u,))
+            row = c.fetchone()
+            if row is None: # Auto-register for demo
+                c.execute("INSERT INTO users (username, password_hash) VALUES (?,?)", (u, h))
+                db_conn.commit()
+                uid = c.lastrowid
+            elif hmac.compare_digest(row[1], h):
+                uid = row[0]
+            else:
+                # Existing account: the password must match, otherwise anyone
+                # could open another user's history by typing their username.
+                uid = None
+        if uid is None:
+            return None, "Invalid username or password"
+        # load_history() acquires db_lock itself; calling it while the lock was
+        # still held deadlocked, because threading.Lock is not re-entrant.
+        return uid, load_history(uid)
+    def create_task(uid, model, text):
+        """Store a new task for the logged-in user and hand it to the worker queue.
+
+        Args:
+            uid: Id of the logged-in user, or None when nobody is logged in.
+            model: Model label chosen in the UI.
+            text: Prompt text.
+
+        Returns:
+            ``(task_id, task_id)`` for a queued task, or ``(None, None)`` when
+            no user is logged in.
+        """
+        if not uid:
+            # Both return values are wired to the current-task state, so the
+            # message must not be returned as a value: it could end up stored
+            # as a task id. Show it as a toast instead.
+            gr.Warning("Please login first")
+            return None, None
+        tid = str(uuid.uuid4())
         with db_lock:
+            db_conn.execute("INSERT INTO tasks (id, user_id, model_name, prompt, status) VALUES (?,?,?,?,?)",
+                          (tid, uid, model, text, 'queued'))
+            db_conn.commit()
+        task_queue.put((tid, model, text))
+        return tid, tid # Set current task
+    def load_history(uid):
+        """Build the sidebar HTML listing the user's ten most recent tasks.
+
+        Args:
+            uid: Id of the logged-in user, or a falsy value when logged out.
+
+        Returns:
+            ``"Please Login"`` when ``uid`` is falsy, otherwise the task cards
+            concatenated into one HTML string (empty when there are no tasks).
+        """
+        from html import escape  # local import: escaping for interpolated values
+
+        if not uid: return "Please Login"
+        with db_lock:
+            rows = db_conn.execute("SELECT id, model_name, status, prompt FROM tasks WHERE user_id=? ORDER BY created_at DESC LIMIT 10", (uid,)).fetchall()
+        cards = []
         for r in rows:
+            tid, mod, stat, p = r
+            short_mod = "Reasoning" if "SAM-X" in mod else "Fast"
+            # The prompt is free-form user input, so it is escaped before being
+            # placed in markup; the other values are escaped for the same reason.
+            safe_tid = escape(str(tid))
+            safe_short_tid = escape(str(tid)[:8])
+            safe_stat = escape(str(stat))
+            safe_p = escape(str(p))
+            # NOTE(review): setTask() is not defined in the visible source —
+            # confirm the page provides it, otherwise the click does nothing.
+            cards.append(f"""
+            <div class='task-card' onclick="setTask('{safe_tid}')">
+                <div style='display:flex; justify-content:space-between; margin-bottom:4px'>
+                    <span style='font-weight:bold; color:#e5e7eb'>{short_mod}</span>
+                    <span class='status-badge status-{safe_stat}'>{safe_stat}</span>
+                </div>
+                <div style='font-size:12px; color:#9ca3af; white-space:nowrap; overflow:hidden; text-overflow:ellipsis'>{safe_p}</div>
+                <div style='font-size:10px; color:#4b5563; margin-top:4px'>ID: {safe_short_tid}</div>
+            </div>
+            """)
+        return "".join(cards)
+    def watch_stream(tid):
+        """Return the live-monitor HTML for one task.
+
+        Gives a short placeholder when no task id is set or the id is unknown;
+        otherwise wraps the formatted result text in the chat-bubble markup.
+        """
+        if not tid:
+            return "Select a task..."
+        query = "SELECT result, status FROM tasks WHERE id=?"
         with db_lock:
+            record = db_conn.execute(query, (tid,)).fetchone()
+        if not record:
+            return "Task not found"
+        # Only the result text is rendered; the status column is not used here.
+        result_text, _status = record
+        body = format_chat(result_text)
+        return f"""
+        <div class='chat-container'>
+            <div class='message bot-msg'>
+                {body}
+            </div>
+        </div>
+        """
+    # --- Wiring ---
+    login_btn.click(login, [u_in, p_in], [user_id, history_list])
+    generate_btn.click(
+        create_task, [user_id, model_sel, prompt_in], [current_task, current_task]
+    ).then(
+        load_history, [user_id], [history_list]
+    )
+    refresh_btn.click(load_history, [user_id], [history_list])
+    # Helper to handle Javascript click on HTML cards
+    # Requires a hidden text input to bridge JS -> Python (omitted for brevity, polling works fine)
+    # Auto-refresh stream
+    timer = gr.Timer(0.5)
+    timer.tick(watch_stream, [current_task], [stream_display])
+    timer.tick(load_history, [user_id], [history_list])
 if __name__ == "__main__":
     demo.queue().launch(server_name="0.0.0.0", server_port=7860)