Update app.py
app.py
CHANGED
@@ -1,5 +1,6 @@
 import gradio as gr
 import tensorflow as tf
+import keras
 from huggingface_hub import hf_hub_download
 import json
 import os
@@ -25,8 +26,8 @@ CACHE_DIR = "./model_cache"
 # Model Architecture Definitions (Required for Loading)
 # ============================================================================
 
-@
-class RotaryEmbedding(tf.keras.layers.Layer):
+@keras.saving.register_keras_serializable()
+class RotaryEmbedding(keras.layers.Layer):
     def __init__(self, dim, max_len=2048, theta=10000, **kwargs):
         super().__init__(**kwargs)
         self.dim = dim
@@ -68,8 +69,8 @@ class RotaryEmbedding(tf.keras.layers.Layer):
         return config
 
 
-@
-class RMSNorm(tf.keras.layers.Layer):
+@keras.saving.register_keras_serializable()
+class RMSNorm(keras.layers.Layer):
     def __init__(self, epsilon=1e-5, **kwargs):
         super().__init__(**kwargs)
         self.epsilon = epsilon
@@ -87,8 +88,8 @@ class RMSNorm(tf.keras.layers.Layer):
         return config
 
 
-@
-class TransformerBlock(tf.keras.layers.Layer):
+@keras.saving.register_keras_serializable()
+class TransformerBlock(keras.layers.Layer):
     def __init__(self, d_model, n_heads, ff_dim, dropout, max_len, rope_theta, layer_idx=0, **kwargs):
         super().__init__(**kwargs)
         self.d_model = d_model
@@ -103,18 +104,18 @@ class TransformerBlock(tf.keras.layers.Layer):
         self.pre_attn_norm = RMSNorm()
         self.pre_ffn_norm = RMSNorm()
 
-        self.q_proj =
-        self.k_proj =
-        self.v_proj =
-        self.out_proj =
+        self.q_proj = keras.layers.Dense(d_model, use_bias=False, name="q_proj")
+        self.k_proj = keras.layers.Dense(d_model, use_bias=False, name="k_proj")
+        self.v_proj = keras.layers.Dense(d_model, use_bias=False, name="v_proj")
+        self.out_proj = keras.layers.Dense(d_model, use_bias=False, name="o_proj")
 
         self.rope = RotaryEmbedding(self.head_dim, max_len=max_len, theta=rope_theta)
 
-        self.gate_proj =
-        self.up_proj =
-        self.down_proj =
+        self.gate_proj = keras.layers.Dense(ff_dim, use_bias=False, name="gate_proj")
+        self.up_proj = keras.layers.Dense(ff_dim, use_bias=False, name="up_proj")
+        self.down_proj = keras.layers.Dense(d_model, use_bias=False, name="down_proj")
 
-        self.dropout =
+        self.dropout = keras.layers.Dropout(dropout)
 
     def call(self, x, training=None):
         B, T, D = tf.shape(x)[0], tf.shape(x)[1], self.d_model
@@ -146,7 +147,7 @@ class TransformerBlock(tf.keras.layers.Layer):
         # FFN (SwiGLU)
         res = x
         y = self.pre_ffn_norm(x)
-        ffn = self.down_proj(
+        ffn = self.down_proj(keras.activations.silu(self.gate_proj(y)) * self.up_proj(y))
 
         return res + self.dropout(ffn, training=training)
 
@@ -164,8 +165,8 @@ class TransformerBlock(tf.keras.layers.Layer):
         return config
 
 
-@
-class SAM1Model(tf.keras.Model):
+@keras.saving.register_keras_serializable()
+class SAM1Model(keras.Model):
     def __init__(self, **kwargs):
         super().__init__()
         if 'config' in kwargs and isinstance(kwargs['config'], dict):
@@ -175,7 +176,7 @@ class SAM1Model(tf.keras.Model):
         else:
             self.cfg = kwargs.get('cfg', kwargs)
 
-        self.embed =
+        self.embed = keras.layers.Embedding(self.cfg['vocab_size'], self.cfg['d_model'], name="embed_tokens")
 
         ff_dim = int(self.cfg['d_model'] * self.cfg['ff_mult'])
         block_args = {
@@ -193,7 +194,7 @@ class SAM1Model(tf.keras.Model):
             self.blocks.append(block)
 
         self.norm = RMSNorm(name="final_norm")
-        self.lm_head =
+        self.lm_head = keras.layers.Dense(self.cfg['vocab_size'], use_bias=False, name="lm_head")
 
     def call(self, input_ids, training=None):
         x = self.embed(input_ids)
@@ -239,7 +240,7 @@ print(f" Custom tokens: {custom_tokens}")
 eos_token_id = config.get('eos_token_id', 50256)
 
 # Load model with TF function optimization
-model =
+model = keras.models.load_model(model_path, compile=False)
 
 # Create optimized inference function
 @tf.function(reduce_retracing=True)
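Background on the change (not part of the diff itself): `keras.saving.register_keras_serializable()` adds each custom class to Keras' global serialization registry, which is what lets `keras.models.load_model(model_path, compile=False)` rebuild RotaryEmbedding, RMSNorm, TransformerBlock, and SAM1Model from the saved file without passing a `custom_objects` dict. A minimal round-trip sketch of that mechanism, using a made-up `Scale` layer (the class, file name, and shapes below are illustrative only and do not appear in app.py):

import keras
import numpy as np

# Registration stores the class in Keras' serialization registry under its
# name, so load_model can map the saved config back to this Python class.
@keras.saving.register_keras_serializable()
class Scale(keras.layers.Layer):
    def __init__(self, factor=2.0, **kwargs):
        super().__init__(**kwargs)
        self.factor = factor

    def call(self, x):
        return x * self.factor

    def get_config(self):
        # Every __init__ argument must be serialized so the layer can be rebuilt.
        config = super().get_config()
        config.update({"factor": self.factor})
        return config

# Build, save, and reload a tiny model to demonstrate the round trip.
inputs = keras.Input(shape=(4,))
outputs = Scale(factor=3.0)(inputs)
model = keras.Model(inputs, outputs)
model.save("scale_demo.keras")

# Because Scale is registered, no custom_objects dict is needed here,
# mirroring the keras.models.load_model(..., compile=False) call in app.py.
reloaded = keras.models.load_model("scale_demo.keras", compile=False)
print(reloaded(np.ones((1, 4))))

`compile=False` skips restoring the saved optimizer and loss state, which an inference-only Space does not need, and wrapping the forward pass in `@tf.function(reduce_retracing=True)` asks TensorFlow to reuse traced graphs across calls with differing input shapes rather than re-tracing on every new sequence length.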