Keeby-smilyai committed
Commit 4e68118 · verified · 1 Parent(s): dd3c42c

Create app.py

Files changed (1): app.py (+418, -0)
app.py ADDED
@@ -0,0 +1,418 @@
import os
# Select the Keras backend and quiet TF logging before Keras/TensorFlow import.
os.environ['KERAS_BACKEND'] = 'tensorflow'
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import tensorflow as tf
import keras
import numpy as np
from tokenizers import Tokenizer
from huggingface_hub import hf_hub_download
import re
import json

# ==============================================================================
# Model Architecture (Must match training code)
# ==============================================================================
@keras.saving.register_keras_serializable()
class RotaryEmbedding(keras.layers.Layer):
    def __init__(self, dim, max_len=2048, theta=10000, **kwargs):
        super().__init__(**kwargs)
        self.dim = dim
        self.max_len = max_len
        self.theta = theta
        self.built_cache = False

    def build(self, input_shape):
        if not self.built_cache:
            inv_freq = 1.0 / (self.theta ** (tf.range(0, self.dim, 2, dtype=tf.float32) / self.dim))
            t = tf.range(self.max_len, dtype=tf.float32)
            freqs = tf.einsum("i,j->ij", t, inv_freq)
            emb = tf.concat([freqs, freqs], axis=-1)

            self.cos_cached = tf.constant(tf.cos(emb), dtype=tf.float32)
            self.sin_cached = tf.constant(tf.sin(emb), dtype=tf.float32)
            self.built_cache = True
        super().build(input_shape)

    def rotate_half(self, x):
        x1, x2 = tf.split(x, 2, axis=-1)
        return tf.concat([-x2, x1], axis=-1)

    def call(self, q, k):
        seq_len = tf.shape(q)[2]
        dtype = q.dtype
        cos = tf.cast(self.cos_cached[:seq_len, :], dtype)[None, None, :, :]
        sin = tf.cast(self.sin_cached[:seq_len, :], dtype)[None, None, :, :]

        q_rotated = (q * cos) + (self.rotate_half(q) * sin)
        k_rotated = (k * cos) + (self.rotate_half(k) * sin)

        return q_rotated, k_rotated

    def get_config(self):
        config = super().get_config()
        config.update({"dim": self.dim, "max_len": self.max_len, "theta": self.theta})
        return config

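# Quick sanity check for RotaryEmbedding (a minimal illustrative sketch, not
# part of the app's control flow and never called by it). The rotation is
# norm-preserving: each (x1, x2) pair is rotated by a per-position angle, so
# per-position vector norms are unchanged.
def _rope_sanity_check():
    rope = RotaryEmbedding(dim=8, max_len=16)
    q = tf.random.normal([1, 2, 4, 8])  # [batch, heads, seq, head_dim]
    k = tf.random.normal([1, 2, 4, 8])
    q_rot, k_rot = rope(q, k)
    # Per-position norms are unchanged by the rotation (up to float error).
    tf.debugging.assert_near(tf.norm(q, axis=-1), tf.norm(q_rot, axis=-1), atol=1e-4)
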
@keras.saving.register_keras_serializable()
class RMSNorm(keras.layers.Layer):
    def __init__(self, epsilon=1e-5, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        self.scale = self.add_weight(name="scale", shape=(input_shape[-1],), initializer="ones")

    def call(self, x):
        variance = tf.reduce_mean(tf.square(x), axis=-1, keepdims=True)
        return x * tf.math.rsqrt(variance + self.epsilon) * self.scale

    def get_config(self):
        config = super().get_config()
        config.update({"epsilon": self.epsilon})
        return config

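# RMSNorm in one line of numpy (a minimal reference sketch mirroring the layer
# above; illustrative only): y = x / sqrt(mean(x^2) + eps) * scale. Unlike
# LayerNorm there is no mean subtraction and no bias term.
def _rmsnorm_reference(x, scale, epsilon=1e-5):
    return x / np.sqrt(np.mean(np.square(x), axis=-1, keepdims=True) + epsilon) * scale
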
@keras.saving.register_keras_serializable()
class TransformerBlock(keras.layers.Layer):
    def __init__(self, d_model, n_heads, ff_dim, dropout, max_len, rope_theta, layer_idx=0, **kwargs):
        super().__init__(**kwargs)
        self.d_model = d_model
        self.n_heads = n_heads
        self.ff_dim = ff_dim
        self.dropout_rate = dropout
        self.max_len = max_len
        self.rope_theta = rope_theta
        self.head_dim = d_model // n_heads
        self.layer_idx = layer_idx

        self.pre_attn_norm = RMSNorm()
        self.pre_ffn_norm = RMSNorm()

        self.q_proj = keras.layers.Dense(d_model, use_bias=False, name="q_proj")
        self.k_proj = keras.layers.Dense(d_model, use_bias=False, name="k_proj")
        self.v_proj = keras.layers.Dense(d_model, use_bias=False, name="v_proj")
        self.out_proj = keras.layers.Dense(d_model, use_bias=False, name="o_proj")

        self.rope = RotaryEmbedding(self.head_dim, max_len=max_len, theta=rope_theta)

        self.gate_proj = keras.layers.Dense(ff_dim, use_bias=False, name="gate_proj")
        self.up_proj = keras.layers.Dense(ff_dim, use_bias=False, name="up_proj")
        self.down_proj = keras.layers.Dense(d_model, use_bias=False, name="down_proj")

        self.dropout = keras.layers.Dropout(dropout)

    def call(self, x, training=None):
        B, T, D = tf.shape(x)[0], tf.shape(x)[1], self.d_model
        dtype = x.dtype

        # Pre-norm self-attention with rotary position embeddings.
        res = x
        y = self.pre_attn_norm(x)

        q = tf.transpose(tf.reshape(self.q_proj(y), [B, T, self.n_heads, self.head_dim]), [0, 2, 1, 3])
        k = tf.transpose(tf.reshape(self.k_proj(y), [B, T, self.n_heads, self.head_dim]), [0, 2, 1, 3])
        v = tf.transpose(tf.reshape(self.v_proj(y), [B, T, self.n_heads, self.head_dim]), [0, 2, 1, 3])

        q, k = self.rope(q, k)

        scores = tf.matmul(q, k, transpose_b=True) / tf.sqrt(tf.cast(self.head_dim, dtype))

        # Causal mask: position i may only attend to positions <= i.
        mask = tf.where(
            tf.linalg.band_part(tf.ones([T, T], dtype=dtype), -1, 0) == 0,
            tf.constant(-1e9, dtype=dtype),
            tf.constant(0.0, dtype=dtype)
        )
        scores += mask
        attn = tf.matmul(tf.nn.softmax(scores, axis=-1), v)

        attn = tf.reshape(tf.transpose(attn, [0, 2, 1, 3]), [B, T, D])
        x = res + self.dropout(self.out_proj(attn), training=training)

        # Pre-norm SwiGLU feed-forward: down(silu(gate(y)) * up(y)).
        res = x
        y = self.pre_ffn_norm(x)
        ffn = self.down_proj(keras.activations.silu(self.gate_proj(y)) * self.up_proj(y))

        return res + self.dropout(ffn, training=training)

    def get_config(self):
        config = super().get_config()
        config.update({
            "d_model": self.d_model,
            "n_heads": self.n_heads,
            "ff_dim": self.ff_dim,
            "dropout": self.dropout_rate,
            "max_len": self.max_len,
            "rope_theta": self.rope_theta,
            "layer_idx": self.layer_idx
        })
        return config

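# The causal mask built inside TransformerBlock.call, shown standalone (a
# minimal illustrative sketch; not called by the app). band_part(ones, -1, 0)
# keeps the lower triangle, so everything above the diagonal gets -1e9 before
# the softmax and future positions receive ~zero attention weight.
def _causal_mask(T, dtype=tf.float32):
    keep = tf.linalg.band_part(tf.ones([T, T], dtype=dtype), -1, 0)
    return tf.where(keep == 0, tf.constant(-1e9, dtype=dtype), tf.constant(0.0, dtype=dtype))
# For T=3 this yields:
# [[ 0, -1e9, -1e9],
#  [ 0,    0, -1e9],
#  [ 0,    0,    0]]
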
@keras.saving.register_keras_serializable()
class SAM1Model(keras.Model):
    def __init__(self, **kwargs):
        super().__init__()
        # Accept the config as a nested dict (Keras deserialization), as flat
        # kwargs, or under a 'cfg' key, so the model can be rebuilt either way.
        if 'config' in kwargs and isinstance(kwargs['config'], dict):
            self.cfg = kwargs['config']
        elif 'vocab_size' in kwargs:
            self.cfg = kwargs
        else:
            self.cfg = kwargs.get('cfg', kwargs)

        self.embed = keras.layers.Embedding(self.cfg['vocab_size'], self.cfg['d_model'], name="embed_tokens")

        ff_dim = int(self.cfg['d_model'] * self.cfg['ff_mult'])
        block_args = {
            'd_model': self.cfg['d_model'],
            'n_heads': self.cfg['n_heads'],
            'ff_dim': ff_dim,
            'dropout': self.cfg['dropout'],
            'max_len': self.cfg['max_len'],
            'rope_theta': self.cfg['rope_theta']
        }

        self.blocks = []
        for i in range(self.cfg['n_layers']):
            block = TransformerBlock(name=f"block_{i}", layer_idx=i, **block_args)
            self.blocks.append(block)

        self.norm = RMSNorm(name="final_norm")
        self.lm_head = keras.layers.Dense(self.cfg['vocab_size'], use_bias=False, name="lm_head")

    def call(self, input_ids, training=None):
        x = self.embed(input_ids)

        for block in self.blocks:
            x = block(x, training=training)

        return self.lm_head(self.norm(x))

    def get_config(self):
        base_config = super().get_config()
        base_config['config'] = self.cfg
        return base_config

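# Shape walkthrough for SAM1Model (a minimal illustrative sketch; not called
# by the app): ids [batch, seq] -> embed [batch, seq, d_model] -> n_layers
# pre-norm blocks -> final RMSNorm -> logits [batch, seq, vocab_size].
def _shape_smoke_test(m, vocab_size, seq_len=8):
    ids = tf.zeros([2, seq_len], dtype=tf.int32)
    logits = m(ids, training=False)
    assert logits.shape == (2, seq_len, vocab_size)
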
# ==============================================================================
# Load Model from HuggingFace
# ==============================================================================
CONFIG_TOKENIZER_REPO_ID = "Smilyai-labs/Sam-1-large-it-0002"
MODEL_WEIGHTS_REPO_ID = "Smilyai-labs/Sam-1x-instruct"

print("="*70)
print("🤖 SAM-1 Keras Chat Interface".center(70))
print("="*70)
print(f"\n📦 Downloading config/tokenizer from: {CONFIG_TOKENIZER_REPO_ID}")
print(f"📦 Downloading model weights from: {MODEL_WEIGHTS_REPO_ID}")

# Download config and tokenizer files
print("\n⏳ Downloading config...")
config_path = hf_hub_download(repo_id=CONFIG_TOKENIZER_REPO_ID, filename="config.json")

print("⏳ Downloading tokenizer...")
tokenizer_path = hf_hub_download(repo_id=CONFIG_TOKENIZER_REPO_ID, filename="tokenizer.json")

# Download model weights, preferring the full .keras archive and falling back
# to the bare weights checkpoint.
print("⏳ Downloading model weights (this may take a while)...")
try:
    weights_path = hf_hub_download(repo_id=MODEL_WEIGHTS_REPO_ID, filename="model.keras")
    print("✅ Downloaded model.keras")
except Exception as e:
    print(f"❌ Failed to download model.keras: {e}")
    print("⏳ Trying to download ckpt.weights.h5 instead...")
    try:
        weights_path = hf_hub_download(repo_id=MODEL_WEIGHTS_REPO_ID, filename="ckpt.weights.h5")
        print("✅ Downloaded ckpt.weights.h5")
    except Exception as e_h5:
        raise FileNotFoundError(f"❌ Failed to download both model.keras and ckpt.weights.h5: {e_h5}")

# Load config
print("\n📋 Loading config...")
with open(config_path, 'r') as f:
    config = json.load(f)

print("✅ Config loaded:")
print(f"   Vocab size: {config['vocab_size']}")
print(f"   Max length: {config['max_position_embeddings']}")
print(f"   Hidden size: {config['hidden_size']}")
print(f"   Layers: {config['num_hidden_layers']}")

# Recreate tokenizer (like in training script)
print("\n🔤 Recreating tokenizer from scratch...")
tokenizer = Tokenizer.from_pretrained("gpt2")
# NOTE: the EOS marker below must match whatever was used during training;
# here it is an empty string, which only works if training did the same.
eos_token = ""
eos_token_id = tokenizer.token_to_id(eos_token)

if eos_token_id is None:
    tokenizer.add_special_tokens([eos_token])
    eos_token_id = tokenizer.token_to_id(eos_token)
    print(f"   Added EOS token '{eos_token}' with ID: {eos_token_id}")

# Add custom <think> tags (CRITICAL - must match training!)
custom_tokens = ["<think>", "<think/>"]
for token in custom_tokens:
    if tokenizer.token_to_id(token) is None:
        tokenizer.add_special_tokens([token])
        print(f"   Added custom token '{token}' with ID: {tokenizer.token_to_id(token)}")

# Disable padding for generation (handle explicitly)
tokenizer.no_padding()
tokenizer.enable_truncation(max_length=config['max_position_embeddings'])

print(f"✅ Tokenizer recreated (vocab size: {tokenizer.get_vocab_size()})")
print(f"   <think> token ID: {tokenizer.token_to_id('<think>')}")
print(f"   <think/> token ID: {tokenizer.token_to_id('<think/>')}")

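# Round-trip example for the recreated tokenizer (a minimal sketch; the exact
# ids depend on the GPT-2 vocab plus the special tokens added above):
#   enc = tokenizer.encode("User: hi\nSam: <think>")   # <think> -> one special-token id
#   tokenizer.decode(enc.ids, skip_special_tokens=False)  # keeps the <think> tag
# Note: decode() skips special tokens by default, which would strip the
# <think>/<think/> markers that parse_thinking_response() relies on.
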
# Load model
print("\n🧠 Loading model...")
# Map the HF-style config keys onto the constructor's internal names.
model_config = {
    'vocab_size': config['vocab_size'],
    'd_model': config['hidden_size'],
    'n_heads': config['num_attention_heads'],
    'ff_mult': config['intermediate_size'] / config['hidden_size'],
    'dropout': config.get('dropout', 0.0),
    'max_len': config['max_position_embeddings'],
    'rope_theta': config['rope_theta'],
    'n_layers': config['num_hidden_layers']
}
model = SAM1Model(**model_config)

# Build the model with a dummy input so every layer has created its weights
# before load_weights runs.
dummy_input = tf.zeros((1, 1), dtype=tf.int32)
model(dummy_input)

# Load weights into the built model
try:
    model.load_weights(weights_path)
    print("✅ Model weights loaded successfully!")
except Exception as e:
    raise RuntimeError(f"❌ Failed to load model weights: {e}")

model.trainable = False
print("✅ Model loaded successfully!")
print(f"   Device: {'GPU' if len(tf.config.list_physical_devices('GPU')) > 0 else 'CPU'}")

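# One-off probe of the loaded weights (a minimal illustrative helper; not
# called by the app): run a single forward pass over a prompt and return the
# greedy next-token id.
def _probe_next_token(text):
    ids = np.array([tokenizer.encode(text).ids], dtype=np.int32)
    logits = model(ids, training=False)
    return int(np.argmax(logits[0, -1, :].numpy()))
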
# ==============================================================================
# Generation Functions
# ==============================================================================
def parse_thinking_response(text):
    """Parse response to extract thinking process and final answer."""
    think_pattern = r'<think>(.*?)(?:</think>|<think/>)'
    thinking = re.findall(think_pattern, text, re.DOTALL)
    final_answer = re.sub(think_pattern, '', text, flags=re.DOTALL).strip()
    return thinking, final_answer

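# Example (on a string with one thinking span):
#   parse_thinking_response("<think>2+2 is 4<think/>The answer is 4.")
#   -> (["2+2 is 4"], "The answer is 4.")
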
def generate_response(
    prompt,
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.9,  # accepted but not applied here; see the sketch after this function
    top_k=50,
    show_thinking=False  # Default False for Gradio, we handle display separately
):
    """Generate response from the Keras model, one token at a time."""
    encoded_prompt = tokenizer.encode(prompt)
    input_ids = [i for i in encoded_prompt.ids if i != eos_token_id]
    generated = input_ids.copy()

    for _ in range(max_new_tokens):
        # Keep the context within the model's maximum sequence length.
        max_len = config['max_position_embeddings']
        current_input = generated[-max_len:]
        inputs = np.array([current_input], dtype=np.int32)

        logits = model(inputs, training=False)
        next_token_logits = logits[0, -1, :].numpy()

        if temperature > 0:
            next_token_logits = next_token_logits / temperature
            if top_k > 0:
                # Sample from the k highest-scoring tokens only.
                top_k_indices = np.argpartition(next_token_logits, -top_k)[-top_k:]
                top_k_logits = next_token_logits[top_k_indices]
                top_k_probs = np.exp(top_k_logits - np.max(top_k_logits))
                top_k_probs /= top_k_probs.sum()
                next_token = top_k_indices[np.random.choice(len(top_k_indices), p=top_k_probs)]
            else:
                probs = np.exp(next_token_logits - np.max(next_token_logits))
                probs /= probs.sum()
                next_token = np.random.choice(len(probs), p=probs)
        else:
            # Temperature 0 falls back to greedy decoding.
            next_token = np.argmax(next_token_logits)

        if next_token == eos_token_id:
            break

        generated.append(int(next_token))

    # Decode only the new tokens; keep special tokens so the <think> markers
    # survive for parse_thinking_response().
    return tokenizer.decode(generated[len(input_ids):], skip_special_tokens=False)

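# The top_p argument above is accepted but never applied in the sampling loop.
# A nucleus (top-p) filter could be slotted in right after the temperature
# scaling; this is a minimal standalone sketch, not part of the original loop:
def _top_p_filter(logits, top_p=0.9):
    probs = np.exp(logits - np.max(logits))
    probs /= probs.sum()
    order = np.argsort(probs)[::-1]                     # most to least likely
    cutoff = np.searchsorted(np.cumsum(probs[order]), top_p) + 1
    keep = order[:cutoff]                               # smallest set with >= top_p mass
    filtered = np.full_like(logits, -np.inf)
    filtered[keep] = logits[keep]
    return filtered
# Usage inside the loop would be, e.g.:
#   next_token_logits = _top_p_filter(next_token_logits, top_p)
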
# ==============================================================================
# Main - Gradio Interface
# ==============================================================================
if __name__ == "__main__":
    import gradio as gr

    def gradio_generate(user_input, show_thinking, temperature):
        """Wrapper function for Gradio; returns a single markdown string."""
        if not user_input.strip():
            return "Please enter a prompt."

        prompt = f"User: {user_input}\nSam: <think>"
        raw_response = generate_response(
            prompt,
            max_new_tokens=512,
            temperature=temperature,
            show_thinking=False
        )

        thinking_list, final_answer = parse_thinking_response(raw_response)
        thinking_text = "\n\n".join([f"💭 {thought.strip()}" for thought in thinking_list]) if thinking_list else "No explicit thinking trace."

        if show_thinking:
            return f"{thinking_text}\n\n---\n\n**Answer:**\n{final_answer}"
        return f"**Answer:**\n{final_answer}"

    with gr.Blocks(title="SAM-1 Chat") as demo:
        gr.Markdown("# 🤖 SAM-1 Keras Chat Interface")

        with gr.Row():
            with gr.Column(scale=3):
                user_input = gr.Textbox(
                    label="Your Message",
                    placeholder="Ask me anything...",
                    lines=3
                )
            with gr.Column(scale=1):
                with gr.Group():
                    temp_slider = gr.Slider(
                        minimum=0.0,
                        maximum=2.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature"
                    )
                    show_think_checkbox = gr.Checkbox(
                        label="Show Thinking Process",
                        value=True
                    )
                submit_btn = gr.Button("Send Message", variant="primary")

        response_output = gr.Markdown(label="Response")
        # raw_output = gr.Textbox(label="Raw Response (Debug)", visible=False)

        # gradio_generate returns one string, matching the single output below.
        submit_btn.click(
            fn=gradio_generate,
            inputs=[user_input, show_think_checkbox, temp_slider],
            outputs=[response_output]
        )
        user_input.submit(
            fn=gradio_generate,
            inputs=[user_input, show_think_checkbox, temp_slider],
            outputs=[response_output]
        )

    demo.launch(debug=True, share=True)