Upload 3 files
- app.py +92 -0
- model_smol2.py +260 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,92 @@
import torch
import gradio as gr
from transformers import AutoTokenizer
from model_smol2 import LlamaForCausalLM, config_model

# Instantiate the model
model = LlamaForCausalLM(config_model)

# Load the checkpoint
checkpoint_path = "/Users/shriti/Downloads/Assign13_ERAV3/deply/final_checkpoint.pt"
checkpoint = torch.load(checkpoint_path, map_location="cpu")
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Load the tokenizer (replace with the appropriate tokenizer if you're using a custom one)
TOKENIZER_PATH = "HuggingFaceTB/cosmo2-tokenizer"
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token if tokenizer.eos_token else "[PAD]"


# Text generation function
def generate_text(
    prompt, max_length=50, temperature=0.7, top_k=50, repetition_penalty=1.2, n_gram_block=2
):
    # Gradio sliders pass floats, so cast the integer-valued controls
    max_length, top_k, n_gram_block = int(max_length), int(top_k), int(n_gram_block)

    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    generated_tokens = input_ids[0].tolist()

    with torch.no_grad():
        for _ in range(max_length):
            outputs = model(input_ids)  # model outputs

            # Check if the output is a dictionary with logits
            if isinstance(outputs, dict) and 'logits' in outputs:
                logits = outputs['logits'][:, -1, :]
            else:
                # If not, treat the output as a plain tensor
                logits = outputs[:, -1, :]

            # Repetition penalty
            for token_id in set(generated_tokens):
                logits[:, token_id] /= repetition_penalty

            # n-gram blocking (note: once the sequence is at least n_gram_block tokens
            # long, this condition is always true, so every previously generated token
            # is penalized, not just repeated n-grams)
            if len(generated_tokens) >= n_gram_block:
                n_gram = tuple(generated_tokens[-n_gram_block:])
                for token_id in set(generated_tokens):
                    if generated_tokens[-n_gram_block:] == list(n_gram):
                        logits[:, token_id] -= 1e9

            # Temperature scaling and top-k sampling
            logits /= temperature
            top_k_logits, top_k_indices = torch.topk(logits, top_k, dim=-1)
            probs = torch.softmax(top_k_logits, dim=-1)

            next_token_idx = torch.multinomial(probs, num_samples=1)
            next_token = top_k_indices[0, next_token_idx[0]]

            generated_tokens.append(next_token.item())
            input_ids = torch.cat([input_ids, next_token.unsqueeze(0)], dim=1)

            if next_token.item() == tokenizer.eos_token_id:
                break

    return tokenizer.decode(generated_tokens, skip_special_tokens=True)


# Gradio UI
def generate_response(prompt, max_length, temperature, top_k, repetition_penalty, n_gram_block):
    return generate_text(prompt, max_length, temperature, top_k, repetition_penalty, n_gram_block)

with gr.Blocks() as demo:
    gr.Markdown("# Smol2 Text Generator")
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(label="Input Prompt", placeholder="Enter your text prompt here...")
            max_length = gr.Slider(label="Max Length", minimum=10, maximum=200, value=50)
            temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.5, value=0.7, step=0.1)
            top_k = gr.Slider(label="Top K", minimum=10, maximum=100, value=50, step=1)
            repetition_penalty = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.2, step=0.1)
            n_gram_block = gr.Slider(label="N-Gram Blocking", minimum=1, maximum=5, value=2, step=1)
            generate_button = gr.Button("Generate Text")
        with gr.Column():
            output_text = gr.Textbox(label="Generated Text", lines=10)

    generate_button.click(
        generate_response,
        inputs=[prompt_input, max_length, temperature, top_k, repetition_penalty, n_gram_block],
        outputs=[output_text],
    )

demo.launch()
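
For a quick smoke test outside the Gradio UI, generate_text can be called directly in the same session where app.py's definitions are loaded (importing app.py as a module would also call demo.launch()). A minimal sketch; the prompt text and sampling values below are only illustrative:

# Hypothetical smoke test for generate_text; run with app.py's definitions in scope.
sample = generate_text(
    "Once upon a time",   # illustrative prompt, not from the original app
    max_length=40,
    temperature=0.8,
    top_k=50,
    repetition_penalty=1.2,
    n_gram_block=2,
)
print(sample)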
model_smol2.py
ADDED
@@ -0,0 +1,260 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


# Configuration as provided
config_model = {
    "bos_token_id": 0,
    "eos_token_id": 0,
    "hidden_act": "silu",
    "hidden_size": 576,
    "initializer_range": 0.041666666666666664,
    "intermediate_size": 1536,
    "is_llama_config": True,
    "max_position_embeddings": 2048,
    "num_attention_heads": 9,
    "num_hidden_layers": 30,
    "num_key_value_heads": 3,
    "pad_token_id": None,
    "pretraining_tp": 1,
    "rms_norm_eps": 1.0e-05,
    "rope_interleaved": False,
    "rope_scaling": None,
    "rope_theta": 10000.0,
    "tie_word_embeddings": True,
    "use_cache": True,
    "vocab_size": 49152
}

# 1. Rotary Embedding
class LlamaRotaryEmbedding(nn.Module):
    def __init__(self, dim: int, theta: float = 10000.0):
        super().__init__()
        self.dim = dim
        self.theta = theta

    def forward(self, x):
        batch_size, seq_len, _ = x.size()
        device = x.device

        # Create the position indices
        position = torch.arange(seq_len, dtype=torch.float32, device=device).unsqueeze(1)  # Shape: (seq_len, 1)
        freqs = torch.pow(self.theta, -torch.arange(0, self.dim, 2, dtype=torch.float32, device=device) / self.dim)  # Shape: (dim/2,)

        # Reshape freqs so it broadcasts against position in the einsum
        freqs = freqs.unsqueeze(1)  # Shape: (dim/2, 1)

        # Calculate sinusoidal embeddings
        sinusoidal_embeddings = torch.einsum('i,j->ij', position.squeeze(), freqs.squeeze())  # Shape: (seq_len, dim/2)

        # Sinusoidal encoding
        sin = sinusoidal_embeddings.sin().unsqueeze(0)  # Shape: (1, seq_len, dim/2)
        cos = sinusoidal_embeddings.cos().unsqueeze(0)  # Shape: (1, seq_len, dim/2)

        # Concatenate the sin and cos values to create the final embedding
        rotary_embeddings = torch.cat([sin, cos], dim=-1).unsqueeze(0)  # Shape: (1, 1, seq_len, dim)

        # Drop the extra leading dimension; note that this returns the positional
        # features themselves rather than a rotated version of x
        return rotary_embeddings.squeeze(0)  # Shape: (1, seq_len, dim)
'''
# Testing LlamaRotaryEmbedding again with the modified code
rotary_emb = LlamaRotaryEmbedding(dim=576, theta=10000.0)
input_tensor = torch.randn(2, 10, 576)  # (batch_size, seq_len, hidden_size)
rotary_output = rotary_emb(input_tensor)
print(f"Rotary embedding output shape: {rotary_output.shape}")
'''


# 2. Attention Layer
class LlamaAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.q_proj = nn.Linear(config['hidden_size'], config['hidden_size'], bias=False)
        self.k_proj = nn.Linear(config['hidden_size'], config['hidden_size'] // 3, bias=False)
        self.v_proj = nn.Linear(config['hidden_size'], config['hidden_size'] // 3, bias=False)
        self.o_proj = nn.Linear(config['hidden_size'] // 3, config['hidden_size'], bias=False)  # Adjust output projection size
        self.rope_emb = LlamaRotaryEmbedding(config['hidden_size'])

    def forward(self, x):
        batch_size, seq_len, _ = x.size()  # Get the batch size and sequence length
        q = self.q_proj(x)  # Shape: (batch_size, seq_len, hidden_size)
        k = self.k_proj(x)  # Shape: (batch_size, seq_len, hidden_size // 3)
        v = self.v_proj(x)  # Shape: (batch_size, seq_len, hidden_size // 3)

        # Apply rotary embeddings (positional encoding); rope_emb returns the positional
        # features, so q and k are replaced by (1, seq_len, hidden_size) tensors here
        q, k = self.rope_emb(q), self.rope_emb(k)

        # Calculate attention weights (dot-product attention)
        attn_weights = torch.matmul(q, k.transpose(-2, -1))  # Shape: (1, seq_len, seq_len)
        attn_probs = torch.nn.functional.softmax(attn_weights, dim=-1)  # Shape: (1, seq_len, seq_len)

        # Apply attention to values
        attn_output = torch.matmul(attn_probs, v)  # Shape: (batch_size, seq_len, hidden_size // 3)

        # Output projection (adjusted to match hidden_size)
        out = self.o_proj(attn_output)  # Shape: (batch_size, seq_len, hidden_size)

        return out
'''
# Testing LlamaAttention again
attention_layer = LlamaAttention(config)
input_tensor = torch.randn(2, 10, 576)  # (batch_size, seq_len, hidden_size)
attention_output = attention_layer(input_tensor)
print(f"Attention output shape: {attention_output.shape}")
'''

# 3. MLP Layer
class LlamaMLP(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.gate_proj = nn.Linear(config['hidden_size'], config['intermediate_size'], bias=False)  # Hidden size to intermediate size
        self.up_proj = nn.Linear(config['intermediate_size'], config['intermediate_size'], bias=False)  # Intermediate size to intermediate size
        self.down_proj = nn.Linear(config['intermediate_size'], config['hidden_size'], bias=False)  # Intermediate size to hidden size
        self.act_fn = torch.nn.SiLU()  # Activation function

    def forward(self, x):
        batch_size, seq_len, _ = x.size()

        # Flatten input to (batch_size * seq_len, hidden_size) for projection
        x = x.view(batch_size * seq_len, -1)  # Shape: (batch_size * seq_len, hidden_size)

        # Apply gate projection
        x = self.gate_proj(x)  # Shape: (batch_size * seq_len, intermediate_size)
        x = self.act_fn(x)  # Apply activation

        # Apply up projection
        x = self.up_proj(x)  # Shape: (batch_size * seq_len, intermediate_size)

        # Apply down projection
        x = self.down_proj(x)  # Shape: (batch_size * seq_len, hidden_size)

        # Reshape back to (batch_size, seq_len, hidden_size)
        x = x.view(batch_size, seq_len, -1)  # Shape: (batch_size, seq_len, hidden_size)

        return x
'''
# Test the MLP again
mlp_layer = LlamaMLP(config)
input_tensor = torch.randn(2, 10, 576)  # (batch_size, seq_len, hidden_size)
mlp_output = mlp_layer(input_tensor)
print(f"MLP output shape: {mlp_output.shape}")
'''


# 4. Decoder Layer
class LlamaDecoderLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.self_attn = LlamaAttention(config)
        self.mlp = LlamaMLP(config)
        self.input_layernorm = nn.LayerNorm(config['hidden_size'], eps=config['rms_norm_eps'])
        self.post_attention_layernorm = nn.LayerNorm(config['hidden_size'], eps=config['rms_norm_eps'])

    def forward(self, x):
        # Apply input normalization
        x = self.input_layernorm(x)

        # Attention
        attn_output = self.self_attn(x)
        x = x + attn_output  # Residual connection

        # Apply post-attention layer normalization
        x = self.post_attention_layernorm(x)

        # Apply MLP
        mlp_output = self.mlp(x)
        x = x + mlp_output  # Residual connection
        return x
'''
# Testing LlamaDecoderLayer
decoder_layer = LlamaDecoderLayer(config)
input_tensor = torch.randn(10, 2, 576)  # (seq_len, batch_size, hidden_size)
decoder_output = decoder_layer(input_tensor)
print(f"Decoder layer output shape: {decoder_output.shape}")

# 5. Model
class LlamaModel(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.embed_tokens = nn.Embedding(config['vocab_size'], config['hidden_size'])

        # Partially shared decoder layers
        self.layers = nn.ModuleList([LlamaDecoderLayer(config) for _ in range(config['num_hidden_layers'])])

        # Separate adapters for each layer (adds more parameters)
        self.adapters = nn.ModuleList([
            nn.Linear(config['hidden_size'], config['hidden_size'], bias=False)
            for _ in range(config['num_hidden_layers'])
        ])

        self.norm = nn.LayerNorm(config['hidden_size'], eps=config['rms_norm_eps'])

    def forward(self, input_ids):
        # Initial embedding lookup
        x = self.embed_tokens(input_ids)

        # Pass through transformer layers with unique adapters per layer
        for i, layer in enumerate(self.layers):
            x = layer(x)  # Apply the i-th decoder layer
            x = x + self.adapters[i](x)  # Add per-layer adapter

        # Apply the final layer normalization
        x = self.norm(x)
        return x


'''
# NOTE: the triple-quoted block above runs from the decoder-layer test through the
# adapter-based LlamaModel variant, so that variant is never defined; the class
# below is the LlamaModel that actually executes.
class LlamaModel(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.embed_tokens = nn.Embedding(config['vocab_size'], config['hidden_size'])
        self.layers = nn.ModuleList([LlamaDecoderLayer(config) for _ in range(config['num_hidden_layers'])])
        self.norm = nn.LayerNorm(config['hidden_size'], eps=config['rms_norm_eps'])
        self.rotary_emb = LlamaRotaryEmbedding(config['hidden_size'])

    def forward(self, input_ids):
        # Initial embedding lookup
        x = self.embed_tokens(input_ids)

        # Pass through the transformer layers
        for layer in self.layers:
            x = layer(x)

        # Apply the final layer normalization
        x = self.norm(x)
        return x
'''
# Testing LlamaModel
model = LlamaModel(config)
input_ids = torch.randint(0, config['vocab_size'], (10, 2))  # (seq_len, batch_size)
model_output = model(input_ids)
print(f"Model output shape: {model_output.shape}")
'''
# 6. Causal Language Model (Final Model)
class LlamaForCausalLM(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.model = LlamaModel(config)
        # Share weights between the embedding and output layers
        #self.lm_head = self.model.embed_tokens

        self.lm_head = nn.Linear(config['hidden_size'], config['vocab_size'], bias=False)

    def forward(self, input_ids):
        hidden_states = self.model(input_ids)
        logits = self.lm_head(hidden_states)
        return logits

# Testing LlamaForCausalLM
'''
causal_lm_model = LlamaForCausalLM(config_model)
print(causal_lm_model)
from torchinfo import summary
summary(causal_lm_model)
input_ids = torch.randint(0, config_model['vocab_size'], (10, 2))  # (seq_len, batch_size)
logits = causal_lm_model(input_ids)
print(f"Logits shape: {logits.shape}")
'''
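
A small sanity check one might run against model_smol2.py before wiring it into the app: instantiate LlamaForCausalLM with config_model, push a random batch through it, and confirm the logits come back as (batch, seq_len, vocab_size). This assumes model_smol2.py is on the import path; the batch size and sequence length below are arbitrary:

# Hypothetical shape check for model_smol2.py (input sizes are arbitrary).
import torch
from model_smol2 import LlamaForCausalLM, config_model

model = LlamaForCausalLM(config_model)
model.eval()

input_ids = torch.randint(0, config_model['vocab_size'], (1, 16))  # (batch_size, seq_len)
with torch.no_grad():
    logits = model(input_ids)

print(logits.shape)  # expected: (1, 16, 49152) -> (batch, seq_len, vocab_size)
print(sum(p.numel() for p in model.parameters()))  # total parameter count

app.py expects the checkpoint file to be a dict containing a 'model_state_dict' key. A hedged sketch of producing and reloading a file in that format; "checkpoint.pt" is a placeholder path, not the checkpoint referenced in app.py:

# Hypothetical save/load round trip matching the format app.py loads;
# "checkpoint.pt" is a placeholder path.
import torch
from model_smol2 import LlamaForCausalLM, config_model

model = LlamaForCausalLM(config_model)
torch.save({'model_state_dict': model.state_dict()}, "checkpoint.pt")

restored = LlamaForCausalLM(config_model)
checkpoint = torch.load("checkpoint.pt", map_location="cpu")
restored.load_state_dict(checkpoint['model_state_dict'])
restored.eval()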
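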
requirements.txt
ADDED
@@ -0,0 +1,4 @@
transformers
torch
datasets
gradio