Spaces:

Debito
/

mamba-encoder-swarm_app

Sleeping

App Files Files Community

Debito committed on Aug 3

Commit

e6d86b2

verified ·

1 Parent(s): 7aad614

Upload 3 files

Browse files

Files changed (3) hide show

configuration_mamba_swarm.py +58 -0
tokenizer.py +63 -0
vocab.json +0 -0

configuration_mamba_swarm.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from transformers import PretrainedConfig
+class MambaSwarmConfig(PretrainedConfig):
+    model_type = "mamba_swarm"
+    def __init__(
+        self,
+        num_mamba_encoders=5,
+        max_mamba_encoders=1000,
+        d_model=768,
+        d_state=16,
+        d_conv=4,
+        expand_factor=2,
+        vocab_size=50257,
+        max_sequence_length=2048,
+        pad_token_id=50256,
+        bos_token_id=50256,
+        eos_token_id=50256,
+        tie_word_embeddings=False,
+        use_cache=True,
+        gating_config=None,
+        routing_config=None,
+        **kwargs
+    ):
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs
+        )
+        self.num_mamba_encoders = num_mamba_encoders
+        self.max_mamba_encoders = max_mamba_encoders
+        self.d_model = d_model
+        self.d_state = d_state
+        self.d_conv = d_conv
+        self.expand_factor = expand_factor
+        self.vocab_size = vocab_size
+        self.max_sequence_length = max_sequence_length
+        self.use_cache = use_cache
+        # Default gating configuration
+        if gating_config is None:
+            gating_config = {
+                "gating_type": "learned",
+                "top_k": 2,
+                "load_balancing_loss_coef": 0.01
+            }
+        self.gating_config = gating_config
+        # Default routing configuration
+        if routing_config is None:
+            routing_config = {
+                "routing_strategy": "dynamic",
+                "aggregation_method": "weighted_average"
+            }
+        self.routing_config = routing_config

tokenizer.py ADDED Viewed

	@@ -0,0 +1,63 @@

+# =============================================================================
+# core/tokenizer.py
+# =============================================================================
+from transformers import AutoTokenizer
+import torch
+from config import MambaConfig
+from typing import List, Dict, Union
+class MambaTokenizer:
+    def __init__(self, config: MambaConfig, tokenizer_name: str = "gpt2"):
+        self.config = config
+        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+        # Add special tokens if needed
+        if self.tokenizer.pad_token is None:
+            self.tokenizer.pad_token = self.tokenizer.eos_token
+        self.vocab_size = len(self.tokenizer)
+    def encode(self, text: str, max_length: int = None) -> Dict[str, torch.Tensor]:
+        """Encode text to token ids"""
+        if max_length is None:
+            max_length = self.config.max_seq_len
+        encoded = self.tokenizer(
+            text,
+            max_length=max_length,
+            padding="max_length",
+            truncation=True,
+            return_tensors="pt"
+        )
+        return {
+            "input_ids": encoded["input_ids"],
+            "attention_mask": encoded["attention_mask"]
+        }
+    def encode_batch(self, texts: List[str], max_length: int = None) -> Dict[str, torch.Tensor]:
+        """Encode batch of texts"""
+        if max_length is None:
+            max_length = self.config.max_seq_len
+        encoded = self.tokenizer(
+            texts,
+            max_length=max_length,
+            padding="max_length",
+            truncation=True,
+            return_tensors="pt"
+        )
+        return {
+            "input_ids": encoded["input_ids"],
+            "attention_mask": encoded["attention_mask"]
+        }
+    def decode(self, token_ids: torch.Tensor, skip_special_tokens: bool = True) -> str:
+        """Decode token ids to text"""
+        return self.tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens)
+    def decode_batch(self, token_ids: torch.Tensor, skip_special_tokens: bool = True) -> List[str]:
+        """Decode batch of token ids"""
+        return self.tokenizer.batch_decode(token_ids, skip_special_tokens=skip_special_tokens)

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff