# upload_to_hf.py - Script to upload your Mamba Swarm to HuggingFace
import os
import shutil
from huggingface_hub import HfApi, upload_folder
import json
def prepare_model_repo():
"""Prepare model repository structure for HuggingFace"""
# Create required files for HuggingFace model
model_files = {
"README.md": create_model_readme(),
"config.json": create_model_config(),
"requirements.txt": create_requirements(),
"modeling_mamba_swarm.py": create_modeling_file()
}
# Create model repo directory
os.makedirs("hf_model_repo", exist_ok=True)
# Copy your mamba_swarm code
shutil.copytree("mamba_swarm", "hf_model_repo/mamba_swarm", dirs_exist_ok=True)
# Create HuggingFace specific files
for filename, content in model_files.items():
with open(f"hf_model_repo/{filename}", "w") as f:
f.write(content)
print("Model repository prepared!")
def create_model_readme():
return """---
license: apache-2.0
language:
- en
pipeline_tag: text-generation
tags:
- mamba
- swarm
- routing
- language-model
---
# Mamba Swarm: Dynamic Routing Language Model
A novel architecture combining 100 specialized Mamba encoders with dynamic routing and aggregation for efficient language modeling.
## Architecture
- **100 Mamba Encoders**: Specialized domain experts
- **Dynamic Router**: Selects relevant encoders per input (sketched below)
- **Aggregation Layer**: Combines encoder outputs
- **Mamba Decoder**: Generates final responses
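
The routing idea is sketched below for illustration only; this is not the actual `mamba_swarm` router, and the names and shapes are simplified assumptions:

```python
import torch

def route(encoder_scores: torch.Tensor, top_k: int = 10):
    # encoder_scores: (batch, num_encoders) relevance score per specialized encoder
    probs = torch.softmax(encoder_scores, dim=-1)
    weights, indices = torch.topk(probs, k=top_k, dim=-1)   # keep only the top-k experts
    weights = weights / weights.sum(dim=-1, keepdim=True)   # renormalize selected weights
    return weights, indices
```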
## Usage
```python
from transformers import AutoTokenizer
from mamba_swarm import MambaSwarmEngine
# Load the model
model = MambaSwarmEngine.from_pretrained("your-username/mamba-swarm-model")
tokenizer = AutoTokenizer.from_pretrained("your-username/mamba-swarm-model")
# Generate text
input_text = "Explain quantum computing"
inputs = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**inputs, max_length=100)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
```
## Training
This model uses a three-phase training approach:
1. Collective pre-training on general data
2. Domain specialization for encoder groups
3. End-to-end coordination training
## Performance
- **Parameters**: ~7B total (100 × 70M encoders)
- **Domains**: Medical, Legal, Code, Science, General
- **Routing Efficiency**: Only 10-20% of encoders active per query
## Citation
```
@misc{mamba-swarm-2025,
title={Mamba Swarm: Dynamic Routing for Efficient Language Modeling},
author={Your Name},
year={2025}
}
```
"""
def create_model_config():
    config = {
        "model_type": "mamba_swarm",
        "architectures": ["MambaSwarmForCausalLM"],
        "num_encoders": 100,
        "encoder_config": {
            "d_model": 768,
            "n_layer": 24,
            "vocab_size": 50280,
            "ssm_cfg": {},
            "rms_norm": True,
            "residual_in_fp32": True,
            "fused_add_norm": True
        },
        "router_config": {
            "top_k": 10,
            "routing_strategy": "content_based"
        },
        "aggregator_config": {
            "method": "weighted_sum",
            "attention_heads": 8
        },
        "torch_dtype": "float16",
        "use_cache": True
    }
    return json.dumps(config, indent=2)
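

# Optional, an assumption rather than part of the original script: a quick
# consistency check on the generated config before it is written out.
def validate_model_config():
    config = json.loads(create_model_config())
    assert config["model_type"] == "mamba_swarm"
    assert config["router_config"]["top_k"] <= config["num_encoders"]
    assert config["encoder_config"]["vocab_size"] > 0
    print("config.json is self-consistent!")
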
def create_requirements():
return """torch>=2.0.0
transformers>=4.35.0
mamba-ssm>=1.2.0
causal-conv1d>=1.2.0
numpy>=1.21.0
scipy>=1.7.0
triton>=2.0.0
einops>=0.6.1
packaging>=20.0
"""
def create_modeling_file():
return """# modeling_mamba_swarm.py - HuggingFace integration
from transformers import PreTrainedModel, PretrainedConfig
from transformers.modeling_outputs import CausalLMOutputWithPast
import torch
import torch.nn as nn
class MambaSwarmConfig(PretrainedConfig):
model_type = "mamba_swarm"
def __init__(
self,
num_encoders=100,
encoder_config=None,
router_config=None,
aggregator_config=None,
**kwargs
):
self.num_encoders = num_encoders
self.encoder_config = encoder_config or {}
self.router_config = router_config or {}
self.aggregator_config = aggregator_config or {}
super().__init__(**kwargs)
class MambaSwarmForCausalLM(PreTrainedModel):
config_class = MambaSwarmConfig
def __init__(self, config):
super().__init__(config)
# Import your actual implementation
from mamba_swarm.system.swarm_engine import MambaSwarmEngine
self.swarm_engine = MambaSwarmEngine(config)
def forward(
self,
input_ids=None,
attention_mask=None,
labels=None,
**kwargs
):
# Your forward pass implementation
outputs = self.swarm_engine(input_ids, attention_mask)
loss = None
if labels is not None:
# Calculate loss
loss_fct = nn.CrossEntropyLoss()
loss = loss_fct(outputs.logits.view(-1, outputs.logits.size(-1)), labels.view(-1))
return CausalLMOutputWithPast(
loss=loss,
logits=outputs.logits,
past_key_values=outputs.past_key_values,
)
def generate(self, *args, **kwargs):
return self.swarm_engine.generate(*args, **kwargs)
@classmethod
def from_pretrained(cls, model_name_or_path, *model_args, **kwargs):
# Custom loading logic if needed
return super().from_pretrained(model_name_or_path, *model_args, **kwargs)
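
# Optional, an assumption not present in the original design: register the custom
# classes so checkpoints in this repo can also be loaded through the transformers
# Auto classes with trust_remote_code=True.
MambaSwarmConfig.register_for_auto_class("AutoConfig")
MambaSwarmForCausalLM.register_for_auto_class("AutoModelForCausalLM")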
"""
def upload_model():
"""Upload model code to HuggingFace"""
api = HfApi()
# Upload model repository
upload_folder(
folder_path="hf_model_repo",
repo_id="your-username/mamba-swarm-model", # Replace with your username
repo_type="model",
commit_message="Initial upload of Mamba Swarm model"
)
print("Model uploaded successfully!")
def upload_weights():
"""Upload model weights separately"""
# This assumes you have trained weights in checkpoints/
api = HfApi()
upload_folder(
folder_path="checkpoints",
repo_id="your-username/mamba-swarm-weights", # Replace with your username
repo_type="model",
commit_message="Upload trained model weights"
)
print("Weights uploaded successfully!")
if __name__ == "__main__":
    prepare_model_repo()
    upload_model()
    # upload_weights()  # Uncomment when you have trained weights