# upload_to_hf.py - Script to upload your Mamba Swarm to HuggingFace

import os
import shutil
import json

from huggingface_hub import HfApi


def prepare_model_repo():
    """Prepare the model repository structure for HuggingFace."""
    # Required files for a HuggingFace model repo; the model classes live in
    # the generated modeling_mamba_swarm.py, so nothing is imported here.
    model_files = {
        "README.md": create_model_readme(),
        "config.json": create_model_config(),
        "requirements.txt": create_requirements(),
        "modeling_mamba_swarm.py": create_modeling_file(),
    }

    # Create the model repo directory
    os.makedirs("hf_model_repo", exist_ok=True)

    # Copy your mamba_swarm package into the repo
    shutil.copytree("mamba_swarm", "hf_model_repo/mamba_swarm", dirs_exist_ok=True)

    # Write the HuggingFace-specific files
    for filename, content in model_files.items():
        with open(f"hf_model_repo/{filename}", "w") as f:
            f.write(content)

    print("Model repository prepared!")


def create_model_readme():
    return """---
license: apache-2.0
language:
- en
pipeline_tag: text-generation
tags:
- mamba
- swarm
- routing
- language-model
---

# Mamba Swarm: Dynamic Routing Language Model

A novel architecture combining 100 specialized Mamba encoders with dynamic routing and aggregation for efficient language modeling.

## Architecture

- **100 Mamba Encoders**: Specialized domain experts
- **Dynamic Router**: Selects the relevant encoders for each input
- **Aggregation Layer**: Combines encoder outputs
- **Mamba Decoder**: Generates the final response

## Usage

```python
from transformers import AutoTokenizer
from modeling_mamba_swarm import MambaSwarmForCausalLM

# Load the model
model = MambaSwarmForCausalLM.from_pretrained("your-username/mamba-swarm-model")
tokenizer = AutoTokenizer.from_pretrained("your-username/mamba-swarm-model")

# Generate text
input_text = "Explain quantum computing"
inputs = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**inputs, max_length=100)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
```

## Training

This model uses a three-phase training approach:

1. Collective pre-training on general data
2. Domain specialization for encoder groups
3. End-to-end coordination training

## Performance

- **Parameters**: ~7B total (100 × 70M encoders)
- **Domains**: Medical, Legal, Code, Science, General
- **Routing Efficiency**: Only 10-20% of encoders active per query

## Citation

```
@misc{mamba-swarm-2025,
  title={Mamba Swarm: Dynamic Routing for Efficient Language Modeling},
  author={Your Name},
  year={2025}
}
```
"""


def create_model_config():
    config = {
        "model_type": "mamba_swarm",
        "architectures": ["MambaSwarmForCausalLM"],
        "num_encoders": 100,
        "encoder_config": {
            "d_model": 768,
            "n_layer": 24,
            "vocab_size": 50280,
            "ssm_cfg": {},
            "rms_norm": True,
            "residual_in_fp32": True,
            "fused_add_norm": True,
        },
        "router_config": {
            "top_k": 10,
            "routing_strategy": "content_based",
        },
        "aggregator_config": {
            "method": "weighted_sum",
            "attention_heads": 8,
        },
        "torch_dtype": "float16",
        "use_cache": True,
    }
    return json.dumps(config, indent=2)
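
# The router_config above pairs top_k=10 with num_encoders=100, which is where
# the README's "only 10-20% of encoders active per query" figure comes from.
# Below is a minimal, self-contained sketch of that top-k selection step. It is
# purely illustrative: `router_logits`, the shapes, and the renormalization are
# assumptions made for the sketch, not the actual mamba_swarm router API.
def _topk_routing_sketch(num_encoders=100, top_k=10):
    import torch

    # Hypothetical content-based relevance score for each encoder on one query
    router_logits = torch.randn(num_encoders)
    probs = torch.softmax(router_logits, dim=-1)
    weights, active = torch.topk(probs, top_k)
    # Renormalize so the selected encoders' weights sum to 1 before aggregation
    weights = weights / weights.sum()
    print(f"Active encoders: {sorted(active.tolist())} "
          f"({top_k / num_encoders:.0%} of the swarm)")
    return active, weights
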
def create_requirements():
    return """torch>=2.0.0
transformers>=4.35.0
mamba-ssm>=1.2.0
causal-conv1d>=1.2.0
numpy>=1.21.0
scipy>=1.7.0
triton>=2.0.0
einops>=0.6.1
packaging>=20.0
"""


def create_modeling_file():
    return """# modeling_mamba_swarm.py - HuggingFace integration
import torch
import torch.nn as nn
from transformers import PreTrainedModel, PretrainedConfig
from transformers.modeling_outputs import CausalLMOutputWithPast


class MambaSwarmConfig(PretrainedConfig):
    model_type = "mamba_swarm"

    def __init__(
        self,
        num_encoders=100,
        encoder_config=None,
        router_config=None,
        aggregator_config=None,
        **kwargs,
    ):
        self.num_encoders = num_encoders
        self.encoder_config = encoder_config or {}
        self.router_config = router_config or {}
        self.aggregator_config = aggregator_config or {}
        super().__init__(**kwargs)


class MambaSwarmForCausalLM(PreTrainedModel):
    config_class = MambaSwarmConfig

    def __init__(self, config):
        super().__init__(config)
        # Import your actual implementation (shipped alongside this file)
        from mamba_swarm.system.swarm_engine import MambaSwarmEngine

        self.swarm_engine = MambaSwarmEngine(config)

    def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
        # Delegate the forward pass to the swarm engine
        outputs = self.swarm_engine(input_ids, attention_mask)

        loss = None
        if labels is not None:
            # Standard causal-LM loss: predict token t+1 from tokens <= t
            shift_logits = outputs.logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(
                shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1),
            )

        return CausalLMOutputWithPast(
            loss=loss,
            logits=outputs.logits,
            past_key_values=outputs.past_key_values,
        )

    def generate(self, *args, **kwargs):
        return self.swarm_engine.generate(*args, **kwargs)

    @classmethod
    def from_pretrained(cls, model_name_or_path, *model_args, **kwargs):
        # Hook for custom loading logic if needed; defaults to base behavior
        return super().from_pretrained(model_name_or_path, *model_args, **kwargs)
"""


def upload_model():
    """Upload the model code to HuggingFace."""
    api = HfApi()
    api.upload_folder(
        folder_path="hf_model_repo",
        repo_id="your-username/mamba-swarm-model",  # Replace with your username
        repo_type="model",
        commit_message="Initial upload of Mamba Swarm model",
    )
    print("Model uploaded successfully!")


def upload_weights():
    """Upload the trained model weights separately."""
    # Assumes trained weights live in checkpoints/
    api = HfApi()
    api.upload_folder(
        folder_path="checkpoints",
        repo_id="your-username/mamba-swarm-weights",  # Replace with your username
        repo_type="model",
        commit_message="Upload trained model weights",
    )
    print("Weights uploaded successfully!")


if __name__ == "__main__":
    prepare_model_repo()
    upload_model()
    # upload_weights()  # Uncomment when you have trained weights
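
# Note: upload_folder() does not create the target repo, so the upload fails
# with a repository-not-found error if the repo does not exist on the Hub yet.
# Below is a minimal sketch of creating it first. `ensure_repo` is a
# hypothetical helper name for this sketch, but create_repo() is part of
# huggingface_hub's public API, and exist_ok=True makes it safe to rerun.
def ensure_repo(repo_id: str):
    from huggingface_hub import create_repo

    # No-op when the repo already exists
    create_repo(repo_id, repo_type="model", exist_ok=True)

# Example usage (call before upload_model() / upload_weights()):
# ensure_repo("your-username/mamba-swarm-model")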