Spaces:

Debito
/

mamba-encoder-swarm_app

Sleeping

App Files Files Community

Debito committed 12 days ago

Commit

3fb2fb4

verified ·

1 Parent(s): bb70da7

Upload 3 files

Browse files

Files changed (3) hide show

utils/conv_layer.py +33 -0
utils/domain_configs.py +116 -0
utils/selective_scan.py +55 -0

utils/conv_layer.py ADDED Viewed

	@@ -0,0 +1,33 @@

+# =============================================================================
+# utils/conv_layer.py
+# =============================================================================
+import torch
+import torch.nn as nn
class Mamba1DConv(nn.Module):
    """Depthwise causal 1-D convolution applied along the sequence axis.

    Every channel is filtered independently (``groups=d_inner``) with a
    kernel of width ``d_conv``. The convolution pads ``d_conv - 1`` positions
    on both sides and the trailing positions are discarded, so the output at
    step ``t`` depends only on inputs at steps ``t - d_conv + 1 .. t``.
    """

    def __init__(self, d_inner: int, d_conv: int = 4, bias: bool = True):
        """
        Args:
            d_inner: number of channels (used for both input and output)
            d_conv: kernel width of the convolution
            bias: whether the convolution has a learnable bias
        """
        super().__init__()
        self.d_conv = d_conv
        self.conv1d = nn.Conv1d(
            in_channels=d_inner,
            out_channels=d_inner,
            kernel_size=d_conv,
            bias=bias,
            groups=d_inner,  # Depthwise convolution
            padding=d_conv - 1,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: [batch, seq_len, d_inner]
        Returns:
            x: [batch, seq_len, d_inner]
        """
        seq_len = x.shape[1]
        # Conv1d expects [batch, channels, seq_len]
        x = x.transpose(1, 2)  # [batch, d_inner, seq_len]
        x = self.conv1d(x)  # [batch, d_inner, seq_len + d_conv - 1]
        # Keep the first seq_len positions, i.e. drop the d_conv - 1 trailing
        # ones produced by the right-hand padding. Slicing by length rather
        # than by ``:-(d_conv - 1)`` keeps d_conv == 1 working, where the
        # negative-index form degenerates to ``:-0`` and yields an empty tensor.
        x = x[:, :, :seq_len]
        x = x.transpose(1, 2)  # [batch, seq_len, d_inner]
        return x

utils/domain_configs.py ADDED Viewed

	@@ -0,0 +1,116 @@

+# =============================================================================
+# utils/domain_configs.py
+# =============================================================================
+from typing import Dict, List
+from core.config import MambaConfig
class DomainConfigs:
    """Configurations for different specialist domains"""

    # Base domain table: domain name -> {"keywords": [...], "description": "..."}.
    # Insertion order matters: get_domain_configs assigns ids in this order.
    DOMAINS = {
        # STEM domains
        "mathematics": {
            "keywords": ["equation", "theorem", "proof", "calculate", "derivative", "integral", "matrix", "algebra", "geometry", "statistics"],
            "description": "Mathematical reasoning and computation"
        },
        "physics": {
            "keywords": ["force", "energy", "momentum", "quantum", "relativity", "particle", "wave", "thermodynamics", "mechanics"],
            "description": "Physics concepts and problems"
        },
        "chemistry": {
            "keywords": ["molecule", "atom", "reaction", "compound", "bond", "element", "organic", "inorganic", "catalyst"],
            "description": "Chemistry and molecular science"
        },
        "biology": {
            "keywords": ["cell", "DNA", "protein", "organism", "evolution", "genetics", "ecology", "anatomy", "physiology"],
            "description": "Biological sciences"
        },
        # Programming domains
        "python": {
            "keywords": ["def", "class", "import", "python", "pandas", "numpy", "matplotlib", "sklearn", "tensorflow"],
            "description": "Python programming and data science"
        },
        "javascript": {
            "keywords": ["function", "var", "let", "const", "javascript", "react", "node", "async", "promise"],
            "description": "JavaScript and web development"
        },
        "systems": {
            "keywords": ["linux", "server", "network", "database", "docker", "kubernetes", "cloud", "devops"],
            "description": "Systems programming and infrastructure"
        },
        # Language domains
        "writing": {
            "keywords": ["essay", "article", "story", "paragraph", "thesis", "narrative", "prose", "literature"],
            "description": "Creative and technical writing"
        },
        "translation": {
            "keywords": ["translate", "language", "spanish", "french", "german", "chinese", "japanese", "korean"],
            "description": "Language translation and linguistics"
        },
        # Business domains
        "business": {
            "keywords": ["market", "strategy", "finance", "management", "revenue", "profit", "customer", "sales"],
            "description": "Business and economics"
        },
        "legal": {
            "keywords": ["law", "contract", "court", "legal", "attorney", "judge", "case", "statute", "regulation"],
            "description": "Legal reasoning and analysis"
        },
        # Other domains
        "history": {
            "keywords": ["war", "empire", "civilization", "century", "ancient", "medieval", "revolution", "dynasty"],
            "description": "Historical knowledge and analysis"
        },
        "philosophy": {
            "keywords": ["ethics", "moral", "logic", "metaphysics", "epistemology", "consciousness", "existence"],
            "description": "Philosophical reasoning"
        },
        "medical": {
            "keywords": ["patient", "diagnosis", "treatment", "disease", "medicine", "surgery", "therapy", "symptom"],
            "description": "Medical knowledge and healthcare"
        },
        "arts": {
            "keywords": ["painting", "music", "sculpture", "artist", "gallery", "museum", "aesthetic", "culture"],
            "description": "Arts and cultural topics"
        }
    }

    @classmethod
    def get_domain_configs(cls, num_specialists: int = 100) -> List[Dict]:
        """Generate configurations for specialist domains.

        The first ``len(DOMAINS)`` specialists map one-to-one onto the base
        domains in definition order. Any further specialist ``i`` cycles
        through the base domains and is named ``"<base>_sub_<i>"``, reusing
        the base domain's keywords and description.

        Args:
            num_specialists: number of configurations to produce; a value of
                zero or less produces an empty list.
        Returns:
            A list of dicts with keys "id", "name", "keywords",
            "description" and "weight". Each dict owns its own "keywords"
            list, so callers may edit one config without affecting
            ``DOMAINS`` or any other config.
        """
        base_domains = list(cls.DOMAINS)
        num_base = len(base_domains)
        configs = []
        for i in range(num_specialists):
            # i % num_base == i while i < num_base, so one lookup covers both
            # the predefined domains and the cycled sub-specializations.
            base_name = base_domains[i % num_base]
            domain_info = cls.DOMAINS[base_name]
            domain_name = base_name if i < num_base else f"{base_name}_sub_{i}"
            configs.append({
                "id": i,
                "name": domain_name,
                # Copy: sharing the DOMAINS list would let a mutation of one
                # config leak into the class table and every sibling config.
                "keywords": list(domain_info["keywords"]),
                "description": domain_info["description"],
                "weight": 1.0,  # Can be adjusted based on importance
            })
        return configs

    @classmethod
    def create_specialist_config(cls, base_config: "MambaConfig", domain_id: int) -> "MambaConfig":
        """Create a specialist configuration for a specific domain.

        Builds a new ``MambaConfig`` from the instance attributes of
        ``base_config`` and tags it with ``specialist_id``; ``base_config``
        itself is not modified.

        Args:
            base_config: configuration whose attributes are copied
            domain_id: value stored as ``specialist_id`` on the new config
        Returns:
            The newly constructed configuration.
        """
        # NOTE(review): this requires every instance attribute of base_config
        # to be accepted as a MambaConfig constructor keyword - confirm
        # against core.config (including `specialist_id`, if a config
        # returned from here is ever passed back in as base_config).
        specialist_config = MambaConfig(**vars(base_config))
        specialist_config.specialist_id = domain_id
        return specialist_config

utils/selective_scan.py ADDED Viewed

	@@ -0,0 +1,55 @@

+# =============================================================================
+# utils/selective_scan.py
+# =============================================================================
+import torch
+import torch.nn.functional as F
+from typing import Tuple
def selective_scan_fn(u, delta, A, B, C, D=None, z=None, delta_bias=None, delta_softplus=False):
    """
    Selective scan function - core of Mamba's state space model

    Runs the recurrence sequentially over the sequence axis, starting from
    a zero hidden state:

        h_t = exp(delta_t * A) * h_{t-1} + delta_t * B_t * u_t
        y_t = sum_over_state(h_t * C_t) [+ D * u_t]

    Args:
        u: input sequence [batch, seq_len, d_inner]
        delta: time step [batch, seq_len, d_inner]
        A: state matrix [d_inner, d_state]
        B: input matrix [batch, seq_len, d_state]
        C: output matrix [batch, seq_len, d_state]
        D: skip connection [d_inner]
        z: gating [batch, seq_len, d_inner] (optional)
        delta_bias: bias for delta [d_inner] (optional)
        delta_softplus: whether to apply softplus to delta
    Returns:
        y: output [batch, seq_len, d_inner]; an empty tensor of that shape
            when seq_len is 0
    """
    batch_size, seq_len, d_inner = u.shape
    d_state = A.shape[1]

    if delta_bias is not None:
        delta = delta + delta_bias[None, None, :]
    if delta_softplus:
        delta = F.softplus(delta)

    # Discretization (computed for all time steps at once)
    delta_e = delta.unsqueeze(-1)  # [batch, seq_len, d_inner, 1]
    deltaA = torch.exp(delta_e * A)  # [batch, seq_len, d_inner, d_state]
    deltaB_u = delta_e * B.unsqueeze(2) * u.unsqueeze(-1)  # [batch, seq_len, d_inner, d_state]

    # Initialize hidden state
    h = torch.zeros(batch_size, d_inner, d_state, device=u.device, dtype=u.dtype)

    outputs = []
    for i in range(seq_len):
        h = deltaA[:, i] * h + deltaB_u[:, i]  # State update
        y = torch.sum(h * C[:, i].unsqueeze(1), dim=-1)  # Output projection
        if D is not None:
            y = y + D * u[:, i]
        outputs.append(y)

    if outputs:
        y = torch.stack(outputs, dim=1)  # [batch, seq_len, d_inner]
    else:
        # torch.stack rejects an empty list; return an empty sequence instead
        # of raising when seq_len == 0.
        y = torch.zeros(batch_size, 0, d_inner, device=u.device, dtype=u.dtype)

    if z is not None:
        y = y * F.silu(z)
    return y