Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
@@ -730,13 +730,13 @@ class UltimateMambaSwarm:
|
|
730 |
|
731 |
# Enhanced domain detection with confidence scoring
|
732 |
self.domain_keywords = {
|
733 |
-
'medical': ['medical', 'health', 'doctor', 'patient', 'disease', 'treatment', 'symptom', 'diagnosis', 'medicine', 'hospital'],
|
734 |
-
'legal': ['legal', 'law', 'court', 'judge', 'contract', 'attorney', 'lawyer', 'legislation', 'rights', 'lawsuit'],
|
735 |
-
'code': ['code', 'python', 'programming', 'function', 'algorithm', 'software', 'debug', 'script', 'developer', 'syntax', 'variable', 'loop', 'class', 'method', 'library', 'framework', 'api', 'database', 'web development'],
|
736 |
-
'science': ['science', 'research', 'experiment', 'theory', 'physics', 'chemistry', 'biology', 'scientific', 'hypothesis'],
|
737 |
-
'creative': ['story', 'creative', 'write', 'novel', 'poem', 'character', 'fiction', 'narrative', 'art', 'imagination'],
|
738 |
-
'business': ['business', 'marketing', 'strategy', 'finance', 'management', 'economics', 'profit', 'company', 'entrepreneur'],
|
739 |
-
'general': ['explain', 'what', 'how', 'why', 'describe', 'tell', 'help', 'question', 'information', 'knowledge']
|
740 |
}
|
741 |
|
742 |
# Initialize with default model
|
@@ -774,35 +774,72 @@ class UltimateMambaSwarm:
|
|
774 |
logger.error(f"System initialization failed: {e}")
|
775 |
|
776 |
def detect_domain_advanced(self, prompt: str) -> Tuple[str, float]:
|
777 |
-
"""Advanced domain detection with confidence scoring"""
|
778 |
prompt_lower = prompt.lower()
|
779 |
domain_scores = {}
|
780 |
|
|
|
|
|
781 |
for domain, keywords in self.domain_keywords.items():
|
782 |
-
matches =
|
783 |
-
|
784 |
-
|
785 |
-
|
|
|
|
|
|
|
|
|
|
|
786 |
# Bonus for multiple matches
|
787 |
-
if matches > 1:
|
788 |
-
|
789 |
-
|
|
|
790 |
if domain == 'code':
|
791 |
-
#
|
792 |
-
|
793 |
-
|
794 |
-
if
|
795 |
-
|
796 |
-
|
797 |
-
|
798 |
-
|
799 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
800 |
|
|
|
801 |
if domain_scores:
|
802 |
best_domain = max(domain_scores, key=domain_scores.get)
|
803 |
confidence = min(domain_scores[best_domain], 1.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
804 |
return best_domain, confidence
|
805 |
|
|
|
806 |
return 'general', 0.5
|
807 |
|
808 |
def simulate_advanced_encoder_routing(self, domain: str, confidence: float, num_encoders: int, model_size: str) -> Dict:
|
@@ -910,19 +947,64 @@ class UltimateMambaSwarm:
|
|
910 |
def _generate_with_ultimate_model(self, prompt: str, max_length: int, temperature: float, top_p: float, domain: str = 'general') -> str:
|
911 |
"""Generate using loaded model with ultimate optimization and content safety"""
|
912 |
try:
|
|
|
|
|
913 |
# Get optimal parameters
|
914 |
gen_params = self.model_loader.get_optimal_generation_params(temperature, top_p, max_length)
|
915 |
|
916 |
-
#
|
917 |
-
|
918 |
-
|
919 |
-
|
920 |
-
|
921 |
-
|
922 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
923 |
|
924 |
-
|
925 |
-
|
926 |
|
927 |
# Tokenize with safety
|
928 |
inputs = self.model_loader.tokenizer.encode(
|
@@ -1001,19 +1083,45 @@ class UltimateMambaSwarm:
|
|
1001 |
def _is_response_too_generic(self, response: str, prompt: str, domain: str) -> bool:
|
1002 |
"""Check if response is too generic and doesn't address the domain-specific prompt"""
|
1003 |
if not response or len(response.strip()) < 20:
|
|
|
1004 |
return True
|
1005 |
|
1006 |
response_lower = response.lower()
|
1007 |
prompt_lower = prompt.lower()
|
1008 |
|
1009 |
-
|
|
|
|
|
1010 |
if domain == 'code':
|
1011 |
-
|
1012 |
-
|
|
|
|
|
|
|
1013 |
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1014 |
|
1015 |
# Check if response is just repeating the prompt without answering
|
1016 |
-
if response_lower.startswith(prompt_lower[:
|
|
|
1017 |
return True
|
1018 |
|
1019 |
# Check for overly generic responses
|
@@ -1023,13 +1131,26 @@ class UltimateMambaSwarm:
|
|
1023 |
'it depends on various factors',
|
1024 |
'this requires careful consideration',
|
1025 |
'multiple perspectives',
|
1026 |
-
'interconnected concepts'
|
|
|
|
|
|
|
1027 |
]
|
1028 |
|
1029 |
generic_count = sum(1 for pattern in generic_patterns if pattern in response_lower)
|
1030 |
if generic_count >= 2: # Too many generic phrases
|
|
|
1031 |
return True
|
1032 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1033 |
return False
|
1034 |
|
1035 |
def _generate_ultimate_fallback(self, prompt: str, domain: str) -> str:
|
|
|
730 |
|
731 |
# Enhanced domain detection with confidence scoring
|
732 |
self.domain_keywords = {
|
733 |
+
'medical': ['medical', 'health', 'doctor', 'patient', 'disease', 'treatment', 'symptom', 'diagnosis', 'medicine', 'hospital', 'clinical', 'therapy', 'pharmaceutical', 'healthcare', 'surgeon', 'nurse', 'clinic', 'prescription', 'dosage', 'vaccine'],
|
734 |
+
'legal': ['legal', 'law', 'court', 'judge', 'contract', 'attorney', 'lawyer', 'legislation', 'rights', 'lawsuit', 'statute', 'regulation', 'jurisdiction', 'litigation', 'defendant', 'plaintiff', 'evidence', 'testimony', 'verdict', 'appeal'],
|
735 |
+
'code': ['code', 'python', 'programming', 'function', 'algorithm', 'software', 'debug', 'script', 'developer', 'syntax', 'variable', 'loop', 'class', 'method', 'library', 'framework', 'api', 'database', 'web development', 'javascript', 'html', 'css', 'react', 'node', 'git', 'github', 'programming language', 'coding', 'development', 'computer science', 'data structure', 'array', 'list', 'dictionary', 'string', 'integer', 'boolean', 'import', 'def', 'if', 'else', 'for', 'while', 'try', 'except', 'return', 'print', 'input', 'output', 'file', 'json', 'xml', 'csv', 'pandas', 'numpy', 'matplotlib', 'sklearn', 'tensorflow', 'pytorch', 'machine learning', 'ai', 'artificial intelligence', 'neural network', 'deep learning', 'model training', 'regression', 'classification', 'clustering', 'supervised', 'unsupervised', 'reinforcement learning'],
|
736 |
+
'science': ['science', 'research', 'experiment', 'theory', 'physics', 'chemistry', 'biology', 'scientific', 'hypothesis', 'laboratory', 'analysis', 'data', 'observation', 'methodology', 'peer review', 'publication', 'journal', 'academic', 'study', 'quantum', 'molecular', 'genetic', 'evolution', 'ecosystem', 'climate', 'astronomy', 'geology', 'mathematics', 'statistics', 'engineering'],
|
737 |
+
'creative': ['story', 'creative', 'write', 'novel', 'poem', 'character', 'fiction', 'narrative', 'art', 'imagination', 'plot', 'dialogue', 'setting', 'theme', 'author', 'writing', 'literature', 'poetry', 'drama', 'screenplay', 'script', 'book', 'chapter', 'scene', 'metaphor', 'symbolism', 'style', 'voice', 'tone'],
|
738 |
+
'business': ['business', 'marketing', 'strategy', 'finance', 'management', 'economics', 'profit', 'company', 'entrepreneur', 'startup', 'investment', 'revenue', 'sales', 'customer', 'market', 'competition', 'brand', 'product', 'service', 'leadership', 'team', 'organization', 'budget', 'roi', 'kpi', 'analytics', 'growth', 'scale', 'innovation'],
|
739 |
+
'general': ['explain', 'what', 'how', 'why', 'describe', 'tell', 'help', 'question', 'information', 'knowledge', 'understand', 'learn', 'teach', 'example', 'definition', 'meaning', 'concept', 'idea', 'topic', 'subject']
|
740 |
}
|
741 |
|
742 |
# Initialize with default model
|
|
|
774 |
logger.error(f"System initialization failed: {e}")
|
775 |
|
776 |
def detect_domain_advanced(self, prompt: str) -> Tuple[str, float]:
    """Detect the most likely knowledge domain for *prompt*.

    Scores each domain in ``self.domain_keywords`` by the fraction of its
    keywords present in the prompt (whole-word matches), applies a
    progressive bonus for multiple hits plus extra weighting for the
    'code', 'medical' and 'science' domains, and returns the best scorer.

    Args:
        prompt: Raw user prompt to classify.

    Returns:
        ``(domain, confidence)`` with confidence capped at 1.0.  Falls back
        to ``('general', 0.5)`` when nothing matches, or when a specialized
        (non-'general') domain scores below the 0.3 confidence floor.
    """
    import re  # local import: whole-word keyword matching needs regex boundaries

    prompt_lower = prompt.lower()
    domain_scores = {}

    print(f"π Domain Detection Debug: Analyzing prompt: '{prompt[:50]}...'")

    for domain, keywords in self.domain_keywords.items():
        # Whole-word matching: a bare substring test would let short
        # keywords such as 'ai' fire inside words like 'maintain',
        # inflating scores and misrouting the prompt.
        matches = [
            keyword for keyword in keywords
            if re.search(r'\b' + re.escape(keyword) + r'\b', prompt_lower)
        ]

        if matches:
            # Enhanced scoring algorithm: fraction of this domain's
            # keyword list that was found in the prompt.
            base_score = len(matches) / len(keywords)

            # Bonus for multiple matches
            if len(matches) > 1:
                base_score *= (1.0 + 0.1 * len(matches))  # Progressive bonus

            # Special bonuses for specific domains
            if domain == 'code':
                # Strong bonus for programming-specific terms
                programming_terms = ['python', 'programming', 'code', 'function', 'script', 'algorithm', 'development', 'coding']
                programming_matches = sum(1 for term in programming_terms if term in matches)
                if programming_matches > 0:
                    base_score *= 2.0  # Double score for programming

                # Extra bonus for code syntax patterns.  These deliberately
                # keep substring semantics: trailing spaces/colons/parens in
                # the patterns already encode a boundary (e.g. 'def ').
                code_patterns = ['def ', 'class ', 'import ', 'for ', 'while ', 'if ', 'else:', 'try:', 'except:', 'return ', 'print(', 'input(']
                pattern_matches = sum(1 for pattern in code_patterns if pattern in prompt_lower)
                if pattern_matches > 0:
                    base_score *= (1.5 + 0.2 * pattern_matches)

            elif domain == 'medical':
                # Bonus for medical terminology
                medical_terms = ['medical', 'health', 'doctor', 'patient', 'treatment', 'diagnosis']
                medical_matches = sum(1 for term in medical_terms if term in matches)
                if medical_matches > 0:
                    base_score *= 1.8

            elif domain == 'science':
                # Bonus for scientific methodology terms
                science_terms = ['research', 'experiment', 'theory', 'hypothesis', 'analysis', 'study']
                science_matches = sum(1 for term in science_terms if term in matches)
                if science_matches > 0:
                    base_score *= 1.6

            # Cap the score to reasonable levels
            domain_scores[domain] = min(base_score, 2.0)
            print(f"  π {domain}: {len(matches)} matches {matches[:3]}{'...' if len(matches) > 3 else ''} β Score: {domain_scores[domain]:.3f}")

    # Determine best domain
    if domain_scores:
        best_domain = max(domain_scores, key=domain_scores.get)
        confidence = min(domain_scores[best_domain], 1.0)

        # Ensure minimum confidence threshold for specialized domains
        if best_domain != 'general' and confidence < 0.3:
            print(f"  β οΈ Low confidence ({confidence:.3f}) for {best_domain}, falling back to general")
            return 'general', 0.5

        print(f"  β Selected Domain: {best_domain} (confidence: {confidence:.3f})")
        return best_domain, confidence

    print(f"  π No specific domain detected, using general")
    return 'general', 0.5
|
844 |
|
845 |
def simulate_advanced_encoder_routing(self, domain: str, confidence: float, num_encoders: int, model_size: str) -> Dict:
|
|
|
947 |
def _generate_with_ultimate_model(self, prompt: str, max_length: int, temperature: float, top_p: float, domain: str = 'general') -> str:
|
948 |
"""Generate using loaded model with ultimate optimization and content safety"""
|
949 |
try:
|
950 |
+
print(f"π― Generating for domain: {domain}")
|
951 |
+
|
952 |
# Get optimal parameters
|
953 |
gen_params = self.model_loader.get_optimal_generation_params(temperature, top_p, max_length)
|
954 |
|
955 |
+
# Domain-specific parameter adjustments
|
956 |
+
if domain == 'code':
|
957 |
+
# More deterministic for code generation
|
958 |
+
gen_params.update({
|
959 |
+
"temperature": min(gen_params.get("temperature", 0.3), 0.4),
|
960 |
+
"top_p": min(gen_params.get("top_p", 0.8), 0.85),
|
961 |
+
"repetition_penalty": 1.1
|
962 |
+
})
|
963 |
+
# Domain-specific prompt formatting
|
964 |
+
if any(keyword in prompt.lower() for keyword in ['function', 'code', 'python', 'programming', 'script']):
|
965 |
+
safe_prompt = f"Programming Task: {prompt}\n\nSolution:"
|
966 |
+
else:
|
967 |
+
safe_prompt = f"Technical Question: {prompt}\nAnswer:"
|
968 |
+
|
969 |
+
elif domain == 'medical':
|
970 |
+
# Conservative parameters for medical content
|
971 |
+
gen_params.update({
|
972 |
+
"temperature": min(gen_params.get("temperature", 0.5), 0.6),
|
973 |
+
"top_p": min(gen_params.get("top_p", 0.8), 0.85),
|
974 |
+
"repetition_penalty": 1.2
|
975 |
+
})
|
976 |
+
safe_prompt = f"Medical Query: {prompt}\nProfessional Response:"
|
977 |
+
|
978 |
+
elif domain == 'science':
|
979 |
+
# Balanced parameters for scientific accuracy
|
980 |
+
gen_params.update({
|
981 |
+
"temperature": min(gen_params.get("temperature", 0.6), 0.7),
|
982 |
+
"top_p": min(gen_params.get("top_p", 0.85), 0.9),
|
983 |
+
"repetition_penalty": 1.15
|
984 |
+
})
|
985 |
+
safe_prompt = f"Scientific Question: {prompt}\nAnalysis:"
|
986 |
+
|
987 |
+
elif domain == 'creative':
|
988 |
+
# More creative parameters
|
989 |
+
gen_params.update({
|
990 |
+
"temperature": max(gen_params.get("temperature", 0.8), 0.7),
|
991 |
+
"top_p": max(gen_params.get("top_p", 0.9), 0.85),
|
992 |
+
"repetition_penalty": 1.05
|
993 |
+
})
|
994 |
+
safe_prompt = f"Creative Prompt: {prompt}\nResponse:"
|
995 |
+
|
996 |
+
else:
|
997 |
+
# General domain - balanced approach
|
998 |
+
gen_params.update({
|
999 |
+
"repetition_penalty": max(gen_params.get("repetition_penalty", 1.1), 1.15),
|
1000 |
+
"no_repeat_ngram_size": max(gen_params.get("no_repeat_ngram_size", 2), 3),
|
1001 |
+
"temperature": min(gen_params.get("temperature", 0.7), 0.8),
|
1002 |
+
"top_p": min(gen_params.get("top_p", 0.9), 0.85)
|
1003 |
+
})
|
1004 |
+
safe_prompt = f"Question: {prompt}\nAnswer:"
|
1005 |
|
1006 |
+
print(f"π Using prompt format: '{safe_prompt[:50]}...'")
|
1007 |
+
print(f"βοΈ Generation params: temp={gen_params['temperature']:.2f}, top_p={gen_params['top_p']:.2f}")
|
1008 |
|
1009 |
# Tokenize with safety
|
1010 |
inputs = self.model_loader.tokenizer.encode(
|
|
|
1083 |
def _is_response_too_generic(self, response: str, prompt: str, domain: str) -> bool:
|
1084 |
"""Check if response is too generic and doesn't address the domain-specific prompt"""
|
1085 |
if not response or len(response.strip()) < 20:
|
1086 |
+
print(f"β οΈ Response too short: {len(response)} chars")
|
1087 |
return True
|
1088 |
|
1089 |
response_lower = response.lower()
|
1090 |
prompt_lower = prompt.lower()
|
1091 |
|
1092 |
+
print(f"π Quality Check - Domain: {domain}, Response: '{response[:50]}...'")
|
1093 |
+
|
1094 |
+
# Domain-specific validation
|
1095 |
if domain == 'code':
|
1096 |
+
# Must contain programming-related terms for code domain
|
1097 |
+
code_indicators = ['python', 'code', 'programming', 'function', 'variable', 'syntax', 'example', 'script', 'library', 'def ', 'class', 'import', 'algorithm', 'development', 'software']
|
1098 |
+
code_matches = sum(1 for indicator in code_indicators if indicator in response_lower)
|
1099 |
+
if code_matches == 0:
|
1100 |
+
print(f"β οΈ No code indicators found in response for code domain")
|
1101 |
return True
|
1102 |
+
print(f"β
Found {code_matches} code indicators")
|
1103 |
+
|
1104 |
+
elif domain == 'medical':
|
1105 |
+
# Must contain medical terminology
|
1106 |
+
medical_indicators = ['medical', 'health', 'treatment', 'clinical', 'patient', 'diagnosis', 'therapy', 'healthcare', 'medicine', 'doctor']
|
1107 |
+
medical_matches = sum(1 for indicator in medical_indicators if indicator in response_lower)
|
1108 |
+
if medical_matches == 0:
|
1109 |
+
print(f"β οΈ No medical indicators found in response for medical domain")
|
1110 |
+
return True
|
1111 |
+
print(f"β
Found {medical_matches} medical indicators")
|
1112 |
+
|
1113 |
+
elif domain == 'science':
|
1114 |
+
# Must contain scientific terminology
|
1115 |
+
science_indicators = ['research', 'study', 'analysis', 'experiment', 'theory', 'hypothesis', 'scientific', 'methodology', 'data', 'evidence']
|
1116 |
+
science_matches = sum(1 for indicator in science_indicators if indicator in response_lower)
|
1117 |
+
if science_matches == 0:
|
1118 |
+
print(f"β οΈ No science indicators found in response for science domain")
|
1119 |
+
return True
|
1120 |
+
print(f"β
Found {science_matches} science indicators")
|
1121 |
|
1122 |
# Check if response is just repeating the prompt without answering
|
1123 |
+
if len(prompt_lower) > 10 and response_lower.startswith(prompt_lower[:15]):
|
1124 |
+
print(f"β οΈ Response just repeats the prompt")
|
1125 |
return True
|
1126 |
|
1127 |
# Check for overly generic responses
|
|
|
1131 |
'it depends on various factors',
|
1132 |
'this requires careful consideration',
|
1133 |
'multiple perspectives',
|
1134 |
+
'interconnected concepts',
|
1135 |
+
'this is an interesting question',
|
1136 |
+
'there are several approaches',
|
1137 |
+
'it\'s important to consider'
|
1138 |
]
|
1139 |
|
1140 |
generic_count = sum(1 for pattern in generic_patterns if pattern in response_lower)
|
1141 |
if generic_count >= 2: # Too many generic phrases
|
1142 |
+
print(f"β οΈ Too many generic phrases ({generic_count})")
|
1143 |
return True
|
1144 |
|
1145 |
+
# Check for responses that don't actually answer the question
|
1146 |
+
question_indicators = ['what', 'how', 'why', 'when', 'where', 'which', 'explain', 'describe', 'create', 'write', 'make', 'build']
|
1147 |
+
if any(indicator in prompt_lower for indicator in question_indicators):
|
1148 |
+
# This is clearly a question, response should provide specific information
|
1149 |
+
if len(response.split()) < 30: # Very short response to a clear question
|
1150 |
+
print(f"β οΈ Very short response ({len(response.split())} words) to a clear question")
|
1151 |
+
return True
|
1152 |
+
|
1153 |
+
print(f"β
Response passed quality checks")
|
1154 |
return False
|
1155 |
|
1156 |
def _generate_ultimate_fallback(self, prompt: str, domain: str) -> str:
|