Debito committed on
Commit 3d6b209 · verified · 1 Parent(s): b1366ef

Upload app.py

Files changed (1)
  1. app.py +252 -63
app.py CHANGED
@@ -728,17 +728,51 @@ class UltimateMambaSwarm:
        self.model_loaded = False
        self.current_model_size = "auto"

-        # Enhanced domain detection with confidence scoring
-        self.domain_keywords = {
-            'medical': ['medical', 'health', 'doctor', 'patient', 'disease', 'treatment', 'symptom', 'diagnosis', 'medicine', 'hospital', 'clinical', 'therapy', 'pharmaceutical', 'healthcare', 'surgeon', 'nurse', 'clinic', 'prescription', 'dosage', 'vaccine'],
-            'legal': ['legal', 'law', 'court', 'judge', 'contract', 'attorney', 'lawyer', 'legislation', 'rights', 'lawsuit', 'statute', 'regulation', 'jurisdiction', 'litigation', 'defendant', 'plaintiff', 'evidence', 'testimony', 'verdict', 'appeal'],
-            'code': ['code', 'python', 'programming', 'function', 'algorithm', 'software', 'debug', 'script', 'developer', 'syntax', 'variable', 'loop', 'class', 'method', 'library', 'framework', 'api', 'database', 'web development', 'javascript', 'html', 'css', 'react', 'node', 'git', 'github', 'programming language', 'coding', 'development', 'computer science', 'data structure', 'array', 'list', 'dictionary', 'string', 'integer', 'boolean', 'import', 'def', 'if', 'else', 'for', 'while', 'try', 'except', 'return', 'print', 'input', 'output', 'file', 'json', 'xml', 'csv', 'pandas', 'numpy', 'matplotlib', 'sklearn', 'tensorflow', 'pytorch', 'machine learning', 'ai', 'artificial intelligence', 'neural network', 'deep learning', 'model training', 'regression', 'classification', 'clustering', 'supervised', 'unsupervised', 'reinforcement learning'],
-            'science': ['science', 'research', 'experiment', 'theory', 'physics', 'chemistry', 'biology', 'scientific', 'hypothesis', 'laboratory', 'analysis', 'data', 'observation', 'methodology', 'peer review', 'publication', 'journal', 'academic', 'study', 'quantum', 'molecular', 'genetic', 'evolution', 'ecosystem', 'climate', 'astronomy', 'geology', 'mathematics', 'statistics', 'engineering'],
-            'creative': ['story', 'creative', 'write', 'novel', 'poem', 'character', 'fiction', 'narrative', 'art', 'imagination', 'plot', 'dialogue', 'setting', 'theme', 'author', 'writing', 'literature', 'poetry', 'drama', 'screenplay', 'script', 'book', 'chapter', 'scene', 'metaphor', 'symbolism', 'style', 'voice', 'tone'],
-            'business': ['business', 'marketing', 'strategy', 'finance', 'management', 'economics', 'profit', 'company', 'entrepreneur', 'startup', 'investment', 'revenue', 'sales', 'customer', 'market', 'competition', 'brand', 'product', 'service', 'leadership', 'team', 'organization', 'budget', 'roi', 'kpi', 'analytics', 'growth', 'scale', 'innovation'],
-            'general': ['explain', 'what', 'how', 'why', 'describe', 'tell', 'help', 'question', 'information', 'knowledge', 'understand', 'learn', 'teach', 'example', 'definition', 'meaning', 'concept', 'idea', 'topic', 'subject']
+        # Dynamic adaptive domain detection system
+        self.base_domain_patterns = {
+            'medical': {
+                'core_terms': ['medical', 'health', 'doctor', 'patient', 'treatment', 'diagnosis'],
+                'semantic_patterns': ['symptoms of', 'treatment for', 'causes of', 'how to treat', 'medical condition'],
+                'context_indicators': ['healthcare', 'clinical', 'pharmaceutical', 'therapeutic']
+            },
+            'legal': {
+                'core_terms': ['legal', 'law', 'court', 'contract', 'attorney', 'rights'],
+                'semantic_patterns': ['according to law', 'legal rights', 'court case', 'legal advice', 'lawsuit'],
+                'context_indicators': ['jurisdiction', 'litigation', 'statute', 'regulation']
+            },
+            'code': {
+                'core_terms': ['code', 'python', 'programming', 'function', 'algorithm', 'software'],
+                'semantic_patterns': ['write a function', 'create a program', 'how to code', 'programming problem', 'implement algorithm'],
+                'context_indicators': ['syntax', 'debugging', 'development', 'coding', 'script']
+            },
+            'science': {
+                'core_terms': ['science', 'research', 'experiment', 'theory', 'study', 'analysis'],
+                'semantic_patterns': ['scientific method', 'research shows', 'experimental results', 'theory suggests'],
+                'context_indicators': ['hypothesis', 'methodology', 'peer review', 'laboratory']
+            },
+            'creative': {
+                'core_terms': ['story', 'creative', 'write', 'character', 'fiction', 'art'],
+                'semantic_patterns': ['write a story', 'create a character', 'creative writing', 'artistic expression'],
+                'context_indicators': ['imagination', 'narrative', 'literature', 'poetry']
+            },
+            'business': {
+                'core_terms': ['business', 'marketing', 'strategy', 'finance', 'management', 'company'],
+                'semantic_patterns': ['business plan', 'marketing strategy', 'financial analysis', 'company growth'],
+                'context_indicators': ['entrepreneur', 'investment', 'revenue', 'profit']
+            },
+            'general': {
+                'core_terms': ['explain', 'what', 'how', 'why', 'describe', 'help'],
+                'semantic_patterns': ['can you explain', 'what is', 'how does', 'why do', 'help me understand'],
+                'context_indicators': ['information', 'knowledge', 'understanding', 'learning']
+            }
        }

+        # Dynamic learning components
+        self.learned_patterns = {}  # Store patterns learned from user interactions
+        self.domain_context_history = []  # Track recent domain contexts for better detection
+        self.semantic_similarity_cache = {}  # Cache for performance
+        self.interaction_count = 0
+
        # Initialize with default model
        self._initialize_system()

@@ -774,74 +808,223 @@ class UltimateMambaSwarm:
            logger.error(f"System initialization failed: {e}")

    def detect_domain_advanced(self, prompt: str) -> Tuple[str, float]:
-        """Advanced domain detection with confidence scoring and debugging"""
+        """Advanced adaptive domain detection with machine learning-like capabilities"""
        prompt_lower = prompt.lower()
+        self.interaction_count += 1
+
+        print(f"🔍 Adaptive Domain Detection #{self.interaction_count}: '{prompt[:50]}...'")
+
+        # Multi-layered detection approach
        domain_scores = {}

-        print(f"🔍 Domain Detection Debug: Analyzing prompt: '{prompt[:50]}...'")
+        # Layer 1: Semantic Pattern Analysis
+        semantic_scores = self._analyze_semantic_patterns(prompt_lower)
+
+        # Layer 2: Context-Aware Detection
+        context_scores = self._analyze_context_patterns(prompt_lower)
+
+        # Layer 3: Historical Context Influence
+        history_scores = self._analyze_historical_context(prompt_lower)

-        for domain, keywords in self.domain_keywords.items():
-            matches = []
-            for keyword in keywords:
-                if keyword in prompt_lower:
-                    matches.append(keyword)
+        # Layer 4: Learned Pattern Matching
+        learned_scores = self._analyze_learned_patterns(prompt_lower)
+
+        # Combine all layers with weighted importance
+        for domain in self.base_domain_patterns.keys():
+            combined_score = (
+                semantic_scores.get(domain, 0) * 0.4 +
+                context_scores.get(domain, 0) * 0.3 +
+                history_scores.get(domain, 0) * 0.2 +
+                learned_scores.get(domain, 0) * 0.1
+            )

-            if matches:
-                # Enhanced scoring algorithm
-                base_score = len(matches) / len(keywords)
-
-                # Bonus for multiple matches
-                if len(matches) > 1:
-                    base_score *= (1.0 + 0.1 * len(matches))  # Progressive bonus
-
-                # Special bonuses for specific domains
-                if domain == 'code':
-                    # Strong bonus for programming-specific terms
-                    programming_terms = ['python', 'programming', 'code', 'function', 'script', 'algorithm', 'development', 'coding']
-                    programming_matches = sum(1 for term in programming_terms if term in matches)
-                    if programming_matches > 0:
-                        base_score *= 2.0  # Double score for programming
-
-                    # Extra bonus for code syntax patterns
-                    code_patterns = ['def ', 'class ', 'import ', 'for ', 'while ', 'if ', 'else:', 'try:', 'except:', 'return ', 'print(', 'input(']
-                    pattern_matches = sum(1 for pattern in code_patterns if pattern in prompt_lower)
-                    if pattern_matches > 0:
-                        base_score *= (1.5 + 0.2 * pattern_matches)
-
-                elif domain == 'medical':
-                    # Bonus for medical terminology
-                    medical_terms = ['medical', 'health', 'doctor', 'patient', 'treatment', 'diagnosis']
-                    medical_matches = sum(1 for term in medical_terms if term in matches)
-                    if medical_matches > 0:
-                        base_score *= 1.8
-
-                elif domain == 'science':
-                    # Bonus for scientific methodology terms
-                    science_terms = ['research', 'experiment', 'theory', 'hypothesis', 'analysis', 'study']
-                    science_matches = sum(1 for term in science_terms if term in matches)
-                    if science_matches > 0:
-                        base_score *= 1.6
-
-                # Cap the score to reasonable levels
-                domain_scores[domain] = min(base_score, 2.0)
-                print(f" 📊 {domain}: {len(matches)} matches {matches[:3]}{'...' if len(matches) > 3 else ''} → Score: {domain_scores[domain]:.3f}")
+            if combined_score > 0:
+                domain_scores[domain] = combined_score
+                print(f" 📈 {domain}: semantic={semantic_scores.get(domain, 0):.3f}, context={context_scores.get(domain, 0):.3f}, history={history_scores.get(domain, 0):.3f}, learned={learned_scores.get(domain, 0):.3f} → Total={combined_score:.3f}")

-        # Determine best domain
+        # Determine best domain with dynamic thresholding
        if domain_scores:
            best_domain = max(domain_scores, key=domain_scores.get)
            confidence = min(domain_scores[best_domain], 1.0)

-            # Ensure minimum confidence threshold for specialized domains
-            if best_domain != 'general' and confidence < 0.3:
-                print(f" ⚠️ Low confidence ({confidence:.3f}) for {best_domain}, falling back to general")
-                return 'general', 0.5
+            # Dynamic confidence adjustment based on interaction history
+            if len(self.domain_context_history) > 3:
+                recent_domains = [entry['domain'] for entry in self.domain_context_history[-3:]]
+                if best_domain in recent_domains:
+                    confidence *= 1.1  # Boost confidence for consistent domain usage
+                    print(f" 🔄 Confidence boosted due to recent domain consistency")
+
+            # Adaptive threshold - becomes more lenient with more interactions
+            min_threshold = max(0.2, 0.4 - (self.interaction_count * 0.01))
+
+            if confidence >= min_threshold:
+                # Store successful detection for learning
+                self._update_learned_patterns(prompt_lower, best_domain, confidence)
+                self._update_context_history(prompt, best_domain, confidence)

-            print(f" ✅ Selected Domain: {best_domain} (confidence: {confidence:.3f})")
-            return best_domain, confidence
+                print(f" ✅ Selected Domain: {best_domain} (confidence: {confidence:.3f}, threshold: {min_threshold:.3f})")
+                return best_domain, confidence
+            else:
+                print(f" ⚠️ Low confidence ({confidence:.3f} < {min_threshold:.3f}), using general")
+        else:
+            print(f" 🔄 No patterns matched, using general")

-        print(f" 🔄 No specific domain detected, using general")
+        # Fallback to general with context storage
+        self._update_context_history(prompt, 'general', 0.5)
        return 'general', 0.5

+    def _analyze_semantic_patterns(self, prompt_lower: str) -> Dict[str, float]:
+        """Analyze semantic patterns in the prompt"""
+        scores = {}
+
+        for domain, patterns in self.base_domain_patterns.items():
+            score = 0
+
+            # Check core terms with fuzzy matching
+            core_matches = sum(1 for term in patterns['core_terms'] if term in prompt_lower)
+            score += core_matches * 0.3
+
+            # Check semantic patterns (phrase-level matching)
+            pattern_matches = sum(1 for pattern in patterns['semantic_patterns'] if pattern in prompt_lower)
+            score += pattern_matches * 0.5
+
+            # Special domain-specific boosters
+            if domain == 'code':
+                # Look for code-specific patterns
+                code_indicators = ['def ', 'class ', 'import ', 'function(', '()', '{', '}', '[]', 'return ', 'print(', 'console.log']
+                code_pattern_score = sum(1 for indicator in code_indicators if indicator in prompt_lower)
+                score += code_pattern_score * 0.4
+
+                # Programming language detection
+                languages = ['python', 'javascript', 'java', 'c++', 'html', 'css', 'sql', 'react', 'node']
+                lang_score = sum(1 for lang in languages if lang in prompt_lower)
+                score += lang_score * 0.3
+
+            elif domain == 'medical':
+                # Medical question patterns
+                medical_questions = ['what causes', 'symptoms of', 'treatment for', 'how to cure', 'side effects']
+                med_pattern_score = sum(1 for pattern in medical_questions if pattern in prompt_lower)
+                score += med_pattern_score * 0.4
+
+            elif domain == 'creative':
+                # Creative request patterns
+                creative_requests = ['write a', 'create a story', 'imagine', 'make up', 'fictional']
+                creative_score = sum(1 for pattern in creative_requests if pattern in prompt_lower)
+                score += creative_score * 0.4
+
+            if score > 0:
+                scores[domain] = min(score, 2.0)  # Cap maximum score
+
+        return scores
+
+    def _analyze_context_patterns(self, prompt_lower: str) -> Dict[str, float]:
+        """Analyze contextual indicators in the prompt"""
+        scores = {}
+
+        for domain, patterns in self.base_domain_patterns.items():
+            score = 0
+
+            # Context indicators
+            context_matches = sum(1 for indicator in patterns['context_indicators'] if indicator in prompt_lower)
+            score += context_matches * 0.2
+
+            # Question type analysis
+            if any(q in prompt_lower for q in ['how to', 'what is', 'explain']):
+                if domain in ['general', 'science']:
+                    score += 0.2
+
+            if any(q in prompt_lower for q in ['create', 'make', 'build', 'develop']):
+                if domain in ['code', 'creative', 'business']:
+                    score += 0.3
+
+            if score > 0:
+                scores[domain] = score
+
+        return scores
+
+    def _analyze_historical_context(self, prompt_lower: str) -> Dict[str, float]:
+        """Analyze based on recent interaction history"""
+        scores = {}
+
+        if not self.domain_context_history:
+            return scores
+
+        # Look at recent domain patterns
+        recent_history = self.domain_context_history[-5:]  # Last 5 interactions
+        domain_frequency = {}
+
+        for entry in recent_history:
+            domain = entry['domain']
+            domain_frequency[domain] = domain_frequency.get(domain, 0) + 1
+
+        # Boost scores for recently used domains
+        for domain, frequency in domain_frequency.items():
+            if domain != 'general':  # Don't boost general
+                boost = frequency * 0.1
+                scores[domain] = boost
+
+        return scores
+
+    def _analyze_learned_patterns(self, prompt_lower: str) -> Dict[str, float]:
+        """Analyze using patterns learned from previous interactions"""
+        scores = {}
+
+        for domain, learned_data in self.learned_patterns.items():
+            score = 0
+
+            # Check learned phrases
+            for phrase, weight in learned_data.get('phrases', {}).items():
+                if phrase in prompt_lower:
+                    score += weight * 0.2
+
+            # Check learned word combinations
+            for combo, weight in learned_data.get('combinations', {}).items():
+                if all(word in prompt_lower for word in combo.split()):
+                    score += weight * 0.3
+
+            if score > 0:
+                scores[domain] = min(score, 1.0)
+
+        return scores
+
+    def _update_learned_patterns(self, prompt_lower: str, domain: str, confidence: float):
+        """Update learned patterns based on successful detections"""
+        if domain not in self.learned_patterns:
+            self.learned_patterns[domain] = {'phrases': {}, 'combinations': {}}
+
+        # Extract and store successful phrases (2-4 words)
+        words = prompt_lower.split()
+        for i in range(len(words) - 1):
+            for length in [2, 3, 4]:
+                if i + length <= len(words):
+                    phrase = ' '.join(words[i:i+length])
+                    if len(phrase) > 8:  # Only meaningful phrases
+                        current_weight = self.learned_patterns[domain]['phrases'].get(phrase, 0)
+                        self.learned_patterns[domain]['phrases'][phrase] = min(current_weight + confidence * 0.1, 1.0)
+
+        # Limit stored patterns to prevent memory bloat
+        if len(self.learned_patterns[domain]['phrases']) > 100:
+            # Keep only top 50 patterns
+            sorted_phrases = sorted(
+                self.learned_patterns[domain]['phrases'].items(),
+                key=lambda x: x[1],
+                reverse=True
+            )
+            self.learned_patterns[domain]['phrases'] = dict(sorted_phrases[:50])
+
+    def _update_context_history(self, prompt: str, domain: str, confidence: float):
+        """Update interaction history for context analysis"""
+        self.domain_context_history.append({
+            'prompt': prompt[:100],  # Store truncated prompt
+            'domain': domain,
+            'confidence': confidence,
+            'timestamp': time.time()
+        })
+
+        # Keep only recent history (last 20 interactions)
+        if len(self.domain_context_history) > 20:
+            self.domain_context_history = self.domain_context_history[-20:]
+
    def simulate_advanced_encoder_routing(self, domain: str, confidence: float, num_encoders: int, model_size: str) -> Dict:
        """Advanced encoder routing with model size consideration"""

@@ -1532,6 +1715,12 @@ Secondary: {', '.join(map(str, routing_info['selected_encoders'][8:16]))}{'...'
- **Parameter Optimization**: Dynamic
- **Fallback Protection**: Multi-layer

+**🧠 Adaptive Learning System:**
+- **Interactions Processed**: {self.interaction_count}
+- **Learned Patterns**: {sum(len(patterns.get('phrases', {})) for patterns in self.learned_patterns.values())}
+- **Context History**: {len(self.domain_context_history)} entries
+- **Learning Domains**: {', '.join(self.learned_patterns.keys()) if self.learned_patterns else 'Initializing'}
+
**🐍 Mamba Status**: Ready for GPU activation (mamba_ssm commented out)
"""
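
For reference, the core of the new `detect_domain_advanced` is the weighted sum of the four layer scores (0.4 semantic, 0.3 context, 0.2 history, 0.1 learned). The sketch below is a minimal standalone approximation of that step only; `LAYER_WEIGHTS`, `combine_layer_scores`, and the hard-coded layer scores are illustrative stand-ins and are not part of app.py.

```python
# Minimal sketch of the weighted layer combination shown in the diff above.
# In app.py the per-layer scores come from _analyze_semantic_patterns,
# _analyze_context_patterns, _analyze_historical_context and
# _analyze_learned_patterns; here they are hard-coded stand-ins.

LAYER_WEIGHTS = {"semantic": 0.4, "context": 0.3, "history": 0.2, "learned": 0.1}

def combine_layer_scores(layer_scores: dict) -> dict:
    """Weight and sum per-domain scores from each detection layer."""
    combined = {}
    domains = {d for scores in layer_scores.values() for d in scores}
    for domain in domains:
        total = sum(
            layer_scores.get(layer, {}).get(domain, 0.0) * weight
            for layer, weight in LAYER_WEIGHTS.items()
        )
        if total > 0:
            combined[domain] = total
    return combined

# Example: a prompt that scores mostly as a coding question.
scores = combine_layer_scores({
    "semantic": {"code": 0.9, "general": 0.2},
    "context": {"code": 0.3},
    "history": {"code": 0.1},
    "learned": {},
})
best = max(scores, key=scores.get)
print(best, round(scores[best], 2))  # -> code 0.47
```

Domains missing from a layer simply contribute zero, which is why sparse layers such as the learned-pattern layer cannot dominate until patterns have accumulated.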