Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
@@ -730,13 +730,13 @@ class UltimateMambaSwarm:
|
|
730 |
|
731 |
# Enhanced domain detection with confidence scoring
|
732 |
self.domain_keywords = {
|
733 |
-
'medical': ['medical', 'health', 'doctor', 'patient', 'disease', 'treatment', 'symptom', 'diagnosis', 'medicine', 'hospital'],
|
734 |
-
'legal': ['legal', 'law', 'court', 'judge', 'contract', 'attorney', 'lawyer', 'legislation', 'rights', 'lawsuit'],
|
735 |
-
'code': ['code', 'python', 'programming', 'function', 'algorithm', 'software', 'debug', 'script', 'developer', 'syntax', 'variable', 'loop', 'class', 'method', 'library', 'framework', 'api', 'database', 'web development'],
|
736 |
-
'science': ['science', 'research', 'experiment', 'theory', 'physics', 'chemistry', 'biology', 'scientific', 'hypothesis'],
|
737 |
-
'creative': ['story', 'creative', 'write', 'novel', 'poem', 'character', 'fiction', 'narrative', 'art', 'imagination'],
|
738 |
-
'business': ['business', 'marketing', 'strategy', 'finance', 'management', 'economics', 'profit', 'company', 'entrepreneur'],
|
739 |
-
'general': ['explain', 'what', 'how', 'why', 'describe', 'tell', 'help', 'question', 'information', 'knowledge']
|
740 |
}
|
741 |
|
742 |
# Initialize with default model
|
@@ -774,35 +774,72 @@ class UltimateMambaSwarm:
|
|
774 |
logger.error(f"System initialization failed: {e}")
|
775 |
|
776 |
def detect_domain_advanced(self, prompt: str) -> Tuple[str, float]:
|
777 |
-
"""Advanced domain detection with confidence scoring"""
|
778 |
prompt_lower = prompt.lower()
|
779 |
domain_scores = {}
|
780 |
|
|
|
|
|
781 |
for domain, keywords in self.domain_keywords.items():
|
782 |
-
matches =
|
783 |
-
|
784 |
-
|
785 |
-
|
|
|
|
|
|
|
|
|
|
|
786 |
# Bonus for multiple matches
|
787 |
-
if matches > 1:
|
788 |
-
|
789 |
-
|
|
|
790 |
if domain == 'code':
|
791 |
-
#
|
792 |
-
|
793 |
-
|
794 |
-
if
|
795 |
-
|
796 |
-
|
797 |
-
|
798 |
-
|
799 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
800 |
|
|
|
801 |
if domain_scores:
|
802 |
best_domain = max(domain_scores, key=domain_scores.get)
|
803 |
confidence = min(domain_scores[best_domain], 1.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
804 |
return best_domain, confidence
|
805 |
|
|
|
806 |
return 'general', 0.5
|
807 |
|
808 |
def simulate_advanced_encoder_routing(self, domain: str, confidence: float, num_encoders: int, model_size: str) -> Dict:
|
@@ -910,19 +947,64 @@ class UltimateMambaSwarm:
|
|
910 |
def _generate_with_ultimate_model(self, prompt: str, max_length: int, temperature: float, top_p: float, domain: str = 'general') -> str:
|
911 |
"""Generate using loaded model with ultimate optimization and content safety"""
|
912 |
try:
|
|
|
|
|
913 |
# Get optimal parameters
|
914 |
gen_params = self.model_loader.get_optimal_generation_params(temperature, top_p, max_length)
|
915 |
|
916 |
-
#
|
917 |
-
|
918 |
-
|
919 |
-
|
920 |
-
|
921 |
-
|
922 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
923 |
|
924 |
-
|
925 |
-
|
926 |
|
927 |
# Tokenize with safety
|
928 |
inputs = self.model_loader.tokenizer.encode(
|
@@ -1001,19 +1083,45 @@ class UltimateMambaSwarm:
|
|
1001 |
def _is_response_too_generic(self, response: str, prompt: str, domain: str) -> bool:
|
1002 |
"""Check if response is too generic and doesn't address the domain-specific prompt"""
|
1003 |
if not response or len(response.strip()) < 20:
|
|
|
1004 |
return True
|
1005 |
|
1006 |
response_lower = response.lower()
|
1007 |
prompt_lower = prompt.lower()
|
1008 |
|
1009 |
-
|
|
|
|
|
1010 |
if domain == 'code':
|
1011 |
-
|
1012 |
-
|
|
|
|
|
|
|
1013 |
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1014 |
|
1015 |
# Check if response is just repeating the prompt without answering
|
1016 |
-
if response_lower.startswith(prompt_lower[:
|
|
|
1017 |
return True
|
1018 |
|
1019 |
# Check for overly generic responses
|
@@ -1023,13 +1131,26 @@ class UltimateMambaSwarm:
|
|
1023 |
'it depends on various factors',
|
1024 |
'this requires careful consideration',
|
1025 |
'multiple perspectives',
|
1026 |
-
'interconnected concepts'
|
|
|
|
|
|
|
1027 |
]
|
1028 |
|
1029 |
generic_count = sum(1 for pattern in generic_patterns if pattern in response_lower)
|
1030 |
if generic_count >= 2: # Too many generic phrases
|
|
|
1031 |
return True
|
1032 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1033 |
return False
|
1034 |
|
1035 |
def _generate_ultimate_fallback(self, prompt: str, domain: str) -> str:
|
|
|
730 |
|
731 |
# Enhanced domain detection with confidence scoring
|
732 |
self.domain_keywords = {
|
733 |
+
'medical': ['medical', 'health', 'doctor', 'patient', 'disease', 'treatment', 'symptom', 'diagnosis', 'medicine', 'hospital', 'clinical', 'therapy', 'pharmaceutical', 'healthcare', 'surgeon', 'nurse', 'clinic', 'prescription', 'dosage', 'vaccine'],
|
734 |
+
'legal': ['legal', 'law', 'court', 'judge', 'contract', 'attorney', 'lawyer', 'legislation', 'rights', 'lawsuit', 'statute', 'regulation', 'jurisdiction', 'litigation', 'defendant', 'plaintiff', 'evidence', 'testimony', 'verdict', 'appeal'],
|
735 |
+
'code': ['code', 'python', 'programming', 'function', 'algorithm', 'software', 'debug', 'script', 'developer', 'syntax', 'variable', 'loop', 'class', 'method', 'library', 'framework', 'api', 'database', 'web development', 'javascript', 'html', 'css', 'react', 'node', 'git', 'github', 'programming language', 'coding', 'development', 'computer science', 'data structure', 'array', 'list', 'dictionary', 'string', 'integer', 'boolean', 'import', 'def', 'if', 'else', 'for', 'while', 'try', 'except', 'return', 'print', 'input', 'output', 'file', 'json', 'xml', 'csv', 'pandas', 'numpy', 'matplotlib', 'sklearn', 'tensorflow', 'pytorch', 'machine learning', 'ai', 'artificial intelligence', 'neural network', 'deep learning', 'model training', 'regression', 'classification', 'clustering', 'supervised', 'unsupervised', 'reinforcement learning'],
|
736 |
+
'science': ['science', 'research', 'experiment', 'theory', 'physics', 'chemistry', 'biology', 'scientific', 'hypothesis', 'laboratory', 'analysis', 'data', 'observation', 'methodology', 'peer review', 'publication', 'journal', 'academic', 'study', 'quantum', 'molecular', 'genetic', 'evolution', 'ecosystem', 'climate', 'astronomy', 'geology', 'mathematics', 'statistics', 'engineering'],
|
737 |
+
'creative': ['story', 'creative', 'write', 'novel', 'poem', 'character', 'fiction', 'narrative', 'art', 'imagination', 'plot', 'dialogue', 'setting', 'theme', 'author', 'writing', 'literature', 'poetry', 'drama', 'screenplay', 'script', 'book', 'chapter', 'scene', 'metaphor', 'symbolism', 'style', 'voice', 'tone'],
|
738 |
+
'business': ['business', 'marketing', 'strategy', 'finance', 'management', 'economics', 'profit', 'company', 'entrepreneur', 'startup', 'investment', 'revenue', 'sales', 'customer', 'market', 'competition', 'brand', 'product', 'service', 'leadership', 'team', 'organization', 'budget', 'roi', 'kpi', 'analytics', 'growth', 'scale', 'innovation'],
|
739 |
+
'general': ['explain', 'what', 'how', 'why', 'describe', 'tell', 'help', 'question', 'information', 'knowledge', 'understand', 'learn', 'teach', 'example', 'definition', 'meaning', 'concept', 'idea', 'topic', 'subject']
|
740 |
}
|
741 |
|
742 |
# Initialize with default model
|
|
|
774 |
logger.error(f"System initialization failed: {e}")
|
775 |
|
776 |
def detect_domain_advanced(self, prompt: str) -> Tuple[str, float]:
    """Detect the most likely knowledge domain for *prompt*.

    Scores each domain in ``self.domain_keywords`` by the fraction of its
    keywords present in the prompt (whole-word matches), applies a
    progressive bonus for multiple hits plus extra weighting for the
    'code', 'medical' and 'science' domains, and returns the best scorer.

    Args:
        prompt: Raw user prompt to classify.

    Returns:
        ``(domain, confidence)`` with confidence capped at 1.0.  Falls back
        to ``('general', 0.5)`` when nothing matches, or when a specialized
        (non-'general') domain scores below the 0.3 confidence floor.
    """
    import re  # local import: whole-word keyword matching needs regex boundaries

    prompt_lower = prompt.lower()
    domain_scores = {}

    print(f"π Domain Detection Debug: Analyzing prompt: '{prompt[:50]}...'")

    for domain, keywords in self.domain_keywords.items():
        # Whole-word matching: a bare substring test would let short
        # keywords such as 'ai' fire inside words like 'maintain',
        # inflating scores and misrouting the prompt.
        matches = [
            keyword for keyword in keywords
            if re.search(r'\b' + re.escape(keyword) + r'\b', prompt_lower)
        ]

        if matches:
            # Enhanced scoring algorithm: fraction of this domain's
            # keyword list that was found in the prompt.
            base_score = len(matches) / len(keywords)

            # Bonus for multiple matches
            if len(matches) > 1:
                base_score *= (1.0 + 0.1 * len(matches))  # Progressive bonus

            # Special bonuses for specific domains
            if domain == 'code':
                # Strong bonus for programming-specific terms
                programming_terms = ['python', 'programming', 'code', 'function', 'script', 'algorithm', 'development', 'coding']
                programming_matches = sum(1 for term in programming_terms if term in matches)
                if programming_matches > 0:
                    base_score *= 2.0  # Double score for programming

                # Extra bonus for code syntax patterns.  These deliberately
                # keep substring semantics: trailing spaces/colons/parens in
                # the patterns already encode a boundary (e.g. 'def ').
                code_patterns = ['def ', 'class ', 'import ', 'for ', 'while ', 'if ', 'else:', 'try:', 'except:', 'return ', 'print(', 'input(']
                pattern_matches = sum(1 for pattern in code_patterns if pattern in prompt_lower)
                if pattern_matches > 0:
                    base_score *= (1.5 + 0.2 * pattern_matches)

            elif domain == 'medical':
                # Bonus for medical terminology
                medical_terms = ['medical', 'health', 'doctor', 'patient', 'treatment', 'diagnosis']
                medical_matches = sum(1 for term in medical_terms if term in matches)
                if medical_matches > 0:
                    base_score *= 1.8

            elif domain == 'science':
                # Bonus for scientific methodology terms
                science_terms = ['research', 'experiment', 'theory', 'hypothesis', 'analysis', 'study']
                science_matches = sum(1 for term in science_terms if term in matches)
                if science_matches > 0:
                    base_score *= 1.6

            # Cap the score to reasonable levels
            domain_scores[domain] = min(base_score, 2.0)
            print(f"  π {domain}: {len(matches)} matches {matches[:3]}{'...' if len(matches) > 3 else ''} β Score: {domain_scores[domain]:.3f}")

    # Determine best domain
    if domain_scores:
        best_domain = max(domain_scores, key=domain_scores.get)
        confidence = min(domain_scores[best_domain], 1.0)

        # Ensure minimum confidence threshold for specialized domains
        if best_domain != 'general' and confidence < 0.3:
            print(f"  β οΈ Low confidence ({confidence:.3f}) for {best_domain}, falling back to general")
            return 'general', 0.5

        print(f"  β Selected Domain: {best_domain} (confidence: {confidence:.3f})")
        return best_domain, confidence

    print(f"  π No specific domain detected, using general")
    return 'general', 0.5
|
844 |
|
845 |
def simulate_advanced_encoder_routing(self, domain: str, confidence: float, num_encoders: int, model_size: str) -> Dict:
|
|
|
947 |
def _generate_with_ultimate_model(self, prompt: str, max_length: int, temperature: float, top_p: float, domain: str = 'general') -> str:
|
948 |
"""Generate using loaded model with ultimate optimization and content safety"""
|
949 |
try:
|
950 |
+
print(f"π― Generating for domain: {domain}")
|
951 |
+
|
952 |
# Get optimal parameters
|
953 |
gen_params = self.model_loader.get_optimal_generation_params(temperature, top_p, max_length)
|
954 |
|
955 |
+
# Domain-specific parameter adjustments
|
956 |
+
if domain == 'code':
|
957 |
+
# More deterministic for code generation
|
958 |
+
gen_params.update({
|
959 |
+
"temperature": min(gen_params.get("temperature", 0.3), 0.4),
|
960 |
+
"top_p": min(gen_params.get("top_p", 0.8), 0.85),
|
961 |
+
"repetition_penalty": 1.1
|
962 |
+
})
|
963 |
+
# Domain-specific prompt formatting
|
964 |
+
if any(keyword in prompt.lower() for keyword in ['function', 'code', 'python', 'programming', 'script']):
|
965 |
+
safe_prompt = f"Programming Task: {prompt}\n\nSolution:"
|
966 |
+
else:
|
967 |
+
safe_prompt = f"Technical Question: {prompt}\nAnswer:"
|
968 |
+
|
969 |
+
elif domain == 'medical':
|
970 |
+
# Conservative parameters for medical content
|
971 |
+
gen_params.update({
|
972 |
+
"temperature": min(gen_params.get("temperature", 0.5), 0.6),
|
973 |
+
"top_p": min(gen_params.get("top_p", 0.8), 0.85),
|
974 |
+
"repetition_penalty": 1.2
|
975 |
+
})
|
976 |
+
safe_prompt = f"Medical Query: {prompt}\nProfessional Response:"
|
977 |
+
|
978 |
+
elif domain == 'science':
|
979 |
+
# Balanced parameters for scientific accuracy
|
980 |
+
gen_params.update({
|
981 |
+
"temperature": min(gen_params.get("temperature", 0.6), 0.7),
|
982 |
+
"top_p": min(gen_params.get("top_p", 0.85), 0.9),
|
983 |
+
"repetition_penalty": 1.15
|
984 |
+
})
|
985 |
+
safe_prompt = f"Scientific Question: {prompt}\nAnalysis:"
|
986 |
+
|
987 |
+
elif domain == 'creative':
|
988 |
+
# More creative parameters
|
989 |
+
gen_params.update({
|
990 |
+
"temperature": max(gen_params.get("temperature", 0.8), 0.7),
|
991 |
+
"top_p": max(gen_params.get("top_p", 0.9), 0.85),
|
992 |
+
"repetition_penalty": 1.05
|
993 |
+
})
|
994 |
+
safe_prompt = f"Creative Prompt: {prompt}\nResponse:"
|
995 |
+
|
996 |
+
else:
|
997 |
+
# General domain - balanced approach
|
998 |
+
gen_params.update({
|
999 |
+
"repetition_penalty": max(gen_params.get("repetition_penalty", 1.1), 1.15),
|
1000 |
+
"no_repeat_ngram_size": max(gen_params.get("no_repeat_ngram_size", 2), 3),
|
1001 |
+
"temperature": min(gen_params.get("temperature", 0.7), 0.8),
|
1002 |
+
"top_p": min(gen_params.get("top_p", 0.9), 0.85)
|
1003 |
+
})
|
1004 |
+
safe_prompt = f"Question: {prompt}\nAnswer:"
|
1005 |
|
1006 |
+
print(f"π Using prompt format: '{safe_prompt[:50]}...'")
|
1007 |
+
print(f"βοΈ Generation params: temp={gen_params['temperature']:.2f}, top_p={gen_params['top_p']:.2f}")
|
1008 |
|
1009 |
# Tokenize with safety
|
1010 |
inputs = self.model_loader.tokenizer.encode(
|
|
|
1083 |
def _is_response_too_generic(self, response: str, prompt: str, domain: str) -> bool:
|
1084 |
"""Check if response is too generic and doesn't address the domain-specific prompt"""
|
1085 |
if not response or len(response.strip()) < 20:
|
1086 |
+
print(f"β οΈ Response too short: {len(response)} chars")
|
1087 |
return True
|
1088 |
|
1089 |
response_lower = response.lower()
|
1090 |
prompt_lower = prompt.lower()
|
1091 |
|
1092 |
+
print(f"π Quality Check - Domain: {domain}, Response: '{response[:50]}...'")
|
1093 |
+
|
1094 |
+
# Domain-specific validation
|
1095 |
if domain == 'code':
|
1096 |
+
# Must contain programming-related terms for code domain
|
1097 |
+
code_indicators = ['python', 'code', 'programming', 'function', 'variable', 'syntax', 'example', 'script', 'library', 'def ', 'class', 'import', 'algorithm', 'development', 'software']
|
1098 |
+
code_matches = sum(1 for indicator in code_indicators if indicator in response_lower)
|
1099 |
+
if code_matches == 0:
|
1100 |
+
print(f"β οΈ No code indicators found in response for code domain")
|
1101 |
return True
|
1102 |
+
print(f"β
Found {code_matches} code indicators")
|
1103 |
+
|
1104 |
+
elif domain == 'medical':
|
1105 |
+
# Must contain medical terminology
|
1106 |
+
medical_indicators = ['medical', 'health', 'treatment', 'clinical', 'patient', 'diagnosis', 'therapy', 'healthcare', 'medicine', 'doctor']
|
1107 |
+
medical_matches = sum(1 for indicator in medical_indicators if indicator in response_lower)
|
1108 |
+
if medical_matches == 0:
|
1109 |
+
print(f"β οΈ No medical indicators found in response for medical domain")
|
1110 |
+
return True
|
1111 |
+
print(f"β
Found {medical_matches} medical indicators")
|
1112 |
+
|
1113 |
+
elif domain == 'science':
|
1114 |
+
# Must contain scientific terminology
|
1115 |
+
science_indicators = ['research', 'study', 'analysis', 'experiment', 'theory', 'hypothesis', 'scientific', 'methodology', 'data', 'evidence']
|
1116 |
+
science_matches = sum(1 for indicator in science_indicators if indicator in response_lower)
|
1117 |
+
if science_matches == 0:
|
1118 |
+
print(f"β οΈ No science indicators found in response for science domain")
|
1119 |
+
return True
|
1120 |
+
print(f"β
Found {science_matches} science indicators")
|
1121 |
|
1122 |
# Check if response is just repeating the prompt without answering
|
1123 |
+
if len(prompt_lower) > 10 and response_lower.startswith(prompt_lower[:15]):
|
1124 |
+
print(f"β οΈ Response just repeats the prompt")
|
1125 |
return True
|
1126 |
|
1127 |
# Check for overly generic responses
|
|
|
1131 |
'it depends on various factors',
|
1132 |
'this requires careful consideration',
|
1133 |
'multiple perspectives',
|
1134 |
+
'interconnected concepts',
|
1135 |
+
'this is an interesting question',
|
1136 |
+
'there are several approaches',
|
1137 |
+
'it\'s important to consider'
|
1138 |
]
|
1139 |
|
1140 |
generic_count = sum(1 for pattern in generic_patterns if pattern in response_lower)
|
1141 |
if generic_count >= 2: # Too many generic phrases
|
1142 |
+
print(f"β οΈ Too many generic phrases ({generic_count})")
|
1143 |
return True
|
1144 |
|
1145 |
+
# Check for responses that don't actually answer the question
|
1146 |
+
question_indicators = ['what', 'how', 'why', 'when', 'where', 'which', 'explain', 'describe', 'create', 'write', 'make', 'build']
|
1147 |
+
if any(indicator in prompt_lower for indicator in question_indicators):
|
1148 |
+
# This is clearly a question, response should provide specific information
|
1149 |
+
if len(response.split()) < 30: # Very short response to a clear question
|
1150 |
+
print(f"β οΈ Very short response ({len(response.split())} words) to a clear question")
|
1151 |
+
return True
|
1152 |
+
|
1153 |
+
print(f"β
Response passed quality checks")
|
1154 |
return False
|
1155 |
|
1156 |
def _generate_ultimate_fallback(self, prompt: str, domain: str) -> str:
|