Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
@@ -113,49 +113,49 @@ class UltimateModelLoader:
|
|
113 |
"vocab_size": 50280,
|
114 |
"d_model": 2048
|
115 |
},
|
116 |
-
# Alternative efficient models (no mamba-ssm required)
|
117 |
-
"microsoft/DialoGPT-small": {
|
118 |
-
"display_name": "DialoGPT Small (117M) [Efficient Alternative]",
|
119 |
-
"size": "small",
|
120 |
-
"priority": 13,
|
121 |
-
"reliable": True,
|
122 |
-
"params": 117_000_000
|
123 |
-
},
|
124 |
"gpt2-large": {
|
125 |
-
"display_name": "GPT2 Large (774M) [High Performance]",
|
126 |
"size": "large",
|
127 |
-
"priority":
|
128 |
"reliable": True,
|
129 |
"params": 774_000_000
|
130 |
},
|
131 |
-
# High-quality alternative models (priority 20-27)
|
132 |
"gpt2-medium": {
|
133 |
-
"display_name": "GPT2 Medium (355M)",
|
134 |
"size": "medium",
|
135 |
-
"priority":
|
136 |
"reliable": True,
|
137 |
"params": 355_000_000
|
138 |
},
|
139 |
"gpt2": {
|
140 |
-
"display_name": "GPT2 Base (117M)",
|
141 |
"size": "small",
|
142 |
-
"priority":
|
143 |
"reliable": True,
|
144 |
"params": 117_000_000
|
145 |
},
|
146 |
"distilgpt2": {
|
147 |
-
"display_name": "DistilGPT2 (82M)",
|
148 |
"size": "small",
|
149 |
-
"priority":
|
150 |
"reliable": True,
|
151 |
"params": 82_000_000
|
152 |
},
|
|
|
153 |
"microsoft/DialoGPT-medium": {
|
154 |
-
"display_name": "DialoGPT Medium (355M)",
|
155 |
"size": "medium",
|
156 |
-
"priority":
|
157 |
-
"reliable":
|
158 |
"params": 355_000_000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
}
|
160 |
})
|
161 |
|
@@ -895,14 +895,25 @@ class UltimateMambaSwarm:
|
|
895 |
return f"Generation error occurred. Using fallback response.", ""
|
896 |
|
897 |
def _generate_with_ultimate_model(self, prompt: str, max_length: int, temperature: float, top_p: float) -> str:
|
898 |
-
"""Generate using loaded model with ultimate optimization"""
|
899 |
try:
|
900 |
# Get optimal parameters
|
901 |
gen_params = self.model_loader.get_optimal_generation_params(temperature, top_p, max_length)
|
902 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
903 |
# Tokenize with safety
|
904 |
inputs = self.model_loader.tokenizer.encode(
|
905 |
-
|
906 |
return_tensors="pt",
|
907 |
truncation=True,
|
908 |
max_length=512
|
@@ -916,21 +927,86 @@ class UltimateMambaSwarm:
|
|
916 |
# Decode and validate
|
917 |
generated_text = self.model_loader.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
918 |
|
919 |
-
# Extract response
|
920 |
-
if generated_text.startswith(
|
|
|
|
|
921 |
response = generated_text[len(prompt):].strip()
|
922 |
else:
|
923 |
response = generated_text.strip()
|
924 |
|
|
|
|
|
|
|
|
|
|
|
925 |
return response if response else "I'm processing your request..."
|
926 |
|
927 |
except Exception as e:
|
928 |
logger.error(f"Model generation error: {e}")
|
929 |
return self._generate_ultimate_fallback(prompt, 'general')
|
930 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
931 |
def _generate_ultimate_fallback(self, prompt: str, domain: str) -> str:
|
932 |
"""Ultimate fallback responses with maximum quality"""
|
933 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
934 |
fallback_responses = {
|
935 |
'medical': f"""**π₯ Medical Information Analysis: "{prompt[:60]}..."**
|
936 |
|
|
|
113 |
"vocab_size": 50280,
|
114 |
"d_model": 2048
|
115 |
},
|
116 |
+
# Alternative efficient models (no mamba-ssm required) - GPT2 prioritized over DialoGPT
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
"gpt2-large": {
|
118 |
+
"display_name": "GPT2 Large (774M) [High Performance Alternative]",
|
119 |
"size": "large",
|
120 |
+
"priority": 13,
|
121 |
"reliable": True,
|
122 |
"params": 774_000_000
|
123 |
},
|
|
|
124 |
"gpt2-medium": {
|
125 |
+
"display_name": "GPT2 Medium (355M) [Balanced Alternative]",
|
126 |
"size": "medium",
|
127 |
+
"priority": 14,
|
128 |
"reliable": True,
|
129 |
"params": 355_000_000
|
130 |
},
|
131 |
"gpt2": {
|
132 |
+
"display_name": "GPT2 Base (117M) [Fast Alternative]",
|
133 |
"size": "small",
|
134 |
+
"priority": 15,
|
135 |
"reliable": True,
|
136 |
"params": 117_000_000
|
137 |
},
|
138 |
"distilgpt2": {
|
139 |
+
"display_name": "DistilGPT2 (82M) [Ultra-Fast]",
|
140 |
"size": "small",
|
141 |
+
"priority": 16,
|
142 |
"reliable": True,
|
143 |
"params": 82_000_000
|
144 |
},
|
145 |
+
# Conversational models (lower priority due to potential inappropriate responses)
|
146 |
"microsoft/DialoGPT-medium": {
|
147 |
+
"display_name": "DialoGPT Medium (355M) [Conversational]",
|
148 |
"size": "medium",
|
149 |
+
"priority": 25,
|
150 |
+
"reliable": False, # Marked as less reliable due to Reddit training data
|
151 |
"params": 355_000_000
|
152 |
+
},
|
153 |
+
"microsoft/DialoGPT-small": {
|
154 |
+
"display_name": "DialoGPT Small (117M) [Conversational]",
|
155 |
+
"size": "small",
|
156 |
+
"priority": 26,
|
157 |
+
"reliable": False, # Marked as less reliable due to Reddit training data
|
158 |
+
"params": 117_000_000
|
159 |
}
|
160 |
})
|
161 |
|
|
|
895 |
return f"Generation error occurred. Using fallback response.", ""
|
896 |
|
897 |
def _generate_with_ultimate_model(self, prompt: str, max_length: int, temperature: float, top_p: float) -> str:
|
898 |
+
"""Generate using loaded model with ultimate optimization and content safety"""
|
899 |
try:
|
900 |
# Get optimal parameters
|
901 |
gen_params = self.model_loader.get_optimal_generation_params(temperature, top_p, max_length)
|
902 |
|
903 |
+
# Add content safety parameters
|
904 |
+
gen_params.update({
|
905 |
+
"repetition_penalty": max(gen_params.get("repetition_penalty", 1.1), 1.15),
|
906 |
+
"no_repeat_ngram_size": max(gen_params.get("no_repeat_ngram_size", 2), 3),
|
907 |
+
"temperature": min(gen_params.get("temperature", 0.7), 0.8), # Cap temperature for safety
|
908 |
+
"top_p": min(gen_params.get("top_p", 0.9), 0.85) # More focused sampling
|
909 |
+
})
|
910 |
+
|
911 |
+
# Create safer prompt format
|
912 |
+
safe_prompt = f"Question: {prompt}\nAnswer:"
|
913 |
+
|
914 |
# Tokenize with safety
|
915 |
inputs = self.model_loader.tokenizer.encode(
|
916 |
+
safe_prompt,
|
917 |
return_tensors="pt",
|
918 |
truncation=True,
|
919 |
max_length=512
|
|
|
927 |
# Decode and validate
|
928 |
generated_text = self.model_loader.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
929 |
|
930 |
+
# Extract response safely
|
931 |
+
if generated_text.startswith(safe_prompt):
|
932 |
+
response = generated_text[len(safe_prompt):].strip()
|
933 |
+
elif generated_text.startswith(prompt):
|
934 |
response = generated_text[len(prompt):].strip()
|
935 |
else:
|
936 |
response = generated_text.strip()
|
937 |
|
938 |
+
# Content safety filtering
|
939 |
+
if self._is_inappropriate_content(response):
|
940 |
+
logger.warning("π‘οΈ Inappropriate content detected, using fallback")
|
941 |
+
return self._generate_ultimate_fallback(prompt, 'general')
|
942 |
+
|
943 |
return response if response else "I'm processing your request..."
|
944 |
|
945 |
except Exception as e:
|
946 |
logger.error(f"Model generation error: {e}")
|
947 |
return self._generate_ultimate_fallback(prompt, 'general')
|
948 |
|
949 |
+
def _is_inappropriate_content(self, text: str) -> bool:
|
950 |
+
"""Advanced content safety filtering"""
|
951 |
+
if not text or len(text.strip()) < 3:
|
952 |
+
return True
|
953 |
+
|
954 |
+
text_lower = text.lower()
|
955 |
+
|
956 |
+
# Check for inappropriate content patterns
|
957 |
+
inappropriate_patterns = [
|
958 |
+
# Sexual content
|
959 |
+
'sexual', 'dude who likes to have fun with dudes', 'sexual orientation',
|
960 |
+
# Offensive language (basic filter)
|
961 |
+
'damn', 'hell', 'stupid', 'idiot',
|
962 |
+
# Inappropriate casual language
|
963 |
+
'just a dude', 'i\'m just a', 'whatever man',
|
964 |
+
# Reddit-style inappropriate responses
|
965 |
+
'bro', 'dude', 'man', 'guys', 'lol', 'lmao', 'wtf'
|
966 |
+
]
|
967 |
+
|
968 |
+
# Check for patterns that suggest inappropriate content
|
969 |
+
for pattern in inappropriate_patterns:
|
970 |
+
if pattern in text_lower:
|
971 |
+
return True
|
972 |
+
|
973 |
+
# Check for very short, casual responses that don't answer the question
|
974 |
+
if len(text.strip()) < 20 and any(word in text_lower for word in ['dude', 'bro', 'man', 'whatever']):
|
975 |
+
return True
|
976 |
+
|
977 |
+
# Check for responses that don't seem to address the prompt properly
|
978 |
+
if 'tell me more about yourself' in text_lower and len(text.strip()) < 100:
|
979 |
+
return True
|
980 |
+
|
981 |
+
return False
|
982 |
+
|
983 |
def _generate_ultimate_fallback(self, prompt: str, domain: str) -> str:
|
984 |
"""Ultimate fallback responses with maximum quality"""
|
985 |
|
986 |
+
# Special handling for self-introduction prompts
|
987 |
+
prompt_lower = prompt.lower()
|
988 |
+
if any(phrase in prompt_lower for phrase in ['tell me about yourself', 'who are you', 'what are you']):
|
989 |
+
return """**π Mamba Encoder Swarm AI Assistant**
|
990 |
+
|
991 |
+
I'm an advanced AI language model powered by the Mamba Encoder Swarm architecture, designed to provide intelligent, helpful, and accurate responses across multiple domains.
|
992 |
+
|
993 |
+
**π― Core Capabilities:**
|
994 |
+
β’ **Multi-Domain Expertise**: Specialized knowledge in medical, legal, programming, scientific, creative, and business domains
|
995 |
+
β’ **Intelligent Routing**: Advanced encoder routing system that directs queries to the most appropriate specialized modules
|
996 |
+
β’ **Quality Assurance**: Built-in content validation and safety filtering to ensure appropriate, helpful responses
|
997 |
+
β’ **Adaptive Processing**: Dynamic model selection and optimization based on query complexity and requirements
|
998 |
+
|
999 |
+
**π§ Architecture Features:**
|
1000 |
+
β’ **State-Space Models**: Utilizes advanced Mamba encoder technology (GPU-ready) with intelligent CPU alternatives
|
1001 |
+
β’ **Domain Intelligence**: Sophisticated domain detection and specialized response generation
|
1002 |
+
β’ **Performance Monitoring**: Real-time analytics and optimization for consistent high-quality responses
|
1003 |
+
β’ **Safety Systems**: Multiple layers of content filtering and quality validation
|
1004 |
+
|
1005 |
+
**π€ How I Can Help:**
|
1006 |
+
I'm here to assist with questions, analysis, problem-solving, creative tasks, technical explanations, and professional guidance across various fields. I aim to provide thoughtful, accurate, and helpful responses while maintaining appropriate professional standards.
|
1007 |
+
|
1008 |
+
**Current Status**: Operating in CPU-optimized mode with Mamba encoders ready for GPU activation."""
|
1009 |
+
|
1010 |
fallback_responses = {
|
1011 |
'medical': f"""**π₯ Medical Information Analysis: "{prompt[:60]}..."**
|
1012 |
|