Debito commited on
Commit
2ff9520
Β·
verified Β·
1 Parent(s): cffa4bb

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -24
app.py CHANGED
@@ -113,49 +113,49 @@ class UltimateModelLoader:
113
  "vocab_size": 50280,
114
  "d_model": 2048
115
  },
116
- # Alternative efficient models (no mamba-ssm required)
117
- "microsoft/DialoGPT-small": {
118
- "display_name": "DialoGPT Small (117M) [Efficient Alternative]",
119
- "size": "small",
120
- "priority": 13,
121
- "reliable": True,
122
- "params": 117_000_000
123
- },
124
  "gpt2-large": {
125
- "display_name": "GPT2 Large (774M) [High Performance]",
126
  "size": "large",
127
- "priority": 14,
128
  "reliable": True,
129
  "params": 774_000_000
130
  },
131
- # High-quality alternative models (priority 20-27)
132
  "gpt2-medium": {
133
- "display_name": "GPT2 Medium (355M)",
134
  "size": "medium",
135
- "priority": 20,
136
  "reliable": True,
137
  "params": 355_000_000
138
  },
139
  "gpt2": {
140
- "display_name": "GPT2 Base (117M)",
141
  "size": "small",
142
- "priority": 21,
143
  "reliable": True,
144
  "params": 117_000_000
145
  },
146
  "distilgpt2": {
147
- "display_name": "DistilGPT2 (82M)",
148
  "size": "small",
149
- "priority": 22,
150
  "reliable": True,
151
  "params": 82_000_000
152
  },
 
153
  "microsoft/DialoGPT-medium": {
154
- "display_name": "DialoGPT Medium (355M)",
155
  "size": "medium",
156
- "priority": 23,
157
- "reliable": True,
158
  "params": 355_000_000
 
 
 
 
 
 
 
159
  }
160
  })
161
 
@@ -895,14 +895,25 @@ class UltimateMambaSwarm:
895
  return f"Generation error occurred. Using fallback response.", ""
896
 
897
  def _generate_with_ultimate_model(self, prompt: str, max_length: int, temperature: float, top_p: float) -> str:
898
- """Generate using loaded model with ultimate optimization"""
899
  try:
900
  # Get optimal parameters
901
  gen_params = self.model_loader.get_optimal_generation_params(temperature, top_p, max_length)
902
 
 
 
 
 
 
 
 
 
 
 
 
903
  # Tokenize with safety
904
  inputs = self.model_loader.tokenizer.encode(
905
- prompt,
906
  return_tensors="pt",
907
  truncation=True,
908
  max_length=512
@@ -916,21 +927,86 @@ class UltimateMambaSwarm:
916
  # Decode and validate
917
  generated_text = self.model_loader.tokenizer.decode(outputs[0], skip_special_tokens=True)
918
 
919
- # Extract response
920
- if generated_text.startswith(prompt):
 
 
921
  response = generated_text[len(prompt):].strip()
922
  else:
923
  response = generated_text.strip()
924
 
 
 
 
 
 
925
  return response if response else "I'm processing your request..."
926
 
927
  except Exception as e:
928
  logger.error(f"Model generation error: {e}")
929
  return self._generate_ultimate_fallback(prompt, 'general')
930
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
931
  def _generate_ultimate_fallback(self, prompt: str, domain: str) -> str:
932
  """Ultimate fallback responses with maximum quality"""
933
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
934
  fallback_responses = {
935
  'medical': f"""**πŸ₯ Medical Information Analysis: "{prompt[:60]}..."**
936
 
 
113
  "vocab_size": 50280,
114
  "d_model": 2048
115
  },
116
+ # Alternative efficient models (no mamba-ssm required) - GPT2 prioritized over DialoGPT
 
 
 
 
 
 
 
117
  "gpt2-large": {
118
+ "display_name": "GPT2 Large (774M) [High Performance Alternative]",
119
  "size": "large",
120
+ "priority": 13,
121
  "reliable": True,
122
  "params": 774_000_000
123
  },
 
124
  "gpt2-medium": {
125
+ "display_name": "GPT2 Medium (355M) [Balanced Alternative]",
126
  "size": "medium",
127
+ "priority": 14,
128
  "reliable": True,
129
  "params": 355_000_000
130
  },
131
  "gpt2": {
132
+ "display_name": "GPT2 Base (117M) [Fast Alternative]",
133
  "size": "small",
134
+ "priority": 15,
135
  "reliable": True,
136
  "params": 117_000_000
137
  },
138
  "distilgpt2": {
139
+ "display_name": "DistilGPT2 (82M) [Ultra-Fast]",
140
  "size": "small",
141
+ "priority": 16,
142
  "reliable": True,
143
  "params": 82_000_000
144
  },
145
+ # Conversational models (lower priority due to potential inappropriate responses)
146
  "microsoft/DialoGPT-medium": {
147
+ "display_name": "DialoGPT Medium (355M) [Conversational]",
148
  "size": "medium",
149
+ "priority": 25,
150
+ "reliable": False, # Marked as less reliable due to Reddit training data
151
  "params": 355_000_000
152
+ },
153
+ "microsoft/DialoGPT-small": {
154
+ "display_name": "DialoGPT Small (117M) [Conversational]",
155
+ "size": "small",
156
+ "priority": 26,
157
+ "reliable": False, # Marked as less reliable due to Reddit training data
158
+ "params": 117_000_000
159
  }
160
  })
161
 
 
895
  return f"Generation error occurred. Using fallback response.", ""
896
 
897
  def _generate_with_ultimate_model(self, prompt: str, max_length: int, temperature: float, top_p: float) -> str:
898
+ """Generate using loaded model with ultimate optimization and content safety"""
899
  try:
900
  # Get optimal parameters
901
  gen_params = self.model_loader.get_optimal_generation_params(temperature, top_p, max_length)
902
 
903
+ # Add content safety parameters
904
+ gen_params.update({
905
+ "repetition_penalty": max(gen_params.get("repetition_penalty", 1.1), 1.15),
906
+ "no_repeat_ngram_size": max(gen_params.get("no_repeat_ngram_size", 2), 3),
907
+ "temperature": min(gen_params.get("temperature", 0.7), 0.8), # Cap temperature for safety
908
+ "top_p": min(gen_params.get("top_p", 0.9), 0.85) # More focused sampling
909
+ })
910
+
911
+ # Create safer prompt format
912
+ safe_prompt = f"Question: {prompt}\nAnswer:"
913
+
914
  # Tokenize with safety
915
  inputs = self.model_loader.tokenizer.encode(
916
+ safe_prompt,
917
  return_tensors="pt",
918
  truncation=True,
919
  max_length=512
 
927
  # Decode and validate
928
  generated_text = self.model_loader.tokenizer.decode(outputs[0], skip_special_tokens=True)
929
 
930
+ # Extract response safely
931
+ if generated_text.startswith(safe_prompt):
932
+ response = generated_text[len(safe_prompt):].strip()
933
+ elif generated_text.startswith(prompt):
934
  response = generated_text[len(prompt):].strip()
935
  else:
936
  response = generated_text.strip()
937
 
938
+ # Content safety filtering
939
+ if self._is_inappropriate_content(response):
940
+ logger.warning("πŸ›‘οΈ Inappropriate content detected, using fallback")
941
+ return self._generate_ultimate_fallback(prompt, 'general')
942
+
943
  return response if response else "I'm processing your request..."
944
 
945
  except Exception as e:
946
  logger.error(f"Model generation error: {e}")
947
  return self._generate_ultimate_fallback(prompt, 'general')
948
 
949
+ def _is_inappropriate_content(self, text: str) -> bool:
950
+ """Advanced content safety filtering"""
951
+ if not text or len(text.strip()) < 3:
952
+ return True
953
+
954
+ text_lower = text.lower()
955
+
956
+ # Check for inappropriate content patterns
957
+ inappropriate_patterns = [
958
+ # Sexual content
959
+ 'sexual', 'dude who likes to have fun with dudes', 'sexual orientation',
960
+ # Offensive language (basic filter)
961
+ 'damn', 'hell', 'stupid', 'idiot',
962
+ # Inappropriate casual language
963
+ 'just a dude', 'i\'m just a', 'whatever man',
964
+ # Reddit-style inappropriate responses
965
+ 'bro', 'dude', 'man', 'guys', 'lol', 'lmao', 'wtf'
966
+ ]
967
+
968
+ # Check for patterns that suggest inappropriate content
969
+ for pattern in inappropriate_patterns:
970
+ if pattern in text_lower:
971
+ return True
972
+
973
+ # Check for very short, casual responses that don't answer the question
974
+ if len(text.strip()) < 20 and any(word in text_lower for word in ['dude', 'bro', 'man', 'whatever']):
975
+ return True
976
+
977
+ # Check for responses that don't seem to address the prompt properly
978
+ if 'tell me more about yourself' in text_lower and len(text.strip()) < 100:
979
+ return True
980
+
981
+ return False
982
+
983
  def _generate_ultimate_fallback(self, prompt: str, domain: str) -> str:
984
  """Ultimate fallback responses with maximum quality"""
985
 
986
+ # Special handling for self-introduction prompts
987
+ prompt_lower = prompt.lower()
988
+ if any(phrase in prompt_lower for phrase in ['tell me about yourself', 'who are you', 'what are you']):
989
+ return """**🐍 Mamba Encoder Swarm AI Assistant**
990
+
991
+ I'm an advanced AI language model powered by the Mamba Encoder Swarm architecture, designed to provide intelligent, helpful, and accurate responses across multiple domains.
992
+
993
+ **🎯 Core Capabilities:**
994
+ β€’ **Multi-Domain Expertise**: Specialized knowledge in medical, legal, programming, scientific, creative, and business domains
995
+ β€’ **Intelligent Routing**: Advanced encoder routing system that directs queries to the most appropriate specialized modules
996
+ β€’ **Quality Assurance**: Built-in content validation and safety filtering to ensure appropriate, helpful responses
997
+ β€’ **Adaptive Processing**: Dynamic model selection and optimization based on query complexity and requirements
998
+
999
+ **🧠 Architecture Features:**
1000
+ β€’ **State-Space Models**: Utilizes advanced Mamba encoder technology (GPU-ready) with intelligent CPU alternatives
1001
+ β€’ **Domain Intelligence**: Sophisticated domain detection and specialized response generation
1002
+ β€’ **Performance Monitoring**: Real-time analytics and optimization for consistent high-quality responses
1003
+ β€’ **Safety Systems**: Multiple layers of content filtering and quality validation
1004
+
1005
+ **🀝 How I Can Help:**
1006
+ I'm here to assist with questions, analysis, problem-solving, creative tasks, technical explanations, and professional guidance across various fields. I aim to provide thoughtful, accurate, and helpful responses while maintaining appropriate professional standards.
1007
+
1008
+ **Current Status**: Operating in CPU-optimized mode with Mamba encoders ready for GPU activation."""
1009
+
1010
  fallback_responses = {
1011
  'medical': f"""**πŸ₯ Medical Information Analysis: "{prompt[:60]}..."**
1012