refactor: use better model
app.py
CHANGED
@@ -20,7 +20,7 @@ logging.basicConfig(level=logging.INFO)
 DATA_DIR = "/data" if os.path.exists("/data") else "."
 DATASET_DIR = os.path.join(DATA_DIR, "rag_dataset")
 DATASET_PATH = os.path.join(DATASET_DIR, "dataset")
-MODEL_PATH = "google/
+MODEL_PATH = "google/flan-t5-small"
 
 # Constants for better maintainability
 MAX_ABSTRACT_LENGTH = 1000
@@ -182,7 +182,7 @@ class ModelHandler:
 
     @st.cache_resource
     def load_model(self):
-        """Load model with
+        """Load FLAN-T5 Small model with optimized settings"""
        if self.model is None:
            try:
                self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
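For reference, a minimal standalone sketch of how the new checkpoint could be loaded; the hunk above only shows the tokenizer call, so the AutoModelForSeq2SeqLM line and the helper name load_flan_t5 are assumptions rather than part of this commit.

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

MODEL_PATH = "google/flan-t5-small"

def load_flan_t5(model_path: str = MODEL_PATH):
    # Tokenizer load mirrors the hunk above; the model class is assumed from
    # FLAN-T5 being an encoder-decoder (seq2seq) architecture.
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
    return tokenizer, model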
@@ -199,13 +199,26 @@ class ModelHandler:
         return True
 
     def generate_answer(self, question: str, context: str, max_length: int = 512) -> str:
-        """Generate answer with
+        """Generate answer with FLAN-T5 optimized parameters"""
        if not self.load_model():
            return "Error: Model loading failed. Please try again later."
 
        try:
-            #
-            input_text =
+            # FLAN-T5 responds better to direct instruction prompts
+            input_text = f"""Answer the following question about autism using the provided research context.
+Research Context:
+{context}
+
+Question: {question}
+
+Instructions:
+- Be specific and evidence-based
+- Use clear, accessible language
+- Focus on practical implications
+- Cite research when relevant
+- Be respectful of neurodiversity
+
+Answer:"""
 
            inputs = self.tokenizer(
                input_text,
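As a usage illustration, the instruction-style prompt added above can be exercised on its own; build_prompt is a hypothetical helper name introduced here for illustration, not something this commit adds.

def build_prompt(question: str, context: str) -> str:
    # Same template as the added lines above, factored out for a quick test.
    return f"""Answer the following question about autism using the provided research context.
Research Context:
{context}

Question: {question}

Instructions:
- Be specific and evidence-based
- Use clear, accessible language
- Focus on practical implications
- Cite research when relevant
- Be respectful of neurodiversity

Answer:"""

# Example call with hypothetical inputs:
print(build_prompt("How common are sensory sensitivities?", "Example abstract text..."))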
@@ -219,22 +232,22 @@ class ModelHandler:
            outputs = self.model.generate(
                **inputs,
                max_length=max_length,
-                min_length=
-                num_beams=
-                length_penalty=1.
-                temperature=0.
-                repetition_penalty=1.
+                min_length=100,  # Reduced for FLAN-T5 Small
+                num_beams=3,  # Adjusted for better performance
+                length_penalty=1.0,  # More neutral, for concise answers
+                temperature=0.6,  # More deterministic
+                repetition_penalty=1.2,
                early_stopping=True,
-                no_repeat_ngram_size=
+                no_repeat_ngram_size=2,
                do_sample=True,
-                top_k=
-                top_p=0.
+                top_k=30,
+                top_p=0.92
            )
 
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            response = TextProcessor.clean_text(response)
 
-            if len(response.strip()) <
+            if len(response.strip()) < 50:  # Adjusted for FLAN-T5's shorter responses
                return self._get_fallback_response()
 
            return self._format_response(response)
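To see the new decoding configuration in one place, here is a self-contained sketch of tokenizing a prompt and generating with the parameters introduced above; the tokenizer keyword arguments (truncation, return_tensors, max_length) are assumptions, since the diff elides that call.

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")

prompt = "Answer the following question about autism using the provided research context. ..."
# Assumed tokenizer settings; the commit does not show them.
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

outputs = model.generate(
    **inputs,
    max_length=512,
    min_length=100,
    num_beams=3,
    length_penalty=1.0,
    temperature=0.6,
    repetition_penalty=1.2,
    early_stopping=True,
    no_repeat_ngram_size=2,
    do_sample=True,
    top_k=30,
    top_p=0.92,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))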
@@ -243,30 +256,6 @@ class ModelHandler:
            logging.error(f"Error generating response: {str(e)}")
            return "Error: Could not generate response. Please try again."
 
-    @staticmethod
-    def _create_enhanced_prompt(question: str, context: str) -> str:
-        """Create an enhanced prompt for better response quality"""
-        return f"""Context: {context}
-
-Question: {question}
-
-Instructions:
-1. Provide a clear, evidence-based answer
-2. Include specific findings from the research
-3. Explain practical implications
-4. Use accessible language
-5. Address the question directly
-6. Include relevant examples
-
-Response should be:
-- Accurate and scientific
-- Easy to understand
-- Practical and applicable
-- Respectful of neurodiversity
-- Supported by the provided research
-
-Generate a comprehensive response:"""
-
    @staticmethod
    def _get_fallback_response() -> str:
        """Provide a structured fallback response"""