Spaces:

wakeupmh
/

ama-autism

Sleeping

App Files Community

wakeupmh committed on Feb 15

Commit

e348a54

1 Parent(s): 58be7e5

refactor: improve response

Browse files

Files changed (2) hide show

app.py +48 -34
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -1,10 +1,9 @@
 import streamlit as st
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-import os
-from datasets import load_from_disk, Dataset
 import torch
 import logging
-import pandas as pd
 import arxiv
 import requests
 import xml.etree.ElementTree as ET
@@ -17,14 +16,14 @@ logging.basicConfig(level=logging.INFO)
 DATA_DIR = "/data" if os.path.exists("/data") else "."
 DATASET_DIR = os.path.join(DATA_DIR, "rag_dataset")
 DATASET_PATH = os.path.join(DATASET_DIR, "dataset")
-MODEL_PATH = "t5-small"  # Changed to T5-small for better CPU compatibility
 @st.cache_resource
 def load_local_model():
     """Load the local Hugging Face model"""
     try:
         tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
-        model = AutoModelForSeq2SeqLM.from_pretrained(
             MODEL_PATH,
             device_map={"": "cpu"},  # Force CPU
             torch_dtype=torch.float32
@@ -206,37 +205,46 @@ def generate_answer(question, context, max_length=512):
     # Clean and format the context
     clean_context = clean_text(context)
-    # Format the context as a structured query
-    prompt = f"""You are an expert in autism research. Based on the following research papers, provide a clear and comprehensive answer about autism.
-Question: {clean_text(question)}
 Research Papers:
 {clean_context}
-Instructions: Please provide a well-structured response that:
-1. Starts with a clear, general explanation of the topic
-2. Includes specific findings from the research papers when relevant
-3. Explains practical implications for people with autism and their families
-4. Notes any limitations or areas needing more research
-Keep your answer focused, clear, and helpful for someone wanting to understand autism better."""
     try:
-        # Generate response
-        inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
         with torch.inference_mode():
             outputs = model.generate(
                 **inputs,
                 max_length=max_length,
-                min_length=150,
-                num_beams=4,
                 length_penalty=1.5,
                 temperature=0.7,
                 repetition_penalty=1.2,
-                early_stopping=True
             )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -247,28 +255,34 @@ Keep your answer focused, clear, and helpful for someone wanting to understand a
             return f"""Here's what we know about autism in relation to your question:
 1. General Understanding:
-- Autism Spectrum Disorder (ASD) is a complex developmental condition
-- It affects how a person communicates, learns, and interacts with others
-- Each person with autism has unique strengths and challenges
-2. Key Aspects:
-- Communication and social interaction patterns
 - Repetitive behaviors and specific interests
 - Sensory sensitivities
-- Early intervention is important
-3. Research Focus:
-- Scientists are studying various aspects including:
-  * Brain development and function
-  * Genetic factors
-  * Environmental influences
-  * Effective interventions and supports
 For more specific information, try asking about:
-- Specific symptoms or characteristics
 - Diagnostic processes
 - Treatment approaches
-- Recent research findings"""
         # Format the response for better readability
         formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")

 import streamlit as st
+import pandas as pd
 import torch
 import logging
+import os
+from transformers import AutoTokenizer, T5ForConditionalGeneration
 import arxiv
 import requests
 import xml.etree.ElementTree as ET
 DATA_DIR = "/data" if os.path.exists("/data") else "."
 DATASET_DIR = os.path.join(DATA_DIR, "rag_dataset")
 DATASET_PATH = os.path.join(DATASET_DIR, "dataset")
+MODEL_PATH = "google/flan-t5-small"  # Using flan-t5-small for better performance
 @st.cache_resource
 def load_local_model():
     """Load the local Hugging Face model"""
     try:
         tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+        model = T5ForConditionalGeneration.from_pretrained(
             MODEL_PATH,
             device_map={"": "cpu"},  # Force CPU
             torch_dtype=torch.float32
     # Clean and format the context
     clean_context = clean_text(context)
+    clean_question = clean_text(question)
+    # Format the input for T5 (it expects a specific format)
+    input_text = f"""Answer the following question about autism using the provided research papers.
+Question: {clean_question}
 Research Papers:
 {clean_context}
+Instructions: Provide a detailed answer that:
+1. Explains the main concepts clearly
+2. Uses specific evidence from the research
+3. Discusses practical implications
+4. Notes any limitations
+Answer:"""
     try:
+        # T5 expects a specific format for the input
+        inputs = tokenizer(input_text,
+                         return_tensors="pt",
+                         max_length=1024,
+                         truncation=True,
+                         padding=True)
         with torch.inference_mode():
             outputs = model.generate(
                 **inputs,
                 max_length=max_length,
+                min_length=100,
+                num_beams=5,
                 length_penalty=1.5,
                 temperature=0.7,
                 repetition_penalty=1.2,
+                early_stopping=True,
+                no_repeat_ngram_size=3,
+                do_sample=True,
+                top_k=50,
+                top_p=0.95
             )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
             return f"""Here's what we know about autism in relation to your question:
 1. General Understanding:
+- Autism Spectrum Disorder (ASD) is a complex neurodevelopmental condition
+- It affects how a person perceives, communicates, and interacts with the world
+- Each individual with autism has unique strengths and challenges
+- Early identification and support are crucial
+2. Key Characteristics:
+- Social communication and interaction patterns
 - Repetitive behaviors and specific interests
 - Sensory sensitivities
+- Variable cognitive and language abilities
+3. Important Considerations:
+- Autism is a spectrum, meaning it affects each person differently
+- Support needs vary from person to person
+- Many individuals with autism have unique talents and abilities
+- Research continues to improve our understanding
+4. Current Research Areas:
+- Brain development and neurology
+- Genetic and environmental factors
+- Early intervention methods
+- Support strategies and therapies
 For more specific information, try asking about:
+- Specific autism characteristics
 - Diagnostic processes
 - Treatment approaches
+- Latest research findings"""
         # Format the response for better readability
         formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 streamlit>=1.32.0
-transformers>=4.37.0
 datasets>=2.17.0
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch>=2.2.0

 streamlit>=1.32.0
+transformers==4.36.2
 datasets>=2.17.0
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch>=2.2.0