Spaces:

wakeupmh
/

ama-autism

Sleeping

App Files Community

wakeupmh committed on Feb 15

Commit

58be7e5

1 Parent(s): 218a8a7

refactor: improve response

Browse files

Files changed (1) hide show

app.py +67 -22

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import pandas as pd
 import arxiv
 import requests
 import xml.etree.ElementTree as ET
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -33,6 +34,35 @@ def load_local_model():
         st.error(f"Error loading model: {str(e)}")
         return None, None
 def fetch_arxiv_papers(query, max_results=5):
     """Fetch papers from arXiv"""
     client = arxiv.Client()
@@ -137,14 +167,21 @@ def search_research_papers(query):
     all_papers = []
     for paper in arxiv_papers + pubmed_papers:
         if paper['abstract'] and len(paper['abstract'].strip()) > 0:
             # Check if the paper is actually about autism
-            if ('autism' in paper['title'].lower() or
-                'asd' in paper['title'].lower() or
-                'autism' in paper['abstract'].lower() or
-                'asd' in paper['abstract'].lower()):
                 all_papers.append({
-                    'title': paper['title'],
-                    'text': f"Title: {paper['title']}\n\nAbstract: {paper['abstract']}",
                     'url': paper['url'],
                     'published': paper['published'],
                     'relevance_score': paper.get('relevance_score', 0.5)
@@ -167,21 +204,24 @@ def generate_answer(question, context, max_length=512):
     if model is None or tokenizer is None:
         return "Error: Could not load the model. Please try again later."
     # Format the context as a structured query
-    prompt = f"""You are an expert in autism research. Provide a comprehensive answer about autism, incorporating both general knowledge and specific research findings when available.
-Question: {question}
-Recent Research Context:
-{context}
-Instructions: Provide a detailed response that:
-1. Starts with a general overview of the topic as it relates to autism
-2. Incorporates specific findings from the provided research papers when relevant
-3. Discusses practical implications for individuals with autism and their families
-4. Mentions any limitations in current understanding
-If the research papers don't directly address the question, focus on providing general, well-established information about autism while noting what specific research would be helpful."""
     try:
         # Generate response
@@ -191,7 +231,7 @@ If the research papers don't directly address the question, focus on providing g
             outputs = model.generate(
                 **inputs,
                 max_length=max_length,
-                min_length=150,  # Increased minimum length for more comprehensive answers
                 num_beams=4,
                 length_penalty=1.5,
                 temperature=0.7,
@@ -200,10 +240,11 @@ If the research papers don't directly address the question, focus on providing g
             )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         # If response is too short or empty, provide a general overview
         if len(response.strip()) < 100:
-            return f"""Here's what we know about autism in relation to your question about {question}:
 1. General Understanding:
 - Autism Spectrum Disorder (ASD) is a complex developmental condition
@@ -211,19 +252,23 @@ If the research papers don't directly address the question, focus on providing g
 - Each person with autism has unique strengths and challenges
 2. Key Aspects:
-- Communication and social interaction
 - Repetitive behaviors and specific interests
 - Sensory sensitivities
 - Early intervention is important
-3. Current Research:
-While the provided research papers don't directly address your specific question, researchers are actively studying various aspects of autism to better understand its causes, characteristics, and effective interventions.
 For more specific information, try asking about:
 - Specific symptoms or characteristics
 - Diagnostic processes
 - Treatment approaches
-- Current research in specific areas"""
         # Format the response for better readability
         formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")

 import arxiv
 import requests
 import xml.etree.ElementTree as ET
+import re
 # Configure logging
 logging.basicConfig(level=logging.INFO)
         st.error(f"Error loading model: {str(e)}")
         return None, None
# Quote characters that must be rewritten to ASCII *before* any stripping.
# The first three entries are the mojibake produced when UTF-8 typographic
# quotes are mis-decoded as cp1252; they are written as escapes so the
# literals survive further re-encoding of this file. Order matters: the bare
# two-character prefix has to come after the three-character sequences that
# begin with it.
_QUOTE_FIXES = (
    ('\u00e2\u20ac\u2122', "'"),  # mis-decoded right single quote
    ('\u00e2\u20ac\u0153', '"'),  # mis-decoded left double quote
    ('\u00e2\u20ac', '"'),        # mis-decoded right double quote (prefix)
    ('\u2018', "'"),              # left single quote
    ('\u2019', "'"),              # right single quote / apostrophe
    ('\u201c', '"'),              # left double quote
    ('\u201d', '"'),              # right double quote
)


def clean_text(text):
    """Clean and normalize text content.

    Args:
        text: The string to clean. Any falsy value (``None``, ``""``) is
            accepted.

    Returns:
        An ASCII-only string in which typographic quotes (and their common
        mojibake forms) are converted to ``'`` / ``"``, every character other
        than word characters, whitespace and ``. , ; : ( ) - ' "`` is replaced
        by a space, runs of whitespace are collapsed to one space, and
        leading/trailing whitespace is removed. Returns ``""`` for falsy
        input.
    """
    if not text:
        return ""
    # Repair quotes first: the stripping below would otherwise destroy the
    # multi-character mojibake sequences before they could be recognised.
    for broken, fixed in _QUOTE_FIXES:
        text = text.replace(broken, fixed)
    # Remove special characters and normalize spaces. Whitespace is folded to
    # plain spaces here so that non-ASCII spaces (e.g. NBSP) still separate
    # words after the ASCII filter.
    text = re.sub(r'[^\w\s.,;:()\-\'"]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    # Drop any remaining non-ASCII characters (e.g. accented letters).
    text = text.encode('ascii', 'ignore').decode('ascii')
    # Collapse again: dropping a character can leave two spaces side by side.
    return ' '.join(text.split())
def format_paper(title, abstract, max_abstract_length=1000):
    """Format paper information consistently.

    Both fields are passed through ``clean_text`` before formatting.

    Args:
        title: Paper title.
        abstract: Paper abstract.
        max_abstract_length: Maximum number of characters kept for the
            abstract, including the trailing ``"..."`` that marks a
            truncation. Defaults to 1000.

    Returns:
        A three-line string: ``Title: ...``, ``Abstract: ...`` and a ``---``
        separator line.
    """
    ellipsis = "..."
    title = clean_text(title)
    abstract = clean_text(abstract)
    if len(abstract) > max_abstract_length:
        # Reserve room for the ellipsis so the result never exceeds the limit;
        # trim a trailing space so the cut does not read as "word ...".
        keep = max(max_abstract_length - len(ellipsis), 0)
        abstract = abstract[:keep].rstrip() + ellipsis
    return f"""Title: {title}
Abstract: {abstract}
---"""
 def fetch_arxiv_papers(query, max_results=5):
     """Fetch papers from arXiv"""
     client = arxiv.Client()
     all_papers = []
     for paper in arxiv_papers + pubmed_papers:
         if paper['abstract'] and len(paper['abstract'].strip()) > 0:
+            # Clean and format the paper content
+            clean_title = clean_text(paper['title'])
+            clean_abstract = clean_text(paper['abstract'])
             # Check if the paper is actually about autism
+            if ('autism' in clean_title.lower() or
+                'asd' in clean_title.lower() or
+                'autism' in clean_abstract.lower() or
+                'asd' in clean_abstract.lower()):
+                formatted_text = format_paper(clean_title, clean_abstract)
                 all_papers.append({
+                    'title': clean_title,
+                    'text': formatted_text,
                     'url': paper['url'],
                     'published': paper['published'],
                     'relevance_score': paper.get('relevance_score', 0.5)
     if model is None or tokenizer is None:
         return "Error: Could not load the model. Please try again later."
+    # Clean and format the context
+    clean_context = clean_text(context)
     # Format the context as a structured query
+    prompt = f"""You are an expert in autism research. Based on the following research papers, provide a clear and comprehensive answer about autism.
+Question: {clean_text(question)}
+Research Papers:
+{clean_context}
+Instructions: Please provide a well-structured response that:
+1. Starts with a clear, general explanation of the topic
+2. Includes specific findings from the research papers when relevant
+3. Explains practical implications for people with autism and their families
+4. Notes any limitations or areas needing more research
+Keep your answer focused, clear, and helpful for someone wanting to understand autism better."""
     try:
         # Generate response
             outputs = model.generate(
                 **inputs,
                 max_length=max_length,
+                min_length=150,
                 num_beams=4,
                 length_penalty=1.5,
                 temperature=0.7,
             )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        response = clean_text(response)
         # If response is too short or empty, provide a general overview
         if len(response.strip()) < 100:
+            return f"""Here's what we know about autism in relation to your question:
 1. General Understanding:
 - Autism Spectrum Disorder (ASD) is a complex developmental condition
 - Each person with autism has unique strengths and challenges
 2. Key Aspects:
+- Communication and social interaction patterns
 - Repetitive behaviors and specific interests
 - Sensory sensitivities
 - Early intervention is important
+3. Research Focus:
+- Scientists are studying various aspects including:
+  * Brain development and function
+  * Genetic factors
+  * Environmental influences
+  * Effective interventions and supports
 For more specific information, try asking about:
 - Specific symptoms or characteristics
 - Diagnostic processes
 - Treatment approaches
+- Recent research findings"""
         # Format the response for better readability
         formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")