Spaces:
Sleeping
Sleeping
feat: add BART
Browse files
app.py
CHANGED
@@ -37,11 +37,8 @@ def fetch_arxiv_papers(query, max_results=5):
|
|
37 |
"""Fetch papers from arXiv"""
|
38 |
client = arxiv.Client()
|
39 |
|
40 |
-
#
|
41 |
-
|
42 |
-
search_query = f"(ti:{query} OR abs:{query}) AND (ti:autism OR abs:autism) AND cat:q-bio"
|
43 |
-
else:
|
44 |
-
search_query = f"(ti:{query} OR abs:{query}) AND cat:q-bio"
|
45 |
|
46 |
# Search arXiv
|
47 |
search = arxiv.Search(
|
@@ -52,12 +49,18 @@ def fetch_arxiv_papers(query, max_results=5):
|
|
52 |
|
53 |
papers = []
|
54 |
for result in client.results(search):
|
55 |
-
papers
|
56 |
-
|
57 |
-
'
|
58 |
-
'
|
59 |
-
'
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
return papers
|
63 |
|
@@ -65,11 +68,8 @@ def fetch_pubmed_papers(query, max_results=5):
|
|
65 |
"""Fetch papers from PubMed"""
|
66 |
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
|
67 |
|
68 |
-
#
|
69 |
-
|
70 |
-
search_term = f"({query}) AND (autism[Title/Abstract] OR ASD[Title/Abstract])"
|
71 |
-
else:
|
72 |
-
search_term = query
|
73 |
|
74 |
# Search for papers
|
75 |
search_url = f"{base_url}/esearch.fcgi"
|
@@ -109,12 +109,19 @@ def fetch_pubmed_papers(query, max_results=5):
|
|
109 |
pmid = article.find('.//PMID')
|
110 |
|
111 |
if title is not None and abstract is not None:
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
|
119 |
except Exception as e:
|
120 |
st.error(f"Error fetching PubMed papers: {str(e)}")
|
@@ -130,14 +137,28 @@ def search_research_papers(query):
|
|
130 |
all_papers = []
|
131 |
for paper in arxiv_papers + pubmed_papers:
|
132 |
if paper['abstract'] and len(paper['abstract'].strip()) > 0:
|
133 |
-
|
134 |
-
|
135 |
-
'
|
136 |
-
'
|
137 |
-
'
|
138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
|
142 |
def generate_answer(question, context, max_length=512):
|
143 |
"""Generate a comprehensive answer using the local model"""
|
@@ -146,21 +167,24 @@ def generate_answer(question, context, max_length=512):
|
|
146 |
if model is None or tokenizer is None:
|
147 |
return "Error: Could not load the model. Please try again later."
|
148 |
|
|
|
|
|
|
|
149 |
# Format the context as a structured query
|
150 |
-
prompt = f"""
|
151 |
|
152 |
Research Context:
|
153 |
{context}
|
154 |
|
155 |
Question: {question}
|
156 |
|
157 |
-
Instructions:
|
158 |
-
1. Main findings from the research
|
159 |
-
2. Research methods used
|
160 |
-
3. Clinical implications
|
161 |
-
4. Limitations of the studies
|
162 |
|
163 |
-
If the research
|
164 |
|
165 |
try:
|
166 |
# Generate response
|
@@ -182,13 +206,13 @@ If the research doesn't address the question directly, explain what information
|
|
182 |
|
183 |
# If response is too short or empty, provide a fallback message
|
184 |
if len(response.strip()) < 50:
|
185 |
-
return """I apologize, but I couldn't
|
186 |
This might be because:
|
187 |
-
1. The research papers don't directly address
|
188 |
-
2. The context needs more specific information
|
189 |
-
3. The question might need to be more specific
|
190 |
|
191 |
-
Please try
|
192 |
|
193 |
# Format the response for better readability
|
194 |
formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")
|
|
|
37 |
"""Fetch papers from arXiv"""
|
38 |
client = arxiv.Client()
|
39 |
|
40 |
+
# Always include autism in the search query
|
41 |
+
search_query = f"(ti:autism OR abs:autism) AND (ti:\"{query}\" OR abs:\"{query}\") AND cat:q-bio"
|
|
|
|
|
|
|
42 |
|
43 |
# Search arXiv
|
44 |
search = arxiv.Search(
|
|
|
49 |
|
50 |
papers = []
|
51 |
for result in client.results(search):
|
52 |
+
# Only include papers that mention autism in title or abstract
|
53 |
+
if ('autism' in result.title.lower() or
|
54 |
+
'asd' in result.title.lower() or
|
55 |
+
'autism' in result.summary.lower() or
|
56 |
+
'asd' in result.summary.lower()):
|
57 |
+
papers.append({
|
58 |
+
'title': result.title,
|
59 |
+
'abstract': result.summary,
|
60 |
+
'url': result.pdf_url,
|
61 |
+
'published': result.published.strftime("%Y-%m-%d"),
|
62 |
+
'relevance_score': 1 if 'autism' in result.title.lower() else 0.5
|
63 |
+
})
|
64 |
|
65 |
return papers
|
66 |
|
|
|
68 |
"""Fetch papers from PubMed"""
|
69 |
base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
|
70 |
|
71 |
+
# Always include autism in the search term
|
72 |
+
search_term = f"(autism[Title/Abstract] OR ASD[Title/Abstract]) AND ({query}[Title/Abstract])"
|
|
|
|
|
|
|
73 |
|
74 |
# Search for papers
|
75 |
search_url = f"{base_url}/esearch.fcgi"
|
|
|
109 |
pmid = article.find('.//PMID')
|
110 |
|
111 |
if title is not None and abstract is not None:
|
112 |
+
title_text = title.text.lower()
|
113 |
+
abstract_text = abstract.text.lower()
|
114 |
+
|
115 |
+
# Only include papers that mention autism
|
116 |
+
if ('autism' in title_text or 'asd' in title_text or
|
117 |
+
'autism' in abstract_text or 'asd' in abstract_text):
|
118 |
+
papers.append({
|
119 |
+
'title': title.text,
|
120 |
+
'abstract': abstract.text,
|
121 |
+
'url': f"https://pubmed.ncbi.nlm.nih.gov/{pmid.text}/",
|
122 |
+
'published': year.text if year is not None else 'Unknown',
|
123 |
+
'relevance_score': 1 if ('autism' in title_text or 'asd' in title_text) else 0.5
|
124 |
+
})
|
125 |
|
126 |
except Exception as e:
|
127 |
st.error(f"Error fetching PubMed papers: {str(e)}")
|
|
|
137 |
all_papers = []
|
138 |
for paper in arxiv_papers + pubmed_papers:
|
139 |
if paper['abstract'] and len(paper['abstract'].strip()) > 0:
|
140 |
+
# Check if the paper is actually about autism
|
141 |
+
if ('autism' in paper['title'].lower() or
|
142 |
+
'asd' in paper['title'].lower() or
|
143 |
+
'autism' in paper['abstract'].lower() or
|
144 |
+
'asd' in paper['abstract'].lower()):
|
145 |
+
all_papers.append({
|
146 |
+
'title': paper['title'],
|
147 |
+
'text': f"Title: {paper['title']}\n\nAbstract: {paper['abstract']}",
|
148 |
+
'url': paper['url'],
|
149 |
+
'published': paper['published'],
|
150 |
+
'relevance_score': paper.get('relevance_score', 0.5)
|
151 |
+
})
|
152 |
|
153 |
+
# Sort papers by relevance score and convert to DataFrame
|
154 |
+
all_papers.sort(key=lambda x: x['relevance_score'], reverse=True)
|
155 |
+
df = pd.DataFrame(all_papers)
|
156 |
+
|
157 |
+
if df.empty:
|
158 |
+
st.warning("No autism-related papers found. Please try a different search term.")
|
159 |
+
return pd.DataFrame(columns=['title', 'text', 'url', 'published', 'relevance_score'])
|
160 |
+
|
161 |
+
return df
|
162 |
|
163 |
def generate_answer(question, context, max_length=512):
|
164 |
"""Generate a comprehensive answer using the local model"""
|
|
|
167 |
if model is None or tokenizer is None:
|
168 |
return "Error: Could not load the model. Please try again later."
|
169 |
|
170 |
+
if not context or len(context.strip()) == 0:
|
171 |
+
return "No relevant autism research papers found to answer your question. Please try rephrasing your question."
|
172 |
+
|
173 |
# Format the context as a structured query
|
174 |
+
prompt = f"""Based on the following autism research papers, provide a detailed answer:
|
175 |
|
176 |
Research Context:
|
177 |
{context}
|
178 |
|
179 |
Question: {question}
|
180 |
|
181 |
+
Instructions: Provide a comprehensive answer that covers:
|
182 |
+
1. Main findings about autism from the research
|
183 |
+
2. Research methods used in autism studies
|
184 |
+
3. Clinical implications for autism treatment
|
185 |
+
4. Limitations of the autism studies
|
186 |
|
187 |
+
If the research papers don't directly address autism and the question, explain what information is missing."""
|
188 |
|
189 |
try:
|
190 |
# Generate response
|
|
|
206 |
|
207 |
# If response is too short or empty, provide a fallback message
|
208 |
if len(response.strip()) < 50:
|
209 |
+
return """I apologize, but I couldn't find specific information about autism in the research papers provided.
|
210 |
This might be because:
|
211 |
+
1. The research papers don't directly address autism
|
212 |
+
2. The context needs more specific information about autism
|
213 |
+
3. The question might need to be more focused on autism-specific aspects
|
214 |
|
215 |
+
Please try asking a more specific question about autism."""
|
216 |
|
217 |
# Format the response for better readability
|
218 |
formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")
|