Spaces:
Sleeping
Sleeping
refactor: improve response
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
|
|
8 |
import arxiv
|
9 |
import requests
|
10 |
import xml.etree.ElementTree as ET
|
|
|
11 |
|
12 |
# Configure logging
|
13 |
logging.basicConfig(level=logging.INFO)
|
@@ -33,6 +34,35 @@ def load_local_model():
|
|
33 |
st.error(f"Error loading model: {str(e)}")
|
34 |
return None, None
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
def fetch_arxiv_papers(query, max_results=5):
|
37 |
"""Fetch papers from arXiv"""
|
38 |
client = arxiv.Client()
|
@@ -137,14 +167,21 @@ def search_research_papers(query):
|
|
137 |
all_papers = []
|
138 |
for paper in arxiv_papers + pubmed_papers:
|
139 |
if paper['abstract'] and len(paper['abstract'].strip()) > 0:
|
|
|
|
|
|
|
|
|
140 |
# Check if the paper is actually about autism
|
141 |
-
if ('autism' in
|
142 |
-
'asd' in
|
143 |
-
'autism' in
|
144 |
-
'asd' in
|
|
|
|
|
|
|
145 |
all_papers.append({
|
146 |
-
'title':
|
147 |
-
'text':
|
148 |
'url': paper['url'],
|
149 |
'published': paper['published'],
|
150 |
'relevance_score': paper.get('relevance_score', 0.5)
|
@@ -167,21 +204,24 @@ def generate_answer(question, context, max_length=512):
|
|
167 |
if model is None or tokenizer is None:
|
168 |
return "Error: Could not load the model. Please try again later."
|
169 |
|
|
|
|
|
|
|
170 |
# Format the context as a structured query
|
171 |
-
prompt = f"""You are an expert in autism research.
|
172 |
|
173 |
-
Question: {question}
|
174 |
|
175 |
-
|
176 |
-
{
|
177 |
|
178 |
-
Instructions:
|
179 |
-
1. Starts with a general
|
180 |
-
2.
|
181 |
-
3.
|
182 |
-
4.
|
183 |
|
184 |
-
|
185 |
|
186 |
try:
|
187 |
# Generate response
|
@@ -191,7 +231,7 @@ If the research papers don't directly address the question, focus on providing g
|
|
191 |
outputs = model.generate(
|
192 |
**inputs,
|
193 |
max_length=max_length,
|
194 |
-
min_length=150,
|
195 |
num_beams=4,
|
196 |
length_penalty=1.5,
|
197 |
temperature=0.7,
|
@@ -200,10 +240,11 @@ If the research papers don't directly address the question, focus on providing g
|
|
200 |
)
|
201 |
|
202 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
203 |
|
204 |
# If response is too short or empty, provide a general overview
|
205 |
if len(response.strip()) < 100:
|
206 |
-
return f"""Here's what we know about autism in relation to your question
|
207 |
|
208 |
1. General Understanding:
|
209 |
- Autism Spectrum Disorder (ASD) is a complex developmental condition
|
@@ -211,19 +252,23 @@ If the research papers don't directly address the question, focus on providing g
|
|
211 |
- Each person with autism has unique strengths and challenges
|
212 |
|
213 |
2. Key Aspects:
|
214 |
-
- Communication and social interaction
|
215 |
- Repetitive behaviors and specific interests
|
216 |
- Sensory sensitivities
|
217 |
- Early intervention is important
|
218 |
|
219 |
-
3.
|
220 |
-
|
|
|
|
|
|
|
|
|
221 |
|
222 |
For more specific information, try asking about:
|
223 |
- Specific symptoms or characteristics
|
224 |
- Diagnostic processes
|
225 |
- Treatment approaches
|
226 |
-
-
|
227 |
|
228 |
# Format the response for better readability
|
229 |
formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")
|
|
|
8 |
import arxiv
|
9 |
import requests
|
10 |
import xml.etree.ElementTree as ET
|
11 |
+
import re
|
12 |
|
13 |
# Configure logging
|
14 |
logging.basicConfig(level=logging.INFO)
|
|
|
34 |
st.error(f"Error loading model: {str(e)}")
|
35 |
return None, None
|
36 |
|
37 |
+
def clean_text(text):
    """Clean and normalize text content.

    Typographic quotes are converted to their ASCII equivalents, every
    character that is not a word character, whitespace, or one of
    ``. , ; : ( ) - ' "`` is replaced by a space, any remaining non-ASCII
    characters are dropped, and runs of whitespace are collapsed to a
    single space.

    Args:
        text: The string to clean. Any falsy value (``None``, ``""``) is
            treated as empty.

    Returns:
        The cleaned, ASCII-only string without leading or trailing
        whitespace, or ``""`` for falsy input.
    """
    if not text:
        return ""

    # Quotes must be normalized BEFORE the character filter below: the
    # filter turns every curly quote into a space, so replacing them
    # afterwards can never match.
    # NOTE(review): the multi-character sequences are what curly quotes
    # look like when UTF-8 bytes were decoded as cp1252 -- presumably that
    # is what the original mojibake literal was meant to catch; confirm.
    for broken, plain in (
        ("\u00e2\u20ac\u02dc", "'"),
        ("\u00e2\u20ac\u2122", "'"),
        ("\u00e2\u20ac\u0153", '"'),
        ("\u00e2\u20ac\u009d", '"'),
    ):
        text = text.replace(broken, plain)
    text = text.translate(
        {0x2018: "'", 0x2019: "'", 0x201C: '"', 0x201D: '"'}
    )

    # Remove special characters and normalize spaces. This pass also turns
    # non-ASCII whitespace (e.g. a no-break space) into a plain space so it
    # survives the ASCII filter as a word separator.
    text = re.sub(r'[^\w\s.,;:()\-\'"]', ' ', text)
    text = re.sub(r'\s+', ' ', text)

    # Drop any remaining non-ASCII characters (accented letters etc.).
    text = text.encode('ascii', 'ignore').decode('ascii')

    # Dropping characters can leave adjacent spaces behind, so collapse
    # once more; this also strips leading/trailing whitespace.
    return ' '.join(text.split())
|
51 |
+
|
52 |
+
def format_paper(title, abstract):
    """Build the text block used to present one paper.

    Both fields are passed through ``clean_text``. A cleaned abstract
    longer than 1000 characters is cut to 997 characters plus ``"..."``.

    Args:
        title: Raw paper title.
        abstract: Raw paper abstract.

    Returns:
        A string with a ``Title:`` section, an ``Abstract:`` section and a
        closing ``---`` line, each separated by one blank line.
    """
    heading = clean_text(title)
    summary = clean_text(abstract)

    limit = 1000
    suffix = "..."
    if len(summary) > limit:
        # Keep the total length at exactly `limit`, suffix included.
        summary = summary[:limit - len(suffix)] + suffix

    sections = (f"Title: {heading}", f"Abstract: {summary}", "---")
    return "\n\n".join(sections)
|
65 |
+
|
66 |
def fetch_arxiv_papers(query, max_results=5):
|
67 |
"""Fetch papers from arXiv"""
|
68 |
client = arxiv.Client()
|
|
|
167 |
all_papers = []
|
168 |
for paper in arxiv_papers + pubmed_papers:
|
169 |
if paper['abstract'] and len(paper['abstract'].strip()) > 0:
|
170 |
+
# Clean and format the paper content
|
171 |
+
clean_title = clean_text(paper['title'])
|
172 |
+
clean_abstract = clean_text(paper['abstract'])
|
173 |
+
|
174 |
# Check if the paper is actually about autism
|
175 |
+
if ('autism' in clean_title.lower() or
|
176 |
+
'asd' in clean_title.lower() or
|
177 |
+
'autism' in clean_abstract.lower() or
|
178 |
+
'asd' in clean_abstract.lower()):
|
179 |
+
|
180 |
+
formatted_text = format_paper(clean_title, clean_abstract)
|
181 |
+
|
182 |
all_papers.append({
|
183 |
+
'title': clean_title,
|
184 |
+
'text': formatted_text,
|
185 |
'url': paper['url'],
|
186 |
'published': paper['published'],
|
187 |
'relevance_score': paper.get('relevance_score', 0.5)
|
|
|
204 |
if model is None or tokenizer is None:
|
205 |
return "Error: Could not load the model. Please try again later."
|
206 |
|
207 |
+
# Clean and format the context
|
208 |
+
clean_context = clean_text(context)
|
209 |
+
|
210 |
# Format the context as a structured query
|
211 |
+
prompt = f"""You are an expert in autism research. Based on the following research papers, provide a clear and comprehensive answer about autism.
|
212 |
|
213 |
+
Question: {clean_text(question)}
|
214 |
|
215 |
+
Research Papers:
|
216 |
+
{clean_context}
|
217 |
|
218 |
+
Instructions: Please provide a well-structured response that:
|
219 |
+
1. Starts with a clear, general explanation of the topic
|
220 |
+
2. Includes specific findings from the research papers when relevant
|
221 |
+
3. Explains practical implications for people with autism and their families
|
222 |
+
4. Notes any limitations or areas needing more research
|
223 |
|
224 |
+
Keep your answer focused, clear, and helpful for someone wanting to understand autism better."""
|
225 |
|
226 |
try:
|
227 |
# Generate response
|
|
|
231 |
outputs = model.generate(
|
232 |
**inputs,
|
233 |
max_length=max_length,
|
234 |
+
min_length=150,
|
235 |
num_beams=4,
|
236 |
length_penalty=1.5,
|
237 |
temperature=0.7,
|
|
|
240 |
)
|
241 |
|
242 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
243 |
+
response = clean_text(response)
|
244 |
|
245 |
# If response is too short or empty, provide a general overview
|
246 |
if len(response.strip()) < 100:
|
247 |
+
return f"""Here's what we know about autism in relation to your question:
|
248 |
|
249 |
1. General Understanding:
|
250 |
- Autism Spectrum Disorder (ASD) is a complex developmental condition
|
|
|
252 |
- Each person with autism has unique strengths and challenges
|
253 |
|
254 |
2. Key Aspects:
|
255 |
+
- Communication and social interaction patterns
|
256 |
- Repetitive behaviors and specific interests
|
257 |
- Sensory sensitivities
|
258 |
- Early intervention is important
|
259 |
|
260 |
+
3. Research Focus:
|
261 |
+
- Scientists are studying various aspects including:
|
262 |
+
* Brain development and function
|
263 |
+
* Genetic factors
|
264 |
+
* Environmental influences
|
265 |
+
* Effective interventions and supports
|
266 |
|
267 |
For more specific information, try asking about:
|
268 |
- Specific symptoms or characteristics
|
269 |
- Diagnostic processes
|
270 |
- Treatment approaches
|
271 |
+
- Recent research findings"""
|
272 |
|
273 |
# Format the response for better readability
|
274 |
formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")
|