wakeupmh committed on
Commit
58be7e5
·
1 Parent(s): 218a8a7

refactor: improve response

Browse files
Files changed (1) hide show
  1. app.py +67 -22
app.py CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
8
  import arxiv
9
  import requests
10
  import xml.etree.ElementTree as ET
 
11
 
12
  # Configure logging
13
  logging.basicConfig(level=logging.INFO)
@@ -33,6 +34,35 @@ def load_local_model():
33
  st.error(f"Error loading model: {str(e)}")
34
  return None, None
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def fetch_arxiv_papers(query, max_results=5):
37
  """Fetch papers from arXiv"""
38
  client = arxiv.Client()
@@ -137,14 +167,21 @@ def search_research_papers(query):
137
  all_papers = []
138
  for paper in arxiv_papers + pubmed_papers:
139
  if paper['abstract'] and len(paper['abstract'].strip()) > 0:
 
 
 
 
140
  # Check if the paper is actually about autism
141
- if ('autism' in paper['title'].lower() or
142
- 'asd' in paper['title'].lower() or
143
- 'autism' in paper['abstract'].lower() or
144
- 'asd' in paper['abstract'].lower()):
 
 
 
145
  all_papers.append({
146
- 'title': paper['title'],
147
- 'text': f"Title: {paper['title']}\n\nAbstract: {paper['abstract']}",
148
  'url': paper['url'],
149
  'published': paper['published'],
150
  'relevance_score': paper.get('relevance_score', 0.5)
@@ -167,21 +204,24 @@ def generate_answer(question, context, max_length=512):
167
  if model is None or tokenizer is None:
168
  return "Error: Could not load the model. Please try again later."
169
 
 
 
 
170
  # Format the context as a structured query
171
- prompt = f"""You are an expert in autism research. Provide a comprehensive answer about autism, incorporating both general knowledge and specific research findings when available.
172
 
173
- Question: {question}
174
 
175
- Recent Research Context:
176
- {context}
177
 
178
- Instructions: Provide a detailed response that:
179
- 1. Starts with a general overview of the topic as it relates to autism
180
- 2. Incorporates specific findings from the provided research papers when relevant
181
- 3. Discusses practical implications for individuals with autism and their families
182
- 4. Mentions any limitations in current understanding
183
 
184
- If the research papers don't directly address the question, focus on providing general, well-established information about autism while noting what specific research would be helpful."""
185
 
186
  try:
187
  # Generate response
@@ -191,7 +231,7 @@ If the research papers don't directly address the question, focus on providing g
191
  outputs = model.generate(
192
  **inputs,
193
  max_length=max_length,
194
- min_length=150, # Increased minimum length for more comprehensive answers
195
  num_beams=4,
196
  length_penalty=1.5,
197
  temperature=0.7,
@@ -200,10 +240,11 @@ If the research papers don't directly address the question, focus on providing g
200
  )
201
 
202
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
203
 
204
  # If response is too short or empty, provide a general overview
205
  if len(response.strip()) < 100:
206
- return f"""Here's what we know about autism in relation to your question about {question}:
207
 
208
  1. General Understanding:
209
  - Autism Spectrum Disorder (ASD) is a complex developmental condition
@@ -211,19 +252,23 @@ If the research papers don't directly address the question, focus on providing g
211
  - Each person with autism has unique strengths and challenges
212
 
213
  2. Key Aspects:
214
- - Communication and social interaction
215
  - Repetitive behaviors and specific interests
216
  - Sensory sensitivities
217
  - Early intervention is important
218
 
219
- 3. Current Research:
220
- While the provided research papers don't directly address your specific question, researchers are actively studying various aspects of autism to better understand its causes, characteristics, and effective interventions.
 
 
 
 
221
 
222
  For more specific information, try asking about:
223
  - Specific symptoms or characteristics
224
  - Diagnostic processes
225
  - Treatment approaches
226
- - Current research in specific areas"""
227
 
228
  # Format the response for better readability
229
  formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")
 
8
  import arxiv
9
  import requests
10
  import xml.etree.ElementTree as ET
11
+ import re
12
 
13
  # Configure logging
14
  logging.basicConfig(level=logging.INFO)
 
34
  st.error(f"Error loading model: {str(e)}")
35
  return None, None
36
 
37
def clean_text(text):
    """Clean and normalize text content to plain, single-spaced ASCII.

    Processing order matters and is deliberate:

    1. Typographic quotes/dashes (and their common UTF-8-read-as-cp1252
       mojibake forms) are mapped to ASCII *before* anything is stripped.
       Doing this after the symbol filter would be a no-op, because the
       filter has already replaced those characters with spaces.
    2. Characters outside the allowed set (word characters, whitespace and
       ``. , ; : ( ) - ' "``) are replaced with a space.
    3. Accented letters are decomposed (NFD) so the base letter survives
       the ASCII filter ("cafe" rather than "caf").
    4. Remaining non-ASCII characters are dropped.
    5. Whitespace is collapsed last, so characters removed in step 4
       cannot leave double spaces behind.

    Args:
        text: The string to clean. Any falsy value (``None``, ``""``)
            yields an empty string.

    Returns:
        The cleaned ASCII string with leading/trailing whitespace removed.
    """
    # Local import keeps this function self-contained within the module.
    import unicodedata

    if not text:
        return ""

    # Order is significant: three-character mojibake sequences must be
    # handled before the bare two-character prefix, and all mojibake before
    # the genuine curly quotes (the dash mojibake contains U+201C/U+201D).
    replacements = (
        ("\u00e2\u20ac\u2122", "'"),   # mojibake of right single quote
        ("\u00e2\u20ac\u02dc", "'"),   # mojibake of left single quote
        ("\u00e2\u20ac\u0153", '"'),   # mojibake of left double quote
        ("\u00e2\u20ac\u009d", '"'),   # mojibake of right double quote
        ("\u00e2\u20ac\u201c", "-"),   # mojibake of en dash
        ("\u00e2\u20ac\u201d", "-"),   # mojibake of em dash
        ("\u00e2\u20ac", '"'),         # leftover prefix (original fallback)
        ("\u2018", "'"),
        ("\u2019", "'"),
        ("\u201c", '"'),
        ("\u201d", '"'),
        ("\u2013", "-"),
        ("\u2014", "-"),
    )
    for bad, good in replacements:
        text = text.replace(bad, good)

    # Replace disallowed symbols with a space so neighbouring words stay
    # separated.
    text = re.sub(r"[^\w\s.,;:()\-'\"]", " ", text)

    # Strip diacritics: decompose, then drop everything non-ASCII
    # (combining marks and any remaining non-Latin characters).
    text = unicodedata.normalize("NFD", text)
    text = text.encode("ascii", "ignore").decode("ascii")

    # Collapse whitespace last so earlier removals cannot leave gaps.
    text = re.sub(r"\s+", " ", text)

    return text.strip()
51
+
52
def format_paper(title, abstract):
    """Build the standard text block describing one paper.

    Both fields are passed through ``clean_text``. A cleaned abstract longer
    than 1000 characters is cut to its first 997 characters followed by
    ``"..."``. The result is a "Title:" line, an "Abstract:" line and a
    ``---`` separator, each separated by a blank line.
    """
    max_abstract_len = 1000
    ellipsis = "..."

    heading = clean_text(title)
    summary = clean_text(abstract)

    if len(summary) > max_abstract_len:
        summary = summary[:max_abstract_len - len(ellipsis)] + ellipsis

    sections = (f"Title: {heading}", f"Abstract: {summary}", "---")
    return "\n\n".join(sections)
65
+
66
  def fetch_arxiv_papers(query, max_results=5):
67
  """Fetch papers from arXiv"""
68
  client = arxiv.Client()
 
167
  all_papers = []
168
  for paper in arxiv_papers + pubmed_papers:
169
  if paper['abstract'] and len(paper['abstract'].strip()) > 0:
170
+ # Clean and format the paper content
171
+ clean_title = clean_text(paper['title'])
172
+ clean_abstract = clean_text(paper['abstract'])
173
+
174
  # Check if the paper is actually about autism
175
+ if ('autism' in clean_title.lower() or
176
+ 'asd' in clean_title.lower() or
177
+ 'autism' in clean_abstract.lower() or
178
+ 'asd' in clean_abstract.lower()):
179
+
180
+ formatted_text = format_paper(clean_title, clean_abstract)
181
+
182
  all_papers.append({
183
+ 'title': clean_title,
184
+ 'text': formatted_text,
185
  'url': paper['url'],
186
  'published': paper['published'],
187
  'relevance_score': paper.get('relevance_score', 0.5)
 
204
  if model is None or tokenizer is None:
205
  return "Error: Could not load the model. Please try again later."
206
 
207
+ # Clean and format the context
208
+ clean_context = clean_text(context)
209
+
210
  # Format the context as a structured query
211
+ prompt = f"""You are an expert in autism research. Based on the following research papers, provide a clear and comprehensive answer about autism.
212
 
213
+ Question: {clean_text(question)}
214
 
215
+ Research Papers:
216
+ {clean_context}
217
 
218
+ Instructions: Please provide a well-structured response that:
219
+ 1. Starts with a clear, general explanation of the topic
220
+ 2. Includes specific findings from the research papers when relevant
221
+ 3. Explains practical implications for people with autism and their families
222
+ 4. Notes any limitations or areas needing more research
223
 
224
+ Keep your answer focused, clear, and helpful for someone wanting to understand autism better."""
225
 
226
  try:
227
  # Generate response
 
231
  outputs = model.generate(
232
  **inputs,
233
  max_length=max_length,
234
+ min_length=150,
235
  num_beams=4,
236
  length_penalty=1.5,
237
  temperature=0.7,
 
240
  )
241
 
242
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
243
+ response = clean_text(response)
244
 
245
  # If response is too short or empty, provide a general overview
246
  if len(response.strip()) < 100:
247
+ return f"""Here's what we know about autism in relation to your question:
248
 
249
  1. General Understanding:
250
  - Autism Spectrum Disorder (ASD) is a complex developmental condition
 
252
  - Each person with autism has unique strengths and challenges
253
 
254
  2. Key Aspects:
255
+ - Communication and social interaction patterns
256
  - Repetitive behaviors and specific interests
257
  - Sensory sensitivities
258
  - Early intervention is important
259
 
260
+ 3. Research Focus:
261
+ - Scientists are studying various aspects including:
262
+ * Brain development and function
263
+ * Genetic factors
264
+ * Environmental influences
265
+ * Effective interventions and supports
266
 
267
  For more specific information, try asking about:
268
  - Specific symptoms or characteristics
269
  - Diagnostic processes
270
  - Treatment approaches
271
+ - Recent research findings"""
272
 
273
  # Format the response for better readability
274
  formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")