wakeupmh committed on
Commit
7842508
·
1 Parent(s): a47c92e

feat: add BART

Browse files
Files changed (1) hide show
  1. app.py +65 -41
app.py CHANGED
@@ -37,11 +37,8 @@ def fetch_arxiv_papers(query, max_results=5):
37
  """Fetch papers from arXiv"""
38
  client = arxiv.Client()
39
 
40
- # Ensure query includes autism-related terms
41
- if 'autism' not in query.lower():
42
- search_query = f"(ti:{query} OR abs:{query}) AND (ti:autism OR abs:autism) AND cat:q-bio"
43
- else:
44
- search_query = f"(ti:{query} OR abs:{query}) AND cat:q-bio"
45
 
46
  # Search arXiv
47
  search = arxiv.Search(
@@ -52,12 +49,18 @@ def fetch_arxiv_papers(query, max_results=5):
52
 
53
  papers = []
54
  for result in client.results(search):
55
- papers.append({
56
- 'title': result.title,
57
- 'abstract': result.summary,
58
- 'url': result.pdf_url,
59
- 'published': result.published.strftime("%Y-%m-%d")
60
- })
 
 
 
 
 
 
61
 
62
  return papers
63
 
@@ -65,11 +68,8 @@ def fetch_pubmed_papers(query, max_results=5):
65
  """Fetch papers from PubMed"""
66
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
67
 
68
- # Ensure query includes autism-related terms
69
- if 'autism' not in query.lower():
70
- search_term = f"({query}) AND (autism[Title/Abstract] OR ASD[Title/Abstract])"
71
- else:
72
- search_term = query
73
 
74
  # Search for papers
75
  search_url = f"{base_url}/esearch.fcgi"
@@ -109,12 +109,19 @@ def fetch_pubmed_papers(query, max_results=5):
109
  pmid = article.find('.//PMID')
110
 
111
  if title is not None and abstract is not None:
112
- papers.append({
113
- 'title': title.text,
114
- 'abstract': abstract.text,
115
- 'url': f"https://pubmed.ncbi.nlm.nih.gov/{pmid.text}/",
116
- 'published': year.text if year is not None else 'Unknown'
117
- })
 
 
 
 
 
 
 
118
 
119
  except Exception as e:
120
  st.error(f"Error fetching PubMed papers: {str(e)}")
@@ -130,14 +137,28 @@ def search_research_papers(query):
130
  all_papers = []
131
  for paper in arxiv_papers + pubmed_papers:
132
  if paper['abstract'] and len(paper['abstract'].strip()) > 0:
133
- all_papers.append({
134
- 'title': paper['title'],
135
- 'text': f"Title: {paper['title']}\n\nAbstract: {paper['abstract']}",
136
- 'url': paper['url'],
137
- 'published': paper['published']
138
- })
 
 
 
 
 
 
139
 
140
- return pd.DataFrame(all_papers)
 
 
 
 
 
 
 
 
141
 
142
  def generate_answer(question, context, max_length=512):
143
  """Generate a comprehensive answer using the local model"""
@@ -146,21 +167,24 @@ def generate_answer(question, context, max_length=512):
146
  if model is None or tokenizer is None:
147
  return "Error: Could not load the model. Please try again later."
148
 
 
 
 
149
  # Format the context as a structured query
150
- prompt = f"""Summarize the following research about autism and answer the question.
151
 
152
  Research Context:
153
  {context}
154
 
155
  Question: {question}
156
 
157
- Instructions: Based on the research context above, provide a comprehensive answer that covers:
158
- 1. Main findings from the research
159
- 2. Research methods used
160
- 3. Clinical implications
161
- 4. Limitations of the studies
162
 
163
- If the research doesn't address the question directly, explain what information is missing."""
164
 
165
  try:
166
  # Generate response
@@ -182,13 +206,13 @@ If the research doesn't address the question directly, explain what information
182
 
183
  # If response is too short or empty, provide a fallback message
184
  if len(response.strip()) < 50:
185
- return """I apologize, but I couldn't generate a specific answer from the research papers provided.
186
  This might be because:
187
- 1. The research papers don't directly address your question
188
- 2. The context needs more specific information
189
- 3. The question might need to be more specific
190
 
191
- Please try rephrasing your question or ask about a more specific aspect of autism."""
192
 
193
  # Format the response for better readability
194
  formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")
 
37
  """Fetch papers from arXiv"""
38
  client = arxiv.Client()
39
 
40
+ # Always include autism in the search query
41
+ search_query = f"(ti:autism OR abs:autism) AND (ti:\"{query}\" OR abs:\"{query}\") AND cat:q-bio"
 
 
 
42
 
43
  # Search arXiv
44
  search = arxiv.Search(
 
49
 
50
  papers = []
51
  for result in client.results(search):
52
+ # Only include papers that mention autism in title or abstract
53
+ if ('autism' in result.title.lower() or
54
+ 'asd' in result.title.lower() or
55
+ 'autism' in result.summary.lower() or
56
+ 'asd' in result.summary.lower()):
57
+ papers.append({
58
+ 'title': result.title,
59
+ 'abstract': result.summary,
60
+ 'url': result.pdf_url,
61
+ 'published': result.published.strftime("%Y-%m-%d"),
62
+ 'relevance_score': 1 if 'autism' in result.title.lower() else 0.5
63
+ })
64
 
65
  return papers
66
 
 
68
  """Fetch papers from PubMed"""
69
  base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
70
 
71
+ # Always include autism in the search term
72
+ search_term = f"(autism[Title/Abstract] OR ASD[Title/Abstract]) AND ({query}[Title/Abstract])"
 
 
 
73
 
74
  # Search for papers
75
  search_url = f"{base_url}/esearch.fcgi"
 
109
  pmid = article.find('.//PMID')
110
 
111
  if title is not None and abstract is not None:
112
+ title_text = title.text.lower()
113
+ abstract_text = abstract.text.lower()
114
+
115
+ # Only include papers that mention autism
116
+ if ('autism' in title_text or 'asd' in title_text or
117
+ 'autism' in abstract_text or 'asd' in abstract_text):
118
+ papers.append({
119
+ 'title': title.text,
120
+ 'abstract': abstract.text,
121
+ 'url': f"https://pubmed.ncbi.nlm.nih.gov/{pmid.text}/",
122
+ 'published': year.text if year is not None else 'Unknown',
123
+ 'relevance_score': 1 if ('autism' in title_text or 'asd' in title_text) else 0.5
124
+ })
125
 
126
  except Exception as e:
127
  st.error(f"Error fetching PubMed papers: {str(e)}")
 
137
  all_papers = []
138
  for paper in arxiv_papers + pubmed_papers:
139
  if paper['abstract'] and len(paper['abstract'].strip()) > 0:
140
+ # Check if the paper is actually about autism
141
+ if ('autism' in paper['title'].lower() or
142
+ 'asd' in paper['title'].lower() or
143
+ 'autism' in paper['abstract'].lower() or
144
+ 'asd' in paper['abstract'].lower()):
145
+ all_papers.append({
146
+ 'title': paper['title'],
147
+ 'text': f"Title: {paper['title']}\n\nAbstract: {paper['abstract']}",
148
+ 'url': paper['url'],
149
+ 'published': paper['published'],
150
+ 'relevance_score': paper.get('relevance_score', 0.5)
151
+ })
152
 
153
+ # Sort papers by relevance score and convert to DataFrame
154
+ all_papers.sort(key=lambda x: x['relevance_score'], reverse=True)
155
+ df = pd.DataFrame(all_papers)
156
+
157
+ if df.empty:
158
+ st.warning("No autism-related papers found. Please try a different search term.")
159
+ return pd.DataFrame(columns=['title', 'text', 'url', 'published', 'relevance_score'])
160
+
161
+ return df
162
 
163
  def generate_answer(question, context, max_length=512):
164
  """Generate a comprehensive answer using the local model"""
 
167
  if model is None or tokenizer is None:
168
  return "Error: Could not load the model. Please try again later."
169
 
170
+ if not context or len(context.strip()) == 0:
171
+ return "No relevant autism research papers found to answer your question. Please try rephrasing your question."
172
+
173
  # Format the context as a structured query
174
+ prompt = f"""Based on the following autism research papers, provide a detailed answer:
175
 
176
  Research Context:
177
  {context}
178
 
179
  Question: {question}
180
 
181
+ Instructions: Provide a comprehensive answer that covers:
182
+ 1. Main findings about autism from the research
183
+ 2. Research methods used in autism studies
184
+ 3. Clinical implications for autism treatment
185
+ 4. Limitations of the autism studies
186
 
187
+ If the research papers don't directly address autism and the question, explain what information is missing."""
188
 
189
  try:
190
  # Generate response
 
206
 
207
  # If response is too short or empty, provide a fallback message
208
  if len(response.strip()) < 50:
209
+ return """I apologize, but I couldn't find specific information about autism in the research papers provided.
210
  This might be because:
211
+ 1. The research papers don't directly address autism
212
+ 2. The context needs more specific information about autism
213
+ 3. The question might need to be more focused on autism-specific aspects
214
 
215
+ Please try asking a more specific question about autism."""
216
 
217
  # Format the response for better readability
218
  formatted_response = response.replace(". ", ".\n").replace("• ", "\n• ")