Spaces:

wakeupmh
/

ama-autism

Sleeping

App Files Community

wakeupmh committed on Feb 15

Commit

cc41495

1 Parent(s): 54a5022

fix: search

Browse files

Files changed (2) hide show

app.py +7 -2
faiss_index/index.py +30 -21

app.py CHANGED Viewed

@@ -33,8 +33,12 @@ def load_dataset(query):
     # Always fetch fresh results for the specific query
     with st.spinner("Searching autism research papers..."):
         import faiss_index.index as idx
-        # Make the query more specific to autism and b12
-        search_query = f"{query} AND (cat:q-bio.NC OR cat:q-bio.QM OR cat:q-bio.GN OR cat:q-bio.CB OR cat:q-bio.MN)"
         papers = idx.fetch_arxiv_papers(search_query, max_results=25)
         if not papers:
             st.warning("No relevant papers found. Please try rephrasing your question.")
@@ -88,6 +92,7 @@ def generate_answer(question, context, max_length=150):
 # Streamlit App
 st.title("🧩 AMA Autism")
 query = st.text_input("Please ask me anything about autism ✨")
 if query:

     # Always fetch fresh results for the specific query
     with st.spinner("Searching autism research papers..."):
         import faiss_index.index as idx
+        # Ensure both autism and the query terms are included
+        if 'autism' not in query.lower():
+            search_query = f"autism {query}"
+        else:
+            search_query = query
         papers = idx.fetch_arxiv_papers(search_query, max_results=25)
         if not papers:
             st.warning("No relevant papers found. Please try rephrasing your question.")
 # Streamlit App
 st.title("🧩 AMA Autism")
+st.write("This app searches through scientific papers to answer your questions about autism. For best results, be specific in your questions.")
 query = st.text_input("Please ask me anything about autism ✨")
 if query:

faiss_index/index.py CHANGED Viewed

@@ -18,34 +18,43 @@ def fetch_arxiv_papers(query, max_results=10):
     """Fetch papers from arXiv and format them for RAG"""
     client = arxiv.Client()
-    # Construct a more focused search query
-    search_terms = query.lower().split()
-    if 'autism' not in search_terms:
-        search_terms.insert(0, 'autism')
-    # Add specific category filters for medical and biological papers
-    search_query = f"({' AND '.join(search_terms)}) AND (cat:q-bio.NC OR cat:q-bio.QM OR cat:q-bio.GN OR cat:q-bio.CB OR cat:q-bio.MN)"
     search = arxiv.Search(
         query=search_query,
-        max_results=max_results,
         sort_by=arxiv.SortCriterion.Relevance
     )
-    results = list(client.results(search))
-    papers = []
-    # Filter results to ensure they're relevant to autism
-    for i, result in enumerate(results):
-        if 'autism' in result.title.lower() or 'autism' in result.summary.lower():
-            papers.append({
-                "id": str(i),
-                "text": result.summary,
-                "title": result.title
-            })
-    logging.info(f"Fetched {len(papers)} relevant papers from arXiv")
-    return papers
 def build_faiss_index(papers, dataset_dir=DATASET_DIR):
     """Build and save dataset with FAISS index for RAG"""

     """Fetch papers from arXiv and format them for RAG"""
     client = arxiv.Client()
+    # Clean and prepare the search query
+    query = query.replace('and', '').strip()  # Remove 'and' as it's treated as AND operator
+    terms = [term.strip() for term in query.split() if term.strip()]
+    # Create a more flexible search query
+    search_query = ' OR '.join([f'abs:"{term}" OR ti:"{term}"' for term in terms])
+    search_query = f'({search_query}) AND (cat:q-bio* OR cat:med*)'
+    logging.info(f"Searching arXiv with query: {search_query}")
     search = arxiv.Search(
         query=search_query,
+        max_results=max_results * 2,  # Get more results to filter
         sort_by=arxiv.SortCriterion.Relevance
     )
+    try:
+        results = list(client.results(search))
+        papers = []
+        for i, result in enumerate(results):
+            # Include paper if it contains any of the search terms
+            text = (result.title + " " + result.summary).lower()
+            if any(term.lower() in text for term in terms):
+                papers.append({
+                    "id": str(i),
+                    "text": result.summary,
+                    "title": result.title
+                })
+                if len(papers) >= max_results:
+                    break
+        logging.info(f"Found {len(papers)} relevant papers from arXiv")
+        return papers
+    except Exception as e:
+        logging.error(f"Error fetching papers from arXiv: {str(e)}")
+        return []
 def build_faiss_index(papers, dataset_dir=DATASET_DIR):
     """Build and save dataset with FAISS index for RAG"""