Spaces:

wakeupmh
/

ama-autism

Sleeping

App Files Community

wakeupmh committed on Feb 15

Commit

54a5022

1 Parent(s): cc0b0d6

fix: embeddings

Browse files

Files changed (2) hide show

app.py +27 -20
faiss_index/index.py +12 -0

app.py CHANGED Viewed

@@ -34,8 +34,12 @@ def load_dataset(query):
     with st.spinner("Searching autism research papers..."):
         import faiss_index.index as idx
         # Make the query more specific to autism and b12
-        search_query = f"autism {query} AND (cat:q-bio.NC OR cat:q-bio.QM OR cat:q-bio.GN OR cat:q-bio.CB OR cat:q-bio.MN)"
         papers = idx.fetch_arxiv_papers(search_query, max_results=25)
         idx.build_faiss_index(papers, dataset_dir=DATASET_DIR)
     # Load and convert to pandas for easier handling
@@ -91,22 +95,25 @@ if query:
         # Load dataset
         df = load_dataset(query)
-        # Get relevant context
-        context = "\n".join([
-            f"{text[:1000]}" for text in df['text'].head(3)
-        ])
-        # Generate answer
-        answer = generate_answer(query, context)
-    if answer and not answer.isspace():
-        st.success("Answer found!")
-        st.write(answer)
-        st.write("### Sources Used:")
-        for _, row in df.head(3).iterrows():
-            st.write(f"**Title:** {row['title']}")
-            st.write(f"**Summary:** {row['text'][:200]}...")
-            st.write("---")
-    else:
-        st.warning("I couldn't find a specific answer in the research papers. Try rephrasing your question.")

     with st.spinner("Searching autism research papers..."):
         import faiss_index.index as idx
         # Make the query more specific to autism and b12
+        search_query = f"{query} AND (cat:q-bio.NC OR cat:q-bio.QM OR cat:q-bio.GN OR cat:q-bio.CB OR cat:q-bio.MN)"
         papers = idx.fetch_arxiv_papers(search_query, max_results=25)
+        if not papers:
+            st.warning("No relevant papers found. Please try rephrasing your question.")
+            return pd.DataFrame(columns=['title', 'text'])
         idx.build_faiss_index(papers, dataset_dir=DATASET_DIR)
     # Load and convert to pandas for easier handling
         # Load dataset
         df = load_dataset(query)
+        if df.empty:
+            st.warning("I couldn't find any relevant research papers about this topic. Please try rephrasing your question or ask something else about autism.")
+        else:
+            # Get relevant context
+            context = "\n".join([
+                f"{text[:1000]}" for text in df['text'].head(3)
+            ])
+            # Generate answer
+            answer = generate_answer(query, context)
+            if answer and not answer.isspace():
+                st.success("Answer found!")
+                st.write(answer)
+                st.write("### Sources used:")
+                for _, row in df.head(3).iterrows():
+                    st.write(f"**Title:** {row['title']}")
+                    st.write(f"**Summary:** {row['text'][:200]}...")
+                    st.write("---")
+            else:
+                st.warning("I couldn't find a specific answer in the research papers. Try rephrasing your question.")

faiss_index/index.py CHANGED Viewed

@@ -49,6 +49,18 @@ def fetch_arxiv_papers(query, max_results=10):
 def build_faiss_index(papers, dataset_dir=DATASET_DIR):
     """Build and save dataset with FAISS index for RAG"""
     # Initialize smaller DPR encoder
     ctx_encoder = DPRContextEncoder.from_pretrained(
         "facebook/dpr-ctx_encoder-single-nq-base",

 def build_faiss_index(papers, dataset_dir=DATASET_DIR):
     """Build and save dataset with FAISS index for RAG"""
+    if not papers:
+        logging.warning("No papers found. Creating empty dataset.")
+        # Create an empty dataset with the expected structure
+        dataset = Dataset.from_dict({
+            "text": [],
+            "embeddings": [],
+            "title": []
+        })
+        os.makedirs(dataset_dir, exist_ok=True)
+        dataset.save_to_disk(os.path.join(dataset_dir, "dataset"))
+        return dataset_dir
     # Initialize smaller DPR encoder
     ctx_encoder = DPRContextEncoder.from_pretrained(
         "facebook/dpr-ctx_encoder-single-nq-base",