Spaces:

Adventure123
/

Chatbot-Intro-DSDE

Sleeping

App Files Files Community

Adventure123 committed on Dec 5, 2024

Commit

6d21a48

verified ·

1 Parent(s): 5064436

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -28

app.py CHANGED Viewed

@@ -8,8 +8,6 @@ from langchain_community.vectorstores import Neo4jVector
 from transformers import AutoTokenizer, AutoModel
 import torch
-print(f"Username Neo4j: {os.environ.get('NEO4J_USERNAME')}")
 # Custom Embedding Class
 class CustomHuggingFaceEmbeddings:
     def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
@@ -25,7 +23,7 @@ class CustomHuggingFaceEmbeddings:
         with torch.no_grad():
             outputs = self.model(**inputs)
         return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
     def embed_query(self, text):
         return self.embed_text(text)
@@ -37,19 +35,19 @@ class CustomHuggingFaceEmbeddings:
 def setup_vector_index():
     return Neo4jVector.from_existing_graph(
         CustomHuggingFaceEmbeddings(),
-        url=os.environ.get('NEO4J_URI'),
-        username=os.environ.get('NEO4J_USERNAME'),
-        password=os.environ.get('NEO4J_PASSWORD'),
         index_name='articles',
         node_label="Article",
-        text_node_properties=['topic', 'title', 'abstract'],
         embedding_node_property='embedding',
     )
 # Hugging Face API Setup
 API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")
 MISTRAL_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
-client = InferenceClient(api_key=API_TOKEN)
 # Query Mistral
 def query_from_mistral(context: str, user_input: str):
@@ -69,26 +67,25 @@ def extract_data(documents):
     result = []
     for doc in documents:
-        # Extract metadata
-        publication_date = doc.metadata.get('publication_date')
-        if publication_date:
-            publication_date = publication_date.isoformat()
-        # Extract page content
         page_content = doc.page_content.strip().split("\n")
-        topic = page_content[1].strip() if len(page_content) > 1 else "N/A"
-        title = page_content[2].strip() if len(page_content) > 2 else "N/A"
-        abstract = page_content[3].strip() if len(page_content) > 3 else "N/A"
-        # Format the extracted data as a string
-        doc_data = (
-            f"Publication Date: {publication_date}\n"
-            f"Topic: {topic}\n"
-            f"Title: {title}\n"
-            f"Abstract: {abstract}\n"
-        )
         result.append(doc_data)
     return result
 # Main Streamlit Application
@@ -113,11 +110,12 @@ def main():
         with st.spinner("Fetching response..."):
             try:
                 # Retrieve context from the vector index
-                context_results = vector_index.similarity_search(user_input, top_k=3)
-                context = extract_data(context_results)[0]
                 # Get response from Mistral
-                response = query_from_mistral(context, user_input)
                 st.session_state.messages.append({"role": "bot", "content": response})
             except Exception as e:
                 st.error(f"Error: {e}")

 from transformers import AutoTokenizer, AutoModel
 import torch
 # Custom Embedding Class
 class CustomHuggingFaceEmbeddings:
     def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
         with torch.no_grad():
             outputs = self.model(**inputs)
         return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
     def embed_query(self, text):
         return self.embed_text(text)
 def setup_vector_index():
     return Neo4jVector.from_existing_graph(
         CustomHuggingFaceEmbeddings(),
+        url=os.environ['NEO4J_URI'],
+        username=os.environ['NEO4J_USERNAME'],
+        password=os.environ['NEO4J_PASSWORD'],
         index_name='articles',
         node_label="Article",
+        text_node_properties=['name', 'abstract'],
         embedding_node_property='embedding',
     )
 # Hugging Face API Setup
 API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")
 MISTRAL_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
+client = InferenceClient(api_key=API_TOKEN, )
 # Query Mistral
 def query_from_mistral(context: str, user_input: str):
     result = []
     for doc in documents:
+        publication_date = doc.metadata.get('date_publication', "N/A")
         page_content = doc.page_content.strip().split("\n")
+        title = "N/A"
+        abstract = "N/A"
+        for line in page_content:
+            if line.lower().startswith("name:"):
+                title = line[len("name:"):].strip()
+            elif line.lower().startswith("abstract:"):
+                abstract = line[len("abstract:"):].strip()
+        doc_data = {
+            "Publication Date": publication_date,
+            "Title": title,
+            "Abstract": abstract,
+        }
         result.append(doc_data)
     return result
 # Main Streamlit Application
         with st.spinner("Fetching response..."):
             try:
                 # Retrieve context from the vector index
+                context_results = vector_index.similarity_search(user_input, k=5)
+                context = "\n".join([f"Title: {doc['Title']}\nAbstract: {doc['Abstract']}\nPublication Date: {doc['Publication Date']}"
+                                     for doc in extract_data(context_results)])
                 # Get response from Mistral
+                response = query_from_mistral(context.strip(), user_input)
                 st.session_state.messages.append({"role": "bot", "content": response})
             except Exception as e:
                 st.error(f"Error: {e}")