Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,8 +8,6 @@ from langchain_community.vectorstores import Neo4jVector
|
|
8 |
from transformers import AutoTokenizer, AutoModel
|
9 |
import torch
|
10 |
|
11 |
-
print(f"Username Neo4j: {os.environ.get('NEO4J_USERNAME')}")
|
12 |
-
|
13 |
# Custom Embedding Class
|
14 |
class CustomHuggingFaceEmbeddings:
|
15 |
def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
|
@@ -25,7 +23,7 @@ class CustomHuggingFaceEmbeddings:
|
|
25 |
with torch.no_grad():
|
26 |
outputs = self.model(**inputs)
|
27 |
return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
|
28 |
-
|
29 |
def embed_query(self, text):
|
30 |
return self.embed_text(text)
|
31 |
|
@@ -37,19 +35,19 @@ class CustomHuggingFaceEmbeddings:
|
|
37 |
def setup_vector_index():
|
38 |
return Neo4jVector.from_existing_graph(
|
39 |
CustomHuggingFaceEmbeddings(),
|
40 |
-
url=os.environ
|
41 |
-
username=os.environ
|
42 |
-
password=os.environ
|
43 |
index_name='articles',
|
44 |
node_label="Article",
|
45 |
-
text_node_properties=['
|
46 |
embedding_node_property='embedding',
|
47 |
)
|
48 |
|
49 |
# Hugging Face API Setup
|
50 |
API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")
|
51 |
MISTRAL_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
|
52 |
-
client = InferenceClient(api_key=API_TOKEN)
|
53 |
|
54 |
# Query Mistral
|
55 |
def query_from_mistral(context: str, user_input: str):
|
@@ -69,26 +67,25 @@ def extract_data(documents):
|
|
69 |
result = []
|
70 |
|
71 |
for doc in documents:
|
72 |
-
|
73 |
-
publication_date = doc.metadata.get('publication_date')
|
74 |
-
if publication_date:
|
75 |
-
publication_date = publication_date.isoformat()
|
76 |
-
|
77 |
-
# Extract page content
|
78 |
page_content = doc.page_content.strip().split("\n")
|
79 |
-
topic = page_content[1].strip() if len(page_content) > 1 else "N/A"
|
80 |
-
title = page_content[2].strip() if len(page_content) > 2 else "N/A"
|
81 |
-
abstract = page_content[3].strip() if len(page_content) > 3 else "N/A"
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
result.append(doc_data)
|
91 |
-
|
92 |
return result
|
93 |
|
94 |
# Main Streamlit Application
|
@@ -113,11 +110,12 @@ def main():
|
|
113 |
with st.spinner("Fetching response..."):
|
114 |
try:
|
115 |
# Retrieve context from the vector index
|
116 |
-
context_results = vector_index.similarity_search(user_input,
|
117 |
-
context =
|
|
|
118 |
|
119 |
# Get response from Mistral
|
120 |
-
response = query_from_mistral(context, user_input)
|
121 |
st.session_state.messages.append({"role": "bot", "content": response})
|
122 |
except Exception as e:
|
123 |
st.error(f"Error: {e}")
|
|
|
8 |
from transformers import AutoTokenizer, AutoModel
|
9 |
import torch
|
10 |
|
|
|
|
|
11 |
# Custom Embedding Class
|
12 |
class CustomHuggingFaceEmbeddings:
|
13 |
def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
|
|
|
23 |
with torch.no_grad():
|
24 |
outputs = self.model(**inputs)
|
25 |
return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
|
26 |
+
|
27 |
def embed_query(self, text):
|
28 |
return self.embed_text(text)
|
29 |
|
|
|
35 |
def setup_vector_index():
|
36 |
return Neo4jVector.from_existing_graph(
|
37 |
CustomHuggingFaceEmbeddings(),
|
38 |
+
url=os.environ['NEO4J_URI'],
|
39 |
+
username=os.environ['NEO4J_USERNAME'],
|
40 |
+
password=os.environ['NEO4J_PASSWORD'],
|
41 |
index_name='articles',
|
42 |
node_label="Article",
|
43 |
+
text_node_properties=['name', 'abstract'],
|
44 |
embedding_node_property='embedding',
|
45 |
)
|
46 |
|
47 |
# Hugging Face API Setup
|
48 |
API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")
|
49 |
MISTRAL_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
|
50 |
+
client = InferenceClient(api_key=API_TOKEN, )
|
51 |
|
52 |
# Query Mistral
|
53 |
def query_from_mistral(context: str, user_input: str):
|
|
|
67 |
result = []
|
68 |
|
69 |
for doc in documents:
|
70 |
+
publication_date = doc.metadata.get('date_publication', "N/A")
|
|
|
|
|
|
|
|
|
|
|
71 |
page_content = doc.page_content.strip().split("\n")
|
|
|
|
|
|
|
72 |
|
73 |
+
title = "N/A"
|
74 |
+
abstract = "N/A"
|
75 |
+
|
76 |
+
for line in page_content:
|
77 |
+
if line.lower().startswith("name:"):
|
78 |
+
title = line[len("name:"):].strip()
|
79 |
+
elif line.lower().startswith("abstract:"):
|
80 |
+
abstract = line[len("abstract:"):].strip()
|
81 |
+
|
82 |
+
doc_data = {
|
83 |
+
"Publication Date": publication_date,
|
84 |
+
"Title": title,
|
85 |
+
"Abstract": abstract,
|
86 |
+
}
|
87 |
result.append(doc_data)
|
88 |
+
|
89 |
return result
|
90 |
|
91 |
# Main Streamlit Application
|
|
|
110 |
with st.spinner("Fetching response..."):
|
111 |
try:
|
112 |
# Retrieve context from the vector index
|
113 |
+
context_results = vector_index.similarity_search(user_input, k=5)
|
114 |
+
context = "\n".join([f"Title: {doc['Title']}\nAbstract: {doc['Abstract']}\nPublication Date: {doc['Publication Date']}"
|
115 |
+
for doc in extract_data(context_results)])
|
116 |
|
117 |
# Get response from Mistral
|
118 |
+
response = query_from_mistral(context.strip(), user_input)
|
119 |
st.session_state.messages.append({"role": "bot", "content": response})
|
120 |
except Exception as e:
|
121 |
st.error(f"Error: {e}")
|