Update app.py
app.py (CHANGED)
@@ -3,11 +3,23 @@ warnings.simplefilter("ignore", category=FutureWarning)
|
|
3 |
|
4 |
import os
|
5 |
import streamlit as st
|
|
|
6 |
from huggingface_hub import InferenceClient
|
7 |
from langchain_community.vectorstores import Neo4jVector
|
8 |
from transformers import AutoTokenizer, AutoModel
|
9 |
import torch
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
# Custom Embedding Class
|
12 |
class CustomHuggingFaceEmbeddings:
|
13 |
def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
|
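Note: the driver is now created at module import, so bad `NEO4J_*` secrets only surface on the first query, and `os.environ['NEO4J_URI']` raises KeyError outright if the variable is unset. A minimal fail-fast sketch (assuming the secrets are configured in the Space):

    # Sketch: check the connection once at startup instead of on the first
    # session.run(); verify_connectivity() is part of the neo4j Python driver.
    driver.verify_connectivity()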
@@ -44,11 +56,6 @@ def setup_vector_index():
         embedding_node_property='embedding',
     )
 
-# Hugging Face API Setup
-API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")
-MISTRAL_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
-client = InferenceClient(api_key=API_TOKEN, )
-
 # Query Mistral
 def query_from_mistral(context: str, user_input: str):
     messages = [
@@ -62,6 +69,17 @@ def query_from_mistral(context: str, user_input: str):
     )
     return completion.choices[0].message["content"]
 
+# Find keywords
+def query_article_keywords(name):
+    with driver.session() as session:
+        query = """
+        MATCH (a:Article)-[:CONTAIN]->(k:Keyword)
+        WHERE a.name = $name
+        RETURN k
+        """
+        result = session.run(query, name=name)
+        return [record["k"] for record in result]
+
 # extract data from retriever response
 def extract_data(documents):
     result = []
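Note: the Cypher match on `a.name = $name` assumes article titles are stored in the `name` property of `Article` nodes. A hypothetical smoke test (the title is made up; the `text` property is taken from elsewhere in this diff):

    nodes = query_article_keywords("some article title")  # hypothetical title
    print([dict(n).get("text") for n in nodes])           # expect a list of keyword strings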
@@ -79,10 +97,14 @@ def extract_data(documents):
             elif line.lower().startswith("abstract:"):
                 abstract = line[len("abstract:"):].strip()
 
+        keywords = query_article_keywords(title)
+        keywords = [dict(node)['text'] for node in keywords]
+
         doc_data = {
             "Publication Date": publication_date,
             "Title": title,
             "Abstract": abstract,
+            "keywords": ','.join(keywords)
         }
         result.append(doc_data)
 
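Note: this runs one Cypher round trip per extracted document, and `dict(node)['text']` raises KeyError for any Keyword node missing a `text` property. A defensive sketch, with `.get` swapped in as an assumption:

    keywords = [dict(node).get('text', '') for node in query_article_keywords(title)]
    keywords = [k for k in keywords if k]  # skip nodes without a 'text' property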
@@ -91,8 +113,25 @@ def extract_data(documents):
 # Main Streamlit Application
 def main():
     st.set_page_config(page_title="Vector Chat with Mistral", layout="centered")
-
-
+
+    # App description and features
+    st.title("🤖 RAG with Mistral")
+    st.markdown("""
+    ## Description:
+    Chat with **Mistral-7B-Instruct** using context retrieved from a **Neo4j** vector index. This app allows you to ask questions, and the assistant will provide real-time, context-driven answers by querying relevant articles and their keywords from the database.
+    """)
+
+    st.image(image="image.jpg", caption="Neo4j")
+
+    st.markdown("""
+    ## Key Features:
+    - **Real-time context search** from a Neo4j vector index.
+    - **Integration with the Mistral-7B-Instruct model** for natural language processing.
+    - **Keyword extraction** from relevant articles for enhanced context-based responses.
+
+    ## GitHub Repository:
+    You can find the source code and more information about this app on GitHub: [GitHub Repository Link](https://github.com/yourusername/your-repository-name)
+    """)
 
     # Initialize the vector index
     vector_index = setup_vector_index()
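Note: the GitHub link is still the `yourusername/your-repository-name` placeholder, and `st.image(image="image.jpg", ...)` assumes image.jpg ships with the Space; if the file is missing, the app errors at render time. A guarded sketch:

    from pathlib import Path
    if Path("image.jpg").exists():              # assumption: the image is optional
        st.image("image.jpg", caption="Neo4j")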
@@ -115,8 +154,17 @@ def main():
             st.warning("No relevant context found. Please refine your query.")
             response = "I'm sorry, I couldn't find any relevant information to answer your question."
         else:
-
-
+            data_dict = extract_data(context_results)
+
+            # convert to string
+            context = '\n'.join([
+                f"Title: {doc['Title']}\n"
+                f"Abstract: {doc['Abstract']}\n"
+                f"Publication Date: {doc['Publication Date']}\n"
+                f"Keywords: {doc['keywords']}"
+                for doc in data_dict
+            ])
+
             response = query_from_mistral(context.strip(), user_input)
 
         st.session_state.messages.append({"role": "bot", "content": response})
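Note: with `'\n'.join`, the `Keywords:` line of one document sits directly above the `Title:` line of the next. If the model needs clearer document boundaries, a sketch using a blank-line separator (same keys as above):

    context = '\n\n'.join(
        f"Title: {doc['Title']}\n"
        f"Abstract: {doc['Abstract']}\n"
        f"Publication Date: {doc['Publication Date']}\n"
        f"Keywords: {doc['keywords']}"
        for doc in data_dict
    )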
|