Adventure123 committed on
Commit
6d21a48
·
verified ·
1 Parent(s): 5064436

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -28
app.py CHANGED
@@ -8,8 +8,6 @@ from langchain_community.vectorstores import Neo4jVector
8
  from transformers import AutoTokenizer, AutoModel
9
  import torch
10
 
11
- print(f"Username Neo4j: {os.environ.get('NEO4J_USERNAME')}")
12
-
13
  # Custom Embedding Class
14
  class CustomHuggingFaceEmbeddings:
15
  def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
@@ -25,7 +23,7 @@ class CustomHuggingFaceEmbeddings:
25
  with torch.no_grad():
26
  outputs = self.model(**inputs)
27
  return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
28
-
29
  def embed_query(self, text):
30
  return self.embed_text(text)
31
 
@@ -37,19 +35,19 @@ class CustomHuggingFaceEmbeddings:
37
  def setup_vector_index():
38
  return Neo4jVector.from_existing_graph(
39
  CustomHuggingFaceEmbeddings(),
40
- url=os.environ.get('NEO4J_URI'),
41
- username=os.environ.get('NEO4J_USERNAME'),
42
- password=os.environ.get('NEO4J_PASSWORD'),
43
  index_name='articles',
44
  node_label="Article",
45
- text_node_properties=['topic', 'title', 'abstract'],
46
  embedding_node_property='embedding',
47
  )
48
 
49
  # Hugging Face API Setup
50
  API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")
51
  MISTRAL_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
52
- client = InferenceClient(api_key=API_TOKEN)
53
 
54
  # Query Mistral
55
  def query_from_mistral(context: str, user_input: str):
@@ -69,26 +67,25 @@ def extract_data(documents):
69
  result = []
70
 
71
  for doc in documents:
72
- # Extract metadata
73
- publication_date = doc.metadata.get('publication_date')
74
- if publication_date:
75
- publication_date = publication_date.isoformat()
76
-
77
- # Extract page content
78
  page_content = doc.page_content.strip().split("\n")
79
- topic = page_content[1].strip() if len(page_content) > 1 else "N/A"
80
- title = page_content[2].strip() if len(page_content) > 2 else "N/A"
81
- abstract = page_content[3].strip() if len(page_content) > 3 else "N/A"
82
 
83
- # Format the extracted data as a string
84
- doc_data = (
85
- f"Publication Date: {publication_date}\n"
86
- f"Topic: {topic}\n"
87
- f"Title: {title}\n"
88
- f"Abstract: {abstract}\n"
89
- )
 
 
 
 
 
 
 
90
  result.append(doc_data)
91
-
92
  return result
93
 
94
  # Main Streamlit Application
@@ -113,11 +110,12 @@ def main():
113
  with st.spinner("Fetching response..."):
114
  try:
115
  # Retrieve context from the vector index
116
- context_results = vector_index.similarity_search(user_input, top_k=3)
117
- context = extract_data(context_results)[0]
 
118
 
119
  # Get response from Mistral
120
- response = query_from_mistral(context, user_input)
121
  st.session_state.messages.append({"role": "bot", "content": response})
122
  except Exception as e:
123
  st.error(f"Error: {e}")
 
8
  from transformers import AutoTokenizer, AutoModel
9
  import torch
10
 
 
 
11
  # Custom Embedding Class
12
  class CustomHuggingFaceEmbeddings:
13
  def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
 
23
  with torch.no_grad():
24
  outputs = self.model(**inputs)
25
  return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
26
+
27
  def embed_query(self, text):
28
  return self.embed_text(text)
29
 
 
35
  def setup_vector_index():
36
  return Neo4jVector.from_existing_graph(
37
  CustomHuggingFaceEmbeddings(),
38
+ url=os.environ['NEO4J_URI'],
39
+ username=os.environ['NEO4J_USERNAME'],
40
+ password=os.environ['NEO4J_PASSWORD'],
41
  index_name='articles',
42
  node_label="Article",
43
+ text_node_properties=['name', 'abstract'],
44
  embedding_node_property='embedding',
45
  )
46
 
47
  # Hugging Face API Setup
48
  API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")
49
  MISTRAL_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
50
+ client = InferenceClient(api_key=API_TOKEN, )
51
 
52
  # Query Mistral
53
  def query_from_mistral(context: str, user_input: str):
 
67
  result = []
68
 
69
  for doc in documents:
70
+ publication_date = doc.metadata.get('date_publication', "N/A")
 
 
 
 
 
71
  page_content = doc.page_content.strip().split("\n")
 
 
 
72
 
73
+ title = "N/A"
74
+ abstract = "N/A"
75
+
76
+ for line in page_content:
77
+ if line.lower().startswith("name:"):
78
+ title = line[len("name:"):].strip()
79
+ elif line.lower().startswith("abstract:"):
80
+ abstract = line[len("abstract:"):].strip()
81
+
82
+ doc_data = {
83
+ "Publication Date": publication_date,
84
+ "Title": title,
85
+ "Abstract": abstract,
86
+ }
87
  result.append(doc_data)
88
+
89
  return result
90
 
91
  # Main Streamlit Application
 
110
  with st.spinner("Fetching response..."):
111
  try:
112
  # Retrieve context from the vector index
113
+ context_results = vector_index.similarity_search(user_input, k=5)
114
+ context = "\n".join([f"Title: {doc['Title']}\nAbstract: {doc['Abstract']}\nPublication Date: {doc['Publication Date']}"
115
+ for doc in extract_data(context_results)])
116
 
117
  # Get response from Mistral
118
+ response = query_from_mistral(context.strip(), user_input)
119
  st.session_state.messages.append({"role": "bot", "content": response})
120
  except Exception as e:
121
  st.error(f"Error: {e}")