Phoenix21 committed on
Commit
ac8126f
·
verified ·
1 Parent(s): d329916

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +2 -10
pipeline.py CHANGED
@@ -188,9 +188,6 @@ def classify_query(query: str) -> str:
188
  ################################################################################
189
 
190
  def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
191
- """
192
- Builds or loads a FAISS vector store for CSV documents containing 'Question' and 'Answers'.
193
- """
194
  try:
195
  if os.path.exists(store_dir):
196
  print(f"DEBUG: Found existing FAISS store at '{store_dir}'. Loading...")
@@ -202,22 +199,18 @@ def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
202
  df = pd.read_csv(csv_path)
203
  df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
204
  df.columns = df.columns.str.strip()
205
-
206
- # Fix possible column name variations
207
  if "Answer" in df.columns:
208
  df.rename(columns={"Answer": "Answers"}, inplace=True)
209
  if "Question" not in df.columns and "Question " in df.columns:
210
  df.rename(columns={"Question ": "Question"}, inplace=True)
211
  if "Question" not in df.columns or "Answers" not in df.columns:
212
  raise ValueError("CSV must have 'Question' and 'Answers' columns.")
213
-
214
  docs = []
215
  for _, row in df.iterrows():
216
  q = str(row["Question"])
217
  ans = str(row["Answers"])
218
  doc = Document(page_content=ans, metadata={"question": q})
219
  docs.append(doc)
220
-
221
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
222
  vectorstore = FAISS.from_documents(docs, embedding=embeddings)
223
  vectorstore.save_local(store_dir)
@@ -227,11 +220,11 @@ def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
227
  raise RuntimeError(f"Error building/loading vector store: {str(e)}")
228
 
229
  def build_rag_chain(vectorstore: FAISS) -> RetrievalQA:
230
- """Build RAG chain using the Gemini LLM."""
231
  try:
232
  retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
233
  chain = RetrievalQA.from_chain_type(
234
- llm=gemini_llm,
235
  chain_type="stuff",
236
  retriever=retriever,
237
  return_source_documents=True
@@ -239,7 +232,6 @@ def build_rag_chain(vectorstore: FAISS) -> RetrievalQA:
239
  return chain
240
  except Exception as e:
241
  raise RuntimeError(f"Error building RAG chain: {str(e)}")
242
-
243
  ################################################################################
244
  # Web Search Caching: Separate FAISS Vector Store
245
  ################################################################################
 
188
  ################################################################################
189
 
190
  def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
 
 
 
191
  try:
192
  if os.path.exists(store_dir):
193
  print(f"DEBUG: Found existing FAISS store at '{store_dir}'. Loading...")
 
199
  df = pd.read_csv(csv_path)
200
  df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
201
  df.columns = df.columns.str.strip()
 
 
202
  if "Answer" in df.columns:
203
  df.rename(columns={"Answer": "Answers"}, inplace=True)
204
  if "Question" not in df.columns and "Question " in df.columns:
205
  df.rename(columns={"Question ": "Question"}, inplace=True)
206
  if "Question" not in df.columns or "Answers" not in df.columns:
207
  raise ValueError("CSV must have 'Question' and 'Answers' columns.")
 
208
  docs = []
209
  for _, row in df.iterrows():
210
  q = str(row["Question"])
211
  ans = str(row["Answers"])
212
  doc = Document(page_content=ans, metadata={"question": q})
213
  docs.append(doc)
 
214
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
215
  vectorstore = FAISS.from_documents(docs, embedding=embeddings)
216
  vectorstore.save_local(store_dir)
 
220
  raise RuntimeError(f"Error building/loading vector store: {str(e)}")
221
 
222
  def build_rag_chain(vectorstore: FAISS) -> RetrievalQA:
223
+ """Build RAG chain using the Gemini LLM directly without a custom class."""
224
  try:
225
  retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
226
  chain = RetrievalQA.from_chain_type(
227
+ llm=gemini_llm, # Directly use the ChatGoogleGenerativeAI instance
228
  chain_type="stuff",
229
  retriever=retriever,
230
  return_source_documents=True
 
232
  return chain
233
  except Exception as e:
234
  raise RuntimeError(f"Error building RAG chain: {str(e)}")
 
235
  ################################################################################
236
  # Web Search Caching: Separate FAISS Vector Store
237
  ################################################################################