Spaces:
Sleeping
Sleeping
Update pipeline.py
Browse files
- pipeline.py +2 -10
pipeline.py
CHANGED
@@ -188,9 +188,6 @@ def classify_query(query: str) -> str:
|
|
188 |
################################################################################
|
189 |
|
190 |
def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
|
191 |
-
"""
|
192 |
-
Builds or loads a FAISS vector store for CSV documents containing 'Question' and 'Answers'.
|
193 |
-
"""
|
194 |
try:
|
195 |
if os.path.exists(store_dir):
|
196 |
print(f"DEBUG: Found existing FAISS store at '{store_dir}'. Loading...")
|
@@ -202,22 +199,18 @@ def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
|
|
202 |
df = pd.read_csv(csv_path)
|
203 |
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
|
204 |
df.columns = df.columns.str.strip()
|
205 |
-
|
206 |
-
# Fix possible column name variations
|
207 |
if "Answer" in df.columns:
|
208 |
df.rename(columns={"Answer": "Answers"}, inplace=True)
|
209 |
if "Question" not in df.columns and "Question " in df.columns:
|
210 |
df.rename(columns={"Question ": "Question"}, inplace=True)
|
211 |
if "Question" not in df.columns or "Answers" not in df.columns:
|
212 |
raise ValueError("CSV must have 'Question' and 'Answers' columns.")
|
213 |
-
|
214 |
docs = []
|
215 |
for _, row in df.iterrows():
|
216 |
q = str(row["Question"])
|
217 |
ans = str(row["Answers"])
|
218 |
doc = Document(page_content=ans, metadata={"question": q})
|
219 |
docs.append(doc)
|
220 |
-
|
221 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
|
222 |
vectorstore = FAISS.from_documents(docs, embedding=embeddings)
|
223 |
vectorstore.save_local(store_dir)
|
@@ -227,11 +220,11 @@ def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
|
|
227 |
raise RuntimeError(f"Error building/loading vector store: {str(e)}")
|
228 |
|
229 |
def build_rag_chain(vectorstore: FAISS) -> RetrievalQA:
|
230 |
-
"""Build RAG chain using the Gemini LLM."""
|
231 |
try:
|
232 |
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
|
233 |
chain = RetrievalQA.from_chain_type(
|
234 |
-
llm=gemini_llm,
|
235 |
chain_type="stuff",
|
236 |
retriever=retriever,
|
237 |
return_source_documents=True
|
@@ -239,7 +232,6 @@ def build_rag_chain(vectorstore: FAISS) -> RetrievalQA:
|
|
239 |
return chain
|
240 |
except Exception as e:
|
241 |
raise RuntimeError(f"Error building RAG chain: {str(e)}")
|
242 |
-
|
243 |
################################################################################
|
244 |
# Web Search Caching: Separate FAISS Vector Store
|
245 |
################################################################################
|
|
|
188 |
################################################################################
|
189 |
|
190 |
def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
|
|
|
|
|
|
|
191 |
try:
|
192 |
if os.path.exists(store_dir):
|
193 |
print(f"DEBUG: Found existing FAISS store at '{store_dir}'. Loading...")
|
|
|
199 |
df = pd.read_csv(csv_path)
|
200 |
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
|
201 |
df.columns = df.columns.str.strip()
|
|
|
|
|
202 |
if "Answer" in df.columns:
|
203 |
df.rename(columns={"Answer": "Answers"}, inplace=True)
|
204 |
if "Question" not in df.columns and "Question " in df.columns:
|
205 |
df.rename(columns={"Question ": "Question"}, inplace=True)
|
206 |
if "Question" not in df.columns or "Answers" not in df.columns:
|
207 |
raise ValueError("CSV must have 'Question' and 'Answers' columns.")
|
|
|
208 |
docs = []
|
209 |
for _, row in df.iterrows():
|
210 |
q = str(row["Question"])
|
211 |
ans = str(row["Answers"])
|
212 |
doc = Document(page_content=ans, metadata={"question": q})
|
213 |
docs.append(doc)
|
|
|
214 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
|
215 |
vectorstore = FAISS.from_documents(docs, embedding=embeddings)
|
216 |
vectorstore.save_local(store_dir)
|
|
|
220 |
raise RuntimeError(f"Error building/loading vector store: {str(e)}")
|
221 |
|
222 |
def build_rag_chain(vectorstore: FAISS) -> RetrievalQA:
|
223 |
+
"""Build RAG chain using the Gemini LLM directly without a custom class."""
|
224 |
try:
|
225 |
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
|
226 |
chain = RetrievalQA.from_chain_type(
|
227 |
+
llm=gemini_llm, # Directly use the ChatGoogleGenerativeAI instance
|
228 |
chain_type="stuff",
|
229 |
retriever=retriever,
|
230 |
return_source_documents=True
|
|
|
232 |
return chain
|
233 |
except Exception as e:
|
234 |
raise RuntimeError(f"Error building RAG chain: {str(e)}")
|
|
|
235 |
################################################################################
|
236 |
# Web Search Caching: Separate FAISS Vector Store
|
237 |
################################################################################
|