Spaces:

Phoenix21
/

Chatbot2

Sleeping

App Files Files Community

Phoenix21 committed on Jan 9

Commit

b0c64f6

verified ·

1 Parent(s): 5ee5862

Create pipeline.py

Browse files

Files changed (1) hide show

pipeline.py +169 -0

pipeline.py ADDED Viewed

	@@ -0,0 +1,169 @@

+# pipeline.py
+import os
+import getpass
+import pandas as pd
+from typing import Optional
+from langchain.docstore.document import Document
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.chains import RetrievalQA
+from smolagents import CodeAgent, DuckDuckGoSearchTool, ManagedAgent, LiteLLMModel
+import litellm
+# We import the chain builders from our separate files
+from classification_chain import get_classification_chain
+from refusal_chain import get_refusal_chain
+from tailor_chain import get_tailor_chain
+from cleaner_chain import get_cleaner_chain, CleanerChain
+# The RAG logic is not imported from a separate module:
+# build_rag_chain is defined directly in this file for clarity.
+###############################################################################
+# 1) Environment: set up keys if missing
+###############################################################################
+if not os.environ.get("GEMINI_API_KEY"):
+    os.environ["GEMINI_API_KEY"] = getpass.getpass("Enter your Gemini API Key: ")
+if not os.environ.get("GROQ_API_KEY"):
+    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your GROQ API Key: ")
+###############################################################################
+# 2) build_or_load_vectorstore
+###############################################################################
+def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
+    if os.path.exists(store_dir):
+        print(f"DEBUG: Found existing FAISS store at '{store_dir}'. Loading...")
+        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
+        vectorstore = FAISS.load_local(store_dir, embeddings)
+        return vectorstore
+    else:
+        print(f"DEBUG: Building new store from CSV: {csv_path}")
+        df = pd.read_csv(csv_path)
+        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
+        df.columns = df.columns.str.strip()
+        if "Answer" in df.columns:
+            df.rename(columns={"Answer": "Answers"}, inplace=True)
+        if "Question" not in df.columns and "Question " in df.columns:
+            df.rename(columns={"Question ": "Question"}, inplace=True)
+        if "Question" not in df.columns or "Answers" not in df.columns:
+            raise ValueError("CSV must have 'Question' and 'Answers' columns.")
+        docs = []
+        for _, row in df.iterrows():
+            q = str(row["Question"])
+            ans = str(row["Answers"])
+            doc = Document(page_content=ans, metadata={"question": q})
+            docs.append(doc)
+        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
+        vectorstore = FAISS.from_documents(docs, embedding=embeddings)
+        vectorstore.save_local(store_dir)
+        return vectorstore
+###############################################################################
+# 3) Build RAG chain for Gemini
+###############################################################################
+from langchain.llms.base import LLM
+def build_rag_chain(llm_model: LiteLLMModel, vectorstore: FAISS) -> RetrievalQA:
+    class GeminiLangChainLLM(LLM):
+        def _call(self, prompt: str, stop: Optional[list] = None, **kwargs) -> str:
+            messages = [{"role": "user", "content": prompt}]
+            return llm_model(messages, stop_sequences=stop)
+        @property
+        def _llm_type(self) -> str:
+            return "custom_gemini"
+    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
+    gemini_as_llm = GeminiLangChainLLM()
+    rag_chain = RetrievalQA.from_chain_type(
+        llm=gemini_as_llm,
+        chain_type="stuff",
+        retriever=retriever,
+        return_source_documents=True
+    )
+    return rag_chain
+###############################################################################
+# 4) Initialize all the separate chains
+###############################################################################
+# Classification chain
+classification_chain = get_classification_chain()
+# Refusal chain
+refusal_chain = get_refusal_chain()
+# Tailor chain
+tailor_chain = get_tailor_chain()
+# Cleaner chain
+cleaner_chain = get_cleaner_chain()
+###############################################################################
+# 5) Build our vectorstores + RAG chains
+###############################################################################
+wellness_csv = "AIChatbot.csv"
+brand_csv = "BrandAI.csv"
+wellness_store_dir = "faiss_wellness_store"
+brand_store_dir = "faiss_brand_store"
+wellness_vectorstore = build_or_load_vectorstore(wellness_csv, wellness_store_dir)
+brand_vectorstore = build_or_load_vectorstore(brand_csv, brand_store_dir)
+gemini_llm = LiteLLMModel(model_id="gemini/gemini-pro", api_key=os.environ.get("GEMINI_API_KEY"))
+wellness_rag_chain = build_rag_chain(gemini_llm, wellness_vectorstore)
+brand_rag_chain = build_rag_chain(gemini_llm, brand_vectorstore)
+###############################################################################
+# 6) Tools / Agents for web search
+###############################################################################
+search_tool = DuckDuckGoSearchTool()
+web_agent = CodeAgent(tools=[search_tool], model=gemini_llm)
+managed_web_agent = ManagedAgent(agent=web_agent, name="web_search", description="Runs web search for you.")
+manager_agent = CodeAgent(tools=[], model=gemini_llm, managed_agents=[managed_web_agent])
+def do_web_search(query: str) -> str:
+    print("DEBUG: Attempting web search for more info...")
+    search_query = f"Give me relevant info: {query}"
+    response = manager_agent.run(search_query)
+    return response
+###############################################################################
+# 7) Orchestrator: run_with_chain
+###############################################################################
+def run_with_chain(query: str) -> str:
+    print("DEBUG: Starting run_with_chain...")
+    # 1) Classify
+    class_result = classification_chain.invoke({"query": query})
+    classification = class_result.get("text", "").strip()
+    print("DEBUG: Classification =>", classification)
+    # If OutOfScope => refusal => tailor => return
+    if classification == "OutOfScope":
+        refusal_text = refusal_chain.run({})
+        final_refusal = tailor_chain.run({"response": refusal_text})
+        return final_refusal.strip()
+    # If Wellness => wellness RAG => if insufficient => web => unify => tailor
+    if classification == "Wellness":
+        rag_result = wellness_rag_chain({"query": query})
+        csv_answer = rag_result["result"].strip()
+        if not csv_answer:
+            web_answer = do_web_search(query)
+        else:
+            lower_ans = csv_answer.lower()
+            if any(phrase in lower_ans for phrase in ["i do not know", "not sure", "no context", "cannot answer"]):
+                web_answer = do_web_search(query)
+            else:
+                web_answer = ""
+        final_merged = cleaner_chain.merge(kb=csv_answer, web=web_answer)
+        final_answer = tailor_chain.run({"response": final_merged})
+        return final_answer.strip()
+    # If Brand => brand RAG => tailor => return
+    if classification == "Brand":
+        rag_result = brand_rag_chain({"query": query})
+        csv_answer = rag_result["result"].strip()
+        final_merged = cleaner_chain.merge(kb=csv_answer, web="")
+        final_answer = tailor_chain.run({"response": final_merged})
+        return final_answer.strip()
+    # fallback
+    refusal_text = refusal_chain.run({})
+    final_refusal = tailor_chain.run({"response": refusal_text})
+    return final_refusal.strip()