Spaces:

Daemontatox
/

Mawared-Support-Assistant

Running

App Files Community

Daemontatox committed on Jan 9

Commit

8359d12

verified ·

1 Parent(s): 3365a48

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -58

app.py CHANGED Viewed

@@ -1,48 +1,34 @@
-import os
-from dotenv import load_dotenv
 from langchain_community.vectorstores import Qdrant
 from langchain_huggingface import HuggingFaceEmbeddings
-from langchain.llms import HuggingFacePipeline
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from langchain.prompts import ChatPromptTemplate
 from langchain.schema.runnable import RunnablePassthrough
 from langchain.schema.output_parser import StrOutputParser
 from qdrant_client import QdrantClient, models
 from langchain_qdrant import Qdrant
-import gradio as gr
-import torch
-import spaces
 # Load environment variables
 load_dotenv()
-# Verify environment variables
-qdrant_url = os.getenv("QDRANT_URL")
-qdrant_api_key = os.getenv("QDRANT_API_KEY")
-print(f"QDRANT_URL: {qdrant_url}")
-print(f"QDRANT_API_KEY: {qdrant_api_key}")
 # HuggingFace Embeddings
 embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")
 # Qdrant Client Setup
 client = QdrantClient(
-    url=qdrant_url,
-    api_key=qdrant_api_key,
-    #prefer_grpc=True
 )
-collection_name="mawared"
-# Check if the connection is successful
-try:
-    client.get_collection(collection_name)
-    print(f"Successfully connected to Qdrant collection: {collection_name}")
-except Exception as e:
-    print(f"Failed to connect to Qdrant: {e}")
-    raise e
 # Try to create collection, handle if it already exists
 try:
@@ -52,6 +38,7 @@ try:
             size=768,  # GTE-large embedding size
             distance=models.Distance.COSINE
         ),
     )
     print(f"Created new collection: {collection_name}")
 except Exception as e:
@@ -73,20 +60,8 @@ retriever = db.as_retriever(
     search_kwargs={"k": 5}
 )
-# Load Hugging Face Model
-model_name = "NousResearch/Hermes-3-Llama-3.2-3B"  # Replace with your desired model
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
-# Ensure the model is on the GPU
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model.to(device)
-# Create Hugging Face Pipeline with the specified model and tokenizer
-hf_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
-# LangChain LLM using Hugging Face Pipeline
-llm = HuggingFacePipeline(pipeline=hf_pipeline)
 # Create prompt template
 template = """
@@ -117,7 +92,7 @@ Answer
 prompt = ChatPromptTemplate.from_template(template)
-# Create the RAG chain
 rag_chain = (
     {"context": retriever, "question": RunnablePassthrough()}
     | prompt
@@ -125,24 +100,19 @@ rag_chain = (
     | StrOutputParser()
 )
-# Define the Gradio function
-@spaces.GPU()
-def ask_question_gradio(question):
-    result = ""
     for chunk in rag_chain.stream(question):
-        result += chunk
-    return result
-# Create the Gradio interface
-interface = gr.Interface(
-    fn=ask_question_gradio,
-    inputs="text",
-    outputs="text",
-    title="Mawared Expert Assistant",
-    description="Ask questions about the Mawared HR System or any related topic using Chain-of-Thought (CoT) and RAG principles.",
-    theme="compact",
-)
-# Launch Gradio app
 if __name__ == "__main__":
-    interface.launch()

 from langchain_community.vectorstores import Qdrant
+from langchain_groq import ChatGroq
 from langchain_huggingface import HuggingFaceEmbeddings
+import os
+from dotenv import load_dotenv
 from langchain.prompts import ChatPromptTemplate
 from langchain.schema.runnable import RunnablePassthrough
 from langchain.schema.output_parser import StrOutputParser
 from qdrant_client import QdrantClient, models
 from langchain_qdrant import Qdrant
+from langchain_qdrant import QdrantVectorStore
+from langchain_huggingface import ChatHuggingFace
 # Load environment variables (load_dotenv() runs before any os.getenv lookup below)
 load_dotenv()
+os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API")  # NOTE: source variable is GROQ_API; assigning None to os.environ raises TypeError if it is unset
+HF_TOKEN = os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")  # NOTE: same TypeError if HF_TOKEN is unset; HF_TOKEN is later passed as api_key to the LLM
 # HuggingFace Embeddings
 embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")
 # Qdrant Client Setup: URL and API key are read from the environment
 client = QdrantClient(
+    url=os.getenv("QDRANT_URL"),
+    api_key=os.getenv("QDRANT_API_KEY"),
+    prefer_grpc=True
 )
+collection_name = "mawared"  # collection name referenced by the creation message below
 # Try to create collection, handle if it already exists
 try:
             size=768,  # GTE-large embedding size
             distance=models.Distance.COSINE
         ),
     )
     print(f"Created new collection: {collection_name}")
 except Exception as e:
     search_kwargs={"k": 5}
 )
+llm = ChatOpenAI(base_url="https://api-inference.huggingface.co/v1/", temperature=0 , api_key=HF_TOKEN , model="meta-llama/Llama-3.3-70B-Instruct")  # NOTE(review): ChatOpenAI is not among the imports visible in this diff (only ChatGroq / ChatHuggingFace are) - confirm it is imported, otherwise this line raises NameError
 # Create prompt template
 template = """
 prompt = ChatPromptTemplate.from_template(template)
+# Create the RAG chain using LCEL with prompt printing and streaming output
 rag_chain = (
     {"context": retriever, "question": RunnablePassthrough()}
     | prompt
     | StrOutputParser()
 )
+# Ask a question: stream the RAG chain's output for `question` to stdout chunk by chunk. Has no return statement, so it returns None.
+def ask_question(question):
+    print("Answer:\t", end=" ", flush=True)  # prefix printed once, without a trailing newline
     for chunk in rag_chain.stream(question):
+        print(chunk, end="", flush=True)  # flush each chunk so it is shown as soon as it arrives
+    print("\n")  # "\n" plus print's own newline leaves a blank line after the answer
+# Example usage: interactive loop that keeps asking for questions until the user types 'quit'
 if __name__ == "__main__":
+    while True:
+        user_question = input("\n \n \n Ask a question (or type 'quit' to exit): ")
+        if user_question.lower() == 'quit':  # case-insensitive exit command
+            break
+        answer = ask_question(user_question)  # NOTE: ask_question only prints and has no return statement, so `answer` is always None
+        # print("\nFull answer received.\n")