Commit 62138c4 (verified) by Daemontatox · 1 Parent(s): 2c2653b

Update app.py

Files changed (1): app.py (+28, -9)
app.py CHANGED
@@ -1,30 +1,45 @@
+import os
+from dotenv import load_dotenv
 from langchain_community.vectorstores import Qdrant
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain.llms import HuggingFacePipeline
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-import os
-from dotenv import load_dotenv
 from langchain.prompts import ChatPromptTemplate
 from langchain.schema.runnable import RunnablePassthrough
 from langchain.schema.output_parser import StrOutputParser
 from qdrant_client import QdrantClient, models
 from langchain_qdrant import Qdrant
 import gradio as gr
-import spaces
+import torch
 
 # Load environment variables
 load_dotenv()
 
+# Verify environment variables
+qdrant_url = os.getenv("QDRANT_URL")
+qdrant_api_key = os.getenv("QDRANT_API_KEY")
+
+print(f"QDRANT_URL: {qdrant_url}")
+print(f"QDRANT_API_KEY: {qdrant_api_key}")
+
 # HuggingFace Embeddings
 embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")
 
 # Qdrant Client Setup
 client = QdrantClient(
-    url=os.getenv("QDRANT_URL"),
-    api_key=os.getenv("QDRANT_API_KEY"),
-    # prefer_grpc=True
+    url=qdrant_url,
+    api_key=qdrant_api_key,
+    prefer_grpc=True
 )
 
+# Check if the connection is successful
+try:
+    client.get_collection(collection_name)
+    print(f"Successfully connected to Qdrant collection: {collection_name}")
+except Exception as e:
+    print(f"Failed to connect to Qdrant: {e}")
+    raise e
+
 collection_name = "mawared"
 
 # Try to create collection, handle if it already exists
@@ -57,12 +72,16 @@ retriever = db.as_retriever(
 )
 
 # Load Hugging Face Model
-model_name = "Daemontatox/CogitoZ14" # Replace with your desired model
+model_name = "NousResearch/Hermes-3-Llama-3.2-3B" # Replace with your desired model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
 
+# Ensure the model is on the GPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
 # Create Hugging Face Pipeline with the specified model and tokenizer
-hf_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
+hf_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
 
 # LangChain LLM using Hugging Face Pipeline
 llm = HuggingFacePipeline(pipeline=hf_pipeline)
@@ -105,7 +124,7 @@ rag_chain = (
 )
 
 # Define the Gradio function
-@spaces.GPU(duration=120)
+@spaces.GPU()
 def ask_question_gradio(question):
     result = ""
     for chunk in rag_chain.stream(question):
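
Two ordering issues in the updated file are worth noting: the new connection check calls `client.get_collection(collection_name)` before `collection_name = "mawared"` is assigned, and `import spaces` was removed even though the `@spaces.GPU()` decorator is still applied to `ask_question_gradio`. A minimal reordering sketch, reusing the names from this commit (illustrative, not the committed code):

```python
# Hypothetical reordering of the Qdrant setup from this commit
import os

import spaces  # still required: @spaces.GPU() is used on ask_question_gradio below
from qdrant_client import QdrantClient

qdrant_url = os.getenv("QDRANT_URL")
qdrant_api_key = os.getenv("QDRANT_API_KEY")

collection_name = "mawared"  # assigned before the connection check that reads it

client = QdrantClient(
    url=qdrant_url,
    api_key=qdrant_api_key,
    prefer_grpc=True,
)

# Check the connection using the already-defined collection name
try:
    client.get_collection(collection_name)
    print(f"Successfully connected to Qdrant collection: {collection_name}")
except Exception as e:
    print(f"Failed to connect to Qdrant: {e}")
    raise
```

Printing the raw `QDRANT_API_KEY` to the Space logs is also worth reconsidering; logging only whether the variable is set gives the same debugging signal without exposing the secret.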
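On device placement: `device_map="auto"` already dispatches the model via accelerate, so additionally calling `model.to(device)` and passing `device=` to `pipeline()` can raise an error in recent transformers releases. A hedged sketch of the two usual alternatives (the model name comes from this commit; everything else is illustrative):

```python
# Illustrative only: pick one placement strategy, not both at once
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name = "NousResearch/Hermes-3-Llama-3.2-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Option A: let accelerate handle placement and omit `device` in the pipeline call
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", trust_remote_code=True
)
hf_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Option B: load without device_map and pass a device index to pipeline()
# model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
# device = 0 if torch.cuda.is_available() else -1
# hf_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
```

Either option avoids mixing accelerate dispatch with a manual `device` argument.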