Commit 62138c4 (verified) by Daemontatox · 1 Parent(s): 2c2653b

Update app.py

Files changed (1): app.py (+28, -9)
app.py CHANGED
@@ -1,30 +1,45 @@
+import os
+from dotenv import load_dotenv
 from langchain_community.vectorstores import Qdrant
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain.llms import HuggingFacePipeline
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-import os
-from dotenv import load_dotenv
 from langchain.prompts import ChatPromptTemplate
 from langchain.schema.runnable import RunnablePassthrough
 from langchain.schema.output_parser import StrOutputParser
 from qdrant_client import QdrantClient, models
 from langchain_qdrant import Qdrant
 import gradio as gr
-import spaces
+import torch
 
 # Load environment variables
 load_dotenv()
 
+# Verify environment variables
+qdrant_url = os.getenv("QDRANT_URL")
+qdrant_api_key = os.getenv("QDRANT_API_KEY")
+
+print(f"QDRANT_URL: {qdrant_url}")
+print(f"QDRANT_API_KEY: {qdrant_api_key}")
+
 # HuggingFace Embeddings
 embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")
 
 # Qdrant Client Setup
 client = QdrantClient(
-    url=os.getenv("QDRANT_URL"),
-    api_key=os.getenv("QDRANT_API_KEY"),
-    # prefer_grpc=True
+    url=qdrant_url,
+    api_key=qdrant_api_key,
+    prefer_grpc=True
 )
 
+# Check if the connection is successful
+try:
+    client.get_collection(collection_name)
+    print(f"Successfully connected to Qdrant collection: {collection_name}")
+except Exception as e:
+    print(f"Failed to connect to Qdrant: {e}")
+    raise e
+
 collection_name = "mawared"
 
 # Try to create collection, handle if it already exists
@@ -57,12 +72,16 @@ retriever = db.as_retriever(
 )
 
 # Load Hugging Face Model
-model_name = "Daemontatox/CogitoZ14" # Replace with your desired model
+model_name = "NousResearch/Hermes-3-Llama-3.2-3B" # Replace with your desired model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
 
+# Ensure the model is on the GPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
 # Create Hugging Face Pipeline with the specified model and tokenizer
-hf_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
+hf_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
 
 # LangChain LLM using Hugging Face Pipeline
 llm = HuggingFacePipeline(pipeline=hf_pipeline)
@@ -105,7 +124,7 @@ rag_chain = (
 )
 
 # Define the Gradio function
-@spaces.GPU(duration=120)
+@spaces.GPU()
 def ask_question_gradio(question):
     result = ""
     for chunk in rag_chain.stream(question):
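
Two ordering issues in the updated file are worth noting: the new connection check calls `client.get_collection(collection_name)` before `collection_name = "mawared"` is assigned, and `import spaces` was removed even though the `@spaces.GPU()` decorator is still applied to `ask_question_gradio`. A minimal reordering sketch, reusing the names from this commit (illustrative, not the committed code):

```python
# Hypothetical reordering of the Qdrant setup from this commit
import os

import spaces  # still required: @spaces.GPU() is used on ask_question_gradio below
from qdrant_client import QdrantClient

qdrant_url = os.getenv("QDRANT_URL")
qdrant_api_key = os.getenv("QDRANT_API_KEY")

collection_name = "mawared"  # assigned before the connection check that reads it

client = QdrantClient(
    url=qdrant_url,
    api_key=qdrant_api_key,
    prefer_grpc=True,
)

# Check the connection using the already-defined collection name
try:
    client.get_collection(collection_name)
    print(f"Successfully connected to Qdrant collection: {collection_name}")
except Exception as e:
    print(f"Failed to connect to Qdrant: {e}")
    raise
```

Printing the raw `QDRANT_API_KEY` to the Space logs is also worth reconsidering; logging only whether the variable is set gives the same debugging signal without exposing the secret.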
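On device placement: `device_map="auto"` already dispatches the model via accelerate, so additionally calling `model.to(device)` and passing `device=` to `pipeline()` can raise an error in recent transformers releases. A hedged sketch of the two usual alternatives (the model name comes from this commit; everything else is illustrative):

```python
# Illustrative only: pick one placement strategy, not both at once
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name = "NousResearch/Hermes-3-Llama-3.2-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Option A: let accelerate handle placement and omit `device` in the pipeline call
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", trust_remote_code=True
)
hf_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Option B: load without device_map and pass a device index to pipeline()
# model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
# device = 0 if torch.cuda.is_available() else -1
# hf_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
```

Either option avoids mixing accelerate dispatch with a manual `device` argument.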