Update app.py
app.py
CHANGED
@@ -49,7 +49,7 @@ class ChatHistory:
     def clear(self):
         self.messages = []
 
-# Load environment variables and setup
+# Load environment variables and setup
 load_dotenv()
 
 HF_TOKEN = os.getenv("HF_TOKEN")
@@ -62,7 +62,6 @@ if not HF_TOKEN:
 
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
-# Qdrant setup (same as before)
 try:
     client = QdrantClient(
         url=os.getenv("QDRANT_URL"),
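Note on the Qdrant block this hunk touches: it is a plain environment-driven client setup. A minimal sketch of that pattern (QDRANT_API_KEY is an assumed variable name; only QDRANT_URL is visible in the diff):

import os
from dotenv import load_dotenv
from qdrant_client import QdrantClient

load_dotenv()

# QDRANT_URL appears in the diff; QDRANT_API_KEY is an assumption for this sketch
client = QdrantClient(
    url=os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_API_KEY"),
)
print(client.get_collections())  # fails fast here if the credentials are wrong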
@@ -102,7 +101,7 @@ retriever = db.as_retriever(
 llm = ChatCerebras(
     model="llama-3.3-70b",
     api_key=C_apikey,
-    streaming=True
+    streaming=True
 )
 
 template = """
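Note on streaming=True: with the standard LangChain chat-model interface, the model can then be consumed chunk by chunk, which is what the Queue-based handler further down depends on. A minimal sketch (the prompt is invented; error handling omitted):

from langchain_cerebras import ChatCerebras

llm = ChatCerebras(model="llama-3.3-70b", api_key="...", streaming=True)

# .stream() yields message chunks; each carries a text delta in .content
for chunk in llm.stream("Explain retrieval-augmented generation in one sentence."):
    print(chunk.content, end="", flush=True)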
@@ -140,7 +139,7 @@ def create_rag_chain(chat_history: str):
 
 chat_history = ChatHistory()
 
-def process_stream(stream_queue: Queue, history: List[dict]) -> Generator[List[dict], None, None]:
+def process_stream(stream_queue: Queue, history: List[List[str]]) -> Generator[List[List[str]], None, None]:
     """Process the streaming response and update the chat interface"""
     current_response = ""
 
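This signature change is the core of the commit: the chat history moves from the messages format (a list of role/content dicts) to the pair format that gr.Chatbot uses by default. Side by side, with invented turn text:

# Old shape: one dict per message
history = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
]

# New shape: one [user_text, assistant_text] pair per turn;
# history[-1][1] is the assistant slot the stream writes into
history = [
    ["Hi", "Hello!"],
]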
@@ -151,19 +150,21 @@ def process_stream(stream_queue: Queue, history: List[dict]) -> Generator[List[dict], None, None]:
 
         current_response += chunk
         new_history = history.copy()
-        new_history[-1]["content"] = current_response
+        new_history[-1][1] = current_response  # Update the assistant's message
         yield new_history
 
 @spaces.GPU()
-def ask_question_gradio(question: str, history: List[dict]) -> Generator[tuple, None, None]:
+def ask_question_gradio(question: str, history: List[List[str]]) -> Generator[tuple, None, None]:
     try:
+        if history is None:
+            history = []
+
         chat_history.add_message("user", question)
         formatted_history = chat_history.get_formatted_history()
         rag_chain = create_rag_chain(formatted_history)
 
-        # Update history with user message
-        history.append({"role": "user", "content": question})
-        history.append({"role": "assistant", "content": ""})
+        # Update history with user message and empty assistant message
+        history.append([question, ""])  # User message
 
         # Create a queue for streaming responses
         stream_queue = Queue()
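The stream_queue above feeds a producer/consumer pattern: a worker pushes chunks from the chain into the queue, and process_stream drains it while yielding updated copies of the history. A self-contained sketch of that pattern (run_chain and the None sentinel are illustrative, not names from app.py):

from queue import Queue
from threading import Thread

def run_chain(q: Queue) -> None:
    # Stand-in for iterating rag_chain.stream(...) on a worker thread
    for chunk in ["Hel", "lo ", "world"]:
        q.put(chunk)
    q.put(None)  # sentinel: no more chunks

q: Queue = Queue()
Thread(target=run_chain, args=(q,), daemon=True).start()

text = ""
while (chunk := q.get()) is not None:
    text += chunk
print(text)  # -> Hello world

One caveat worth knowing: history.copy() in process_stream is a shallow copy, so new_history[-1][1] = ... also mutates the pair shared with the caller's history. That is harmless here, since the final history is meant to hold the full response anyway.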
@@ -184,7 +185,7 @@ def ask_question_gradio(question: str, history: List[dict]) -> Generator[tuple, None, None]:
         # Yield updates to the chat interface
         response = ""
         for updated_history in process_stream(stream_queue, history):
-            response = updated_history[-1]["content"]
+            response = updated_history[-1][1]
             yield "", updated_history
 
         # Add final response to chat history
@@ -192,7 +193,9 @@ def ask_question_gradio(question: str, history: List[dict]) -> Generator[tuple, None, None]:
 
     except Exception as e:
         logger.error(f"Error during question processing: {e}")
-        history.append({"role": "assistant", "content": "An error occurred. Please try again later."})
+        if not history:
+            history = []
+        history.append([question, "An error occurred. Please try again later."])
         yield "", history
 
 def clear_chat():
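For completeness, roughly how the handler wires into a Blocks UI. The layout is an assumption; the two outputs match the handler's yield "", history, and the default pair format of gr.Chatbot matches the new history shape (clear_chat is assumed to return an empty history):

import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()  # default pair format: [[user, assistant], ...]
    question = gr.Textbox(placeholder="Ask a question")
    clear = gr.Button("Clear")

    # Streaming generator: each yield clears the textbox and updates the chat
    question.submit(ask_question_gradio, [question, chatbot], [question, chatbot])
    clear.click(clear_chat, outputs=chatbot)

demo.launch()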