Update app.py
app.py
CHANGED
@@ -76,7 +76,6 @@ else:
     with open(NN_MODEL_FILE, 'wb') as f:
         pickle.dump(nn, f)
 
-@spaces.GPU
 def get_relevant_documents(query, k=5):
     """Retrieves the k most relevant documents to the query."""
     start_time = time.time()
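This first hunk drops `@spaces.GPU` from `get_relevant_documents`, so the nearest-neighbour lookup runs in the regular CPU process instead of requesting a GPU allocation for every retrieval. Below is a minimal sketch of what such a CPU-side retriever typically looks like; the embedding model, the `documents` list, and how the pickled `NearestNeighbors` index was built are assumptions, since none of that setup code appears in this diff.

```python
import pickle
import time

from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors

NN_MODEL_FILE = "nn_model.pkl"  # assumed path to the pickled index
documents = ["First reference document ...", "Second reference document ..."]  # assumed corpus, aligned with the index

embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")  # assumed embedding model

with open(NN_MODEL_FILE, "rb") as f:
    nn: NearestNeighbors = pickle.load(f)  # fitted sklearn NearestNeighbors index

def get_relevant_documents(query, k=5):
    """Retrieves the k most relevant documents to the query (plain CPU work, no GPU needed)."""
    start_time = time.time()
    query_vec = embedder.encode([query])               # shape (1, dim)
    _, indices = nn.kneighbors(query_vec, n_neighbors=k)
    docs = [documents[i] for i in indices[0]]
    print(f"[PERF] retrieval took {time.time() - start_time:.2f} seconds")
    return docs
```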
@@ -92,21 +91,23 @@ def get_relevant_documents(query, k=5):
 def generate_response(question, history):
     """Generates a response to the user's question, handling GPU/CPU fallback."""
     start_time = time.time()
+    relevant_docs = get_relevant_documents(question, k=3)  # Call it here
+
     try:
-        response = _generate_response_gpu(question, history)
+        response = _generate_response_gpu(question, history, relevant_docs)
     except Exception as e:
         print(f"[WARNING] GPU failed: {str(e)}")
-        response = _generate_response_cpu(question, history)
+        response = _generate_response_cpu(question, history, relevant_docs)
+
     elapsed_time = time.time() - start_time
     print(f"[PERF] generate_response took {elapsed_time:.2f} seconds")
     return history, history  # Return updated history twice for Gradio
 
 @spaces.GPU
-def _generate_response_gpu(question, history):
+def _generate_response_gpu(question, history, relevant_docs):
     """Generates a response using the GPU."""
     print(f"\n[LOG] Received question: {question}")
-
-    print(f"[LOG] Retrieved {len(relevant_docs)} relevant documents")
+    print(f"[LOG] Using pre-retrieved {len(relevant_docs)} relevant documents")
     context = "\n".join(relevant_docs)
     prompt = f"""### MEMORY ###
 Recall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence. Use the details from the last interaction to guide your response.
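This hunk moves document retrieval out of the GPU-decorated helper and into `generate_response`, so the documents are fetched once and passed to whichever path ends up answering. Here is a self-contained sketch of that retrieve-once, GPU-with-CPU-fallback flow; the stub functions and the simulated GPU failure are illustrative only and stand in for the real `_generate_response_*` helpers.

```python
import time

def get_relevant_documents(query, k=3):
    # Placeholder retrieval so the sketch runs stand-alone.
    return [f"stub document {i} for {query!r}" for i in range(k)]

def _generate_response_gpu(question, history, relevant_docs):
    # Simulate the @spaces.GPU path failing (e.g. no GPU quota left).
    raise RuntimeError("no GPU available")

def _generate_response_cpu(question, history, relevant_docs):
    history.append((question, f"(cpu fallback) answered using {len(relevant_docs)} documents"))
    return history

def generate_response(question, history):
    start_time = time.time()
    relevant_docs = get_relevant_documents(question, k=3)  # retrieve once, reuse in both paths
    try:
        history = _generate_response_gpu(question, history, relevant_docs)
    except Exception as e:
        print(f"[WARNING] GPU failed: {e}")
        history = _generate_response_cpu(question, history, relevant_docs)
    print(f"[PERF] generate_response took {time.time() - start_time:.2f} seconds")
    return history, history  # same history twice, matching the two Gradio outputs

updated, _ = generate_response("What does this commit change?", [])
print(updated)
```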
@@ -155,38 +156,38 @@ Context: {context}\n\nQuestion: {question}\n\nAnswer:"""
     history.append((question, response))
     return history
 
-def _generate_response_cpu(question, history):
+def _generate_response_cpu(question, history, relevant_docs):
     """Generates a response using the CPU (fallback)."""
     print(f"[LOG] Running on CPU")
-
-
-
-    prompt = f"""### MEMORY ###
+    print(f"[LOG] Using pre-retrieved {len(relevant_docs)} relevant documents")
+    context = "\n".join(relevant_docs)
+    prompt = f"""### MEMORY ###
 Recall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence. Use the details from the last interaction to guide your response.
 ### SYSTEM GUARDRAILS ###
 If unsure about the user's request, ask clarifying questions rather than making assumptions.
 Do not fabricate data or features not supported by AutoGen v0.4.
 Ensure the code is scalable, modular, and adheres to best practices.
 Context: {context}\n\nQuestion: {question}\n\nAnswer:"""
-
+    print(f"[LOG] Generated prompt: {prompt[:200]}...")
 
-
+    if MODEL_PROVIDER == "huggingface":
+        try:
             messages = [{"role": "user", "content": prompt}]
             completion = hf_client.chat.completions.create(model=MODEL_NAME, messages=messages, max_tokens=500)
             response = completion.choices[0].message.content
-
+        except Exception as e:
+            response = f"Error generating response from Hugging Face model: {str(e)}"
+    elif MODEL_PROVIDER == "openai":
+        try:
             response = client.chat.completions.create(
                 model=OPENAI_MODEL,
                 messages=[{"role": "user", "content": prompt}]
             ).choices[0].message.content
+        except Exception as e:
+            response = f"Error generating response from OpenAI model: {str(e)}"
 
-
-
-    except Exception as e:
-        error_msg = f"Error generating response: {str(e)}"
-        print(f"[ERROR] {error_msg}")
-        history.append((question, error_msg))
-        return history
+    history.append((question, response))
+    return history
 
 # Gradio Interface
 print("[CHAT] Initializing chat interface...")
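The rewritten CPU fallback now branches on MODEL_PROVIDER and wraps each provider call in its own try/except, so a provider error comes back as text in the chat instead of propagating. The sketch below shows that routing in isolation; the client construction, environment variables, and default model names are assumptions, and only the two `chat.completions.create` calls mirror the diff.

```python
import os

MODEL_PROVIDER = os.getenv("MODEL_PROVIDER", "huggingface")
MODEL_NAME = os.getenv("MODEL_NAME", "HuggingFaceH4/zephyr-7b-beta")  # assumed default
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")               # assumed default

def complete(prompt):
    """Route the prompt to the configured provider; return an error string instead of raising."""
    messages = [{"role": "user", "content": prompt}]
    if MODEL_PROVIDER == "huggingface":
        try:
            from huggingface_hub import InferenceClient
            hf_client = InferenceClient(token=os.getenv("HF_TOKEN"))
            completion = hf_client.chat.completions.create(
                model=MODEL_NAME, messages=messages, max_tokens=500
            )
            return completion.choices[0].message.content
        except Exception as e:
            return f"Error generating response from Hugging Face model: {str(e)}"
    elif MODEL_PROVIDER == "openai":
        try:
            from openai import OpenAI
            client = OpenAI()  # expects OPENAI_API_KEY in the environment
            return client.chat.completions.create(
                model=OPENAI_MODEL, messages=messages
            ).choices[0].message.content
        except Exception as e:
            return f"Error generating response from OpenAI model: {str(e)}"
    return f"Unknown MODEL_PROVIDER: {MODEL_PROVIDER!r}"

print(complete("Summarise AutoGen v0.4 in one sentence."))
```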
@@ -215,7 +216,7 @@ with gr.Blocks() as demo:
     submit_button.click(
         fn=generate_response,
         inputs=[question_textbox, chatbot],
-        outputs=[chatbot],  # Output the updated history to the chatbot
+        outputs=[chatbot, chatbot],  # Output the updated history to the chatbot
         queue=True
     )
 
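The last hunk widens `outputs` to two entries because `generate_response` returns `history, history`. Below is a minimal Gradio 4.x sketch of that wiring; here the second return value clears the textbox instead of repeating the history, which is a common variant rather than what the Space itself does (the component names come from the diff, the rest of the layout is assumed).

```python
import gradio as gr

def generate_response(question, history):
    history = history or []
    history.append((question, f"echo: {question}"))  # stand-in for the RAG pipeline above
    return history, ""  # two return values: updated chat history + cleared textbox

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    question_textbox = gr.Textbox(label="Ask a question")
    submit_button = gr.Button("Submit")

    submit_button.click(
        fn=generate_response,
        inputs=[question_textbox, chatbot],
        outputs=[chatbot, question_textbox],  # one output slot per returned value
        queue=True,
    )

demo.launch()
```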