tosin2013 committed (verified)
Commit 835d717 · Parent(s): 70aef6c

Update app.py

Files changed (1):
  1. app.py +23 -22
app.py CHANGED
@@ -76,7 +76,6 @@ else:
     with open(NN_MODEL_FILE, 'wb') as f:
         pickle.dump(nn, f)
 
-@spaces.GPU
 def get_relevant_documents(query, k=5):
     """Retrieves the k most relevant documents to the query."""
     start_time = time.time()
@@ -92,21 +91,23 @@ def get_relevant_documents(query, k=5):
 def generate_response(question, history):
     """Generates a response to the user's question, handling GPU/CPU fallback."""
     start_time = time.time()
+    relevant_docs = get_relevant_documents(question, k=3)  # Call it here
+
     try:
-        response = _generate_response_gpu(question, history)
+        response = _generate_response_gpu(question, history, relevant_docs)
     except Exception as e:
         print(f"[WARNING] GPU failed: {str(e)}")
-        response = _generate_response_cpu(question, history)
+        response = _generate_response_cpu(question, history, relevant_docs)
+
     elapsed_time = time.time() - start_time
     print(f"[PERF] generate_response took {elapsed_time:.2f} seconds")
     return history, history  # Return updated history twice for Gradio
 
 @spaces.GPU
-def _generate_response_gpu(question, history):
+def _generate_response_gpu(question, history, relevant_docs):
     """Generates a response using the GPU."""
     print(f"\n[LOG] Received question: {question}")
-    relevant_docs = get_relevant_documents(question, k=3)
-    print(f"[LOG] Retrieved {len(relevant_docs)} relevant documents")
+    print(f"[LOG] Using pre-retrieved {len(relevant_docs)} relevant documents")
     context = "\n".join(relevant_docs)
     prompt = f"""### MEMORY ###
 Recall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence. Use the details from the last interaction to guide your response.
@@ -155,38 +156,38 @@ Context: {context}\n\nQuestion: {question}\n\nAnswer:"""
     history.append((question, response))
     return history
 
-def _generate_response_cpu(question, history):
+def _generate_response_cpu(question, history, relevant_docs):
     """Generates a response using the CPU (fallback)."""
     print(f"[LOG] Running on CPU")
-    try:
-        relevant_docs = get_relevant_documents(question, k=3)
-        context = "\n".join(relevant_docs)
-        prompt = f"""### MEMORY ###
+    print(f"[LOG] Using pre-retrieved {len(relevant_docs)} relevant documents")
+    context = "\n".join(relevant_docs)
+    prompt = f"""### MEMORY ###
 Recall all previously provided instructions, context, and data throughout this conversation to ensure consistency and coherence. Use the details from the last interaction to guide your response.
 ### SYSTEM GUARDRAILS ###
 If unsure about the user's request, ask clarifying questions rather than making assumptions.
 Do not fabricate data or features not supported by AutoGen v0.4.
 Ensure the code is scalable, modular, and adheres to best practices.
 Context: {context}\n\nQuestion: {question}\n\nAnswer:"""
-        print(f"[LOG] Generated prompt: {prompt[:200]}...")
+    print(f"[LOG] Generated prompt: {prompt[:200]}...")
 
-        if MODEL_PROVIDER == "huggingface":
+    if MODEL_PROVIDER == "huggingface":
+        try:
             messages = [{"role": "user", "content": prompt}]
             completion = hf_client.chat.completions.create(model=MODEL_NAME, messages=messages, max_tokens=500)
             response = completion.choices[0].message.content
-        elif MODEL_PROVIDER == "openai":
+        except Exception as e:
+            response = f"Error generating response from Hugging Face model: {str(e)}"
+    elif MODEL_PROVIDER == "openai":
+        try:
             response = client.chat.completions.create(
                 model=OPENAI_MODEL,
                 messages=[{"role": "user", "content": prompt}]
             ).choices[0].message.content
+        except Exception as e:
+            response = f"Error generating response from OpenAI model: {str(e)}"
 
-        history.append((question, response))
-        return history
-    except Exception as e:
-        error_msg = f"Error generating response: {str(e)}"
-        print(f"[ERROR] {error_msg}")
-        history.append((question, error_msg))
-        return history
+    history.append((question, response))
+    return history
 
 # Gradio Interface
 print("[CHAT] Initializing chat interface...")
@@ -215,7 +216,7 @@ with gr.Blocks() as demo:
     submit_button.click(
         fn=generate_response,
        inputs=[question_textbox, chatbot],
-        outputs=[chatbot],  # Output the updated history to the chatbot
+        outputs=[chatbot, chatbot],  # Output the updated history to the chatbot
         queue=True
     )
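The CPU fallback also trades its old function-wide try/except for one try/except per provider branch, so a failed API call degrades to an error string that is still appended to the chat history rather than raising. A condensed sketch of that pattern; call_provider is a hypothetical stand-in for the hf_client/client completion calls in app.py:

def call_provider(provider, prompt):
    # Hypothetical stand-in for hf_client/client .chat.completions.create(...).
    raise ConnectionError(f"{provider} endpoint unreachable")

def answer(prompt, model_provider):
    # Mirrors the branch-level error handling in _generate_response_cpu.
    if model_provider == "huggingface":
        try:
            response = call_provider("huggingface", prompt)
        except Exception as e:
            response = f"Error generating response from Hugging Face model: {str(e)}"
    elif model_provider == "openai":
        try:
            response = call_provider("openai", prompt)
        except Exception as e:
            response = f"Error generating response from OpenAI model: {str(e)}"
    return response

print(answer("hello", "openai"))
# -> Error generating response from OpenAI model: openai endpoint unreachable

Note that, as in the diff, an unrecognized MODEL_PROVIDER leaves response unbound; a final else branch would close that gap.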
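Finally, outputs=[chatbot, chatbot] brings the Gradio wiring in line with the handler's return history, history: a click event expects one output component per returned value, so two return values flowing into a one-element outputs list would fail at runtime. A toy Blocks app with the same arity (echo logic in place of the model call); repeating the component mirrors the commit, though returning a single history with outputs=[chatbot] would be the more conventional wiring:

import gradio as gr

def respond(question, history):
    # Append an echo reply; two return values match the two outputs below.
    history = history + [(question, f"echo: {question}")]
    return history, history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    question_box = gr.Textbox(label="Question")
    submit = gr.Button("Submit")
    submit.click(fn=respond, inputs=[question_box, chatbot],
                 outputs=[chatbot, chatbot], queue=True)

demo.launch()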