sagar007 committed on
Commit
75a1aab
·
verified ·
1 Parent(s): aa6ca85

Update app.py

Files changed (1)
  1. app.py +198 -442
app.py CHANGED
@@ -10,9 +10,9 @@ import subprocess
10
  import numpy as np
11
  from typing import List, Dict, Tuple, Any, Optional, Union
12
  from functools import lru_cache
13
- # No asyncio needed for synchronous version
14
  import threading
15
- # No ThreadPoolExecutor needed for synchronous version
16
  import warnings
17
  import traceback # For detailed error logging
18
  import re # For text cleaning
@@ -30,58 +30,36 @@ MAX_NEW_TOKENS = 300
30
  TEMPERATURE = 0.7
31
  TOP_P = 0.95
32
  KOKORO_PATH = 'Kokoro-82M'
33
- # Define expected durations for ZeroGPU decorator
34
- LLM_GPU_DURATION = 120 # Seconds (adjust based on expected LLM generation time)
35
- TTS_GPU_DURATION = 45 # Seconds (adjust based on expected TTS generation time)
36
 
37
  # --- Initialization ---
38
- # Suppress specific warnings
39
  warnings.filterwarnings("ignore", category=UserWarning, message="TypedStorage is deprecated")
40
  warnings.filterwarnings("ignore", message="Backend 'inductor' is not available.")
41
 
42
  # --- LLM Initialization ---
43
  llm_model: Optional[AutoModelForCausalLM] = None
44
  llm_tokenizer: Optional[AutoTokenizer] = None
45
- llm_device = "cpu"
46
-
47
  try:
48
  print("[LLM Init] Initializing Language Model...")
49
  llm_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
50
  llm_tokenizer.pad_token = llm_tokenizer.eos_token
51
-
52
- # For ZeroGPU, we assume GPU will be available when needed, load with cuda preference
53
- # If running locally without GPU, it might try CPU based on device_map="auto" fallback
54
- llm_device = "cuda" if torch.cuda.is_available() else "cpu" # Check initial availability info
55
  torch_dtype = torch.float16 if llm_device == "cuda" else torch.float32
56
- # device_map="auto" is generally okay, ZeroGPU handles the actual assignment during decorated function call
57
  device_map = "auto"
58
  print(f"[LLM Init] Preparing model load (target device via ZeroGPU: cuda, dtype={torch_dtype})")
59
-
60
  llm_model = AutoModelForCausalLM.from_pretrained(
61
- MODEL_NAME,
62
- device_map=device_map, # Let accelerate/ZeroGPU handle placement
63
- low_cpu_mem_usage=True,
64
- torch_dtype=torch_dtype,
65
  )
66
- print(f"[LLM Init] LLM loaded configuration successfully. Ready for GPU assignment via @spaces.GPU.")
67
  llm_model.eval()
68
-
69
  except Exception as e:
70
  print(f"[LLM Init] FATAL: Error initializing LLM model: {str(e)}")
71
- print(traceback.format_exc())
72
- llm_model = None
73
- llm_tokenizer = None
74
  print("[LLM Init] LLM features will be unavailable.")
75
 
76
-
77
  # --- TTS Initialization ---
78
- # (TTS setup remains the same, runs in background)
79
- VOICE_CHOICES = {
80
- '🇺🇸 Female (Default)': 'af',
81
- '🇺🇸 Bella': 'af_bella',
82
- '🇺🇸 Sarah': 'af_sarah',
83
- '🇺🇸 Nicole': 'af_nicole'
84
- }
85
  TTS_ENABLED = False
86
  tts_model: Optional[Any] = None
87
  voicepacks: Dict[str, Any] = {}
@@ -92,18 +70,15 @@ def _run_subprocess(cmd: List[str], check: bool = True, cwd: Optional[str] = Non
92
  print(f"Running command: {' '.join(cmd)}")
93
  try:
94
  result = subprocess.run(cmd, check=check, capture_output=True, text=True, cwd=cwd, timeout=timeout)
 
95
  if not check or result.returncode != 0:
96
- if result.stdout: print(f" Stdout: {result.stdout.strip()}")
97
- if result.stderr: print(f" Stderr: {result.stderr.strip()}")
98
  elif result.returncode == 0 and ('clone' in cmd or 'pull' in cmd or 'install' in cmd):
99
- print(f" Command successful.")
100
  return result
101
- except FileNotFoundError:
102
- print(f" Error: Command not found - {cmd[0]}")
103
- raise
104
- except subprocess.TimeoutExpired:
105
- print(f" Error: Command timed out - {' '.join(cmd)}")
106
- raise
107
  except subprocess.CalledProcessError as e:
108
  print(f" Error running command: {' '.join(e.cmd)} (Code: {e.returncode})")
109
  if e.stdout: print(f" Stdout: {e.stdout.strip()}")
@@ -111,400 +86,277 @@ def _run_subprocess(cmd: List[str], check: bool = True, cwd: Optional[str] = Non
111
  raise
112
 
113
  def setup_tts_task():
114
- """Initializes Kokoro TTS model and dependencies."""
115
  global TTS_ENABLED, tts_model, voicepacks, tts_device
116
  print("[TTS Setup] Starting background initialization...")
117
-
118
- # TTS device determination depends on where generate_tts_speech will run.
119
- # If decorated with @spaces.GPU, it will use CUDA when called.
120
- tts_device = "cuda" # Assume it will run on GPU via decorator
121
- print(f"[TTS Setup] Target device for TTS model (via @spaces.GPU): {tts_device}")
122
-
123
  can_sudo = shutil.which('sudo') is not None
124
  apt_cmd_prefix = ['sudo'] if can_sudo else []
125
  absolute_kokoro_path = os.path.abspath(KOKORO_PATH)
126
-
127
  try:
128
  # 1. Clone/Update Repo
129
  if not os.path.exists(absolute_kokoro_path):
130
- print(f"[TTS Setup] Cloning repository to {absolute_kokoro_path}...")
131
- # (Cloning logic as before)
132
- try: _run_subprocess(['git', 'lfs', 'install', '--system', '--skip-repo'])
133
- except Exception as lfs_err: print(f"[TTS Setup] Warning: git lfs install failed: {lfs_err}")
134
- _run_subprocess(['git', 'clone', 'https://huggingface.co/hexgrad/Kokoro-82M', absolute_kokoro_path])
135
- try: _run_subprocess(['git', 'lfs', 'pull'], cwd=absolute_kokoro_path)
136
- except Exception as lfs_pull_err: print(f"[TTS Setup] Warning: git lfs pull failed: {lfs_pull_err}")
137
- else:
138
- print(f"[TTS Setup] Directory {absolute_kokoro_path} already exists.")
139
 
140
  # 2. Install espeak
141
  print("[TTS Setup] Checking/Installing espeak...")
142
- try: # (espeak install logic as before)
143
- _run_subprocess(apt_cmd_prefix + ['apt-get', 'update', '-qq'])
144
- _run_subprocess(apt_cmd_prefix + ['apt-get', 'install', '-y', '-qq', 'espeak-ng'])
145
- print("[TTS Setup] espeak-ng installed or already present.")
146
  except Exception:
147
- print("[TTS Setup] espeak-ng installation failed, trying espeak...")
148
- try:
149
- _run_subprocess(apt_cmd_prefix + ['apt-get', 'install', '-y', '-qq', 'espeak'])
150
- print("[TTS Setup] espeak installed or already present.")
151
- except Exception as espeak_err:
152
- print(f"[TTS Setup] ERROR: Failed to install espeak: {espeak_err}. TTS disabled.")
153
- return
154
 
155
  # 3. Load Kokoro Model and Voices
156
  sys_path_updated = False
157
  if os.path.exists(absolute_kokoro_path):
158
- print(f"[TTS Setup] Checking contents of: {absolute_kokoro_path}")
159
- try: print(f"[TTS Setup] Contents: {os.listdir(absolute_kokoro_path)}")
160
- except OSError as list_err: print(f"[TTS Setup] Warning: Could not list directory contents: {list_err}")
161
-
162
- if absolute_kokoro_path not in sys.path:
163
- sys.path.insert(0, absolute_kokoro_path)
164
- sys_path_updated = True
165
- print(f"[TTS Setup] Temporarily added {absolute_kokoro_path} to sys.path.")
166
-
167
- try:
168
- print("[TTS Setup] Attempting to import Kokoro modules...")
169
- from models import build_model
170
- from kokoro import generate as generate_tts_internal
171
- print("[TTS Setup] Kokoro modules imported successfully.")
172
-
173
- globals()['build_model'] = build_model
174
- globals()['generate_tts_internal'] = generate_tts_internal
175
-
176
- model_file = os.path.join(absolute_kokoro_path, 'kokoro-v0_19.pth')
177
- if not os.path.exists(model_file):
178
- print(f"[TTS Setup] ERROR: Model file {model_file} not found. TTS disabled.")
179
- return
180
-
181
- # Load model onto CPU initially, ZeroGPU decorator will handle moving/using GPU
182
- print(f"[TTS Setup] Loading TTS model config from {model_file} (target device: {tts_device} via @spaces.GPU)...")
183
- # Load onto CPU first to avoid issues before GPU is attached.
184
- # The build_model function might need adjustment if it forces device placement.
185
- # Assuming build_model can load structure then decorator handles device use.
186
- # If build_model *requires* device at load, this might need adjustment.
187
- tts_model = build_model(model_file, 'cpu') # <<< Load to CPU first
188
- tts_model.eval()
189
- print("[TTS Setup] TTS model structure loaded (CPU).")
190
-
191
- # Load voices onto CPU
192
- loaded_voices = 0
193
- for voice_name, voice_id in VOICE_CHOICES.items():
194
- voice_file_path = os.path.join(absolute_kokoro_path, 'voices', f'{voice_id}.pt')
195
- if os.path.exists(voice_file_path):
196
- try:
197
- print(f"[TTS Setup] Loading voice: {voice_id} ({voice_name}) to CPU")
198
- voicepacks[voice_id] = torch.load(voice_file_path, map_location='cpu') # <<< Load to CPU
199
- loaded_voices += 1
200
- except Exception as e: print(f"[TTS Setup] Warning: Failed to load voice {voice_id}: {str(e)}")
201
- else: print(f"[TTS Setup] Info: Voice file {voice_file_path} not found.")
202
-
203
- if loaded_voices == 0:
204
- print("[TTS Setup] ERROR: No voicepacks loaded. TTS disabled.")
205
- tts_model = None; return
206
-
207
- TTS_ENABLED = True
208
- print(f"[TTS Setup] Initialization successful. {loaded_voices} voices loaded. TTS Enabled: {TTS_ENABLED}")
209
-
210
- except ImportError as ie:
211
- print(f"[TTS Setup] ERROR: Failed to import Kokoro modules: {ie}.")
212
- print(traceback.format_exc())
213
- except Exception as load_err:
214
- print(f"[TTS Setup] ERROR: Exception during TTS model/voice loading: {load_err}. TTS disabled.")
215
- print(traceback.format_exc())
216
- finally:
217
- if sys_path_updated: # Cleanup sys.path
218
- try:
219
- if sys.path[0] == absolute_kokoro_path: sys.path.pop(0)
220
- elif absolute_kokoro_path in sys.path: sys.path.remove(absolute_kokoro_path)
221
- print(f"[TTS Setup] Cleaned up sys.path.")
222
- except Exception as cleanup_err: print(f"[TTS Setup] Warning: Error cleaning sys.path: {cleanup_err}")
223
- else:
224
- print(f"[TTS Setup] ERROR: Directory {absolute_kokoro_path} not found. TTS disabled.")
225
-
226
- except Exception as e:
227
- print(f"[TTS Setup] ERROR: Unexpected error during setup: {str(e)}")
228
- print(traceback.format_exc())
229
- TTS_ENABLED = False; tts_model = None; voicepacks.clear()
230
 
231
- # Start TTS setup thread
232
  print("Starting TTS setup thread...")
233
  tts_setup_thread = threading.Thread(target=setup_tts_task, daemon=True)
234
  tts_setup_thread.start()
235
 
236
-
237
- # --- Core Logic Functions (SYNCHRONOUS + @spaces.GPU) ---
238
-
239
- # Web search remains synchronous
240
  @lru_cache(maxsize=128)
241
  def get_web_results_sync(query: str, max_results: int = MAX_SEARCH_RESULTS) -> List[Dict[str, Any]]:
242
  """Synchronous web search function with caching."""
243
- # (Implementation remains the same as before)
244
  print(f"[Web Search] Searching (sync): '{query}' (max_results={max_results})")
245
  try:
246
  with DDGS() as ddgs:
247
  results = list(ddgs.text(query, max_results=max_results, safesearch='moderate', timelimit='y'))
248
  print(f"[Web Search] Found {len(results)} results.")
249
- formatted = [{
250
- "id": i + 1, "title": res.get("title", "No Title"),
251
- "snippet": res.get("body", "No Snippet"), "url": res.get("href", "#"),
252
- } for i, res in enumerate(results)]
253
  return formatted
254
- except Exception as e:
255
- print(f"[Web Search] Error: {e}"); return []
256
 
257
- # Prompt formatting remains the same
258
  def format_llm_prompt(query: str, context: List[Dict[str, Any]]) -> str:
259
  """Formats the prompt for the LLM."""
260
- # (Implementation remains the same as before)
261
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
262
- context_str = "\n\n".join(
263
- [f"[{res['id']}] {html.escape(res['title'])}\n{html.escape(res['snippet'])}" for res in context]
264
- ) if context else "No relevant web context found."
265
- return f"""SYSTEM: You are a helpful AI assistant. Answer the user's query based *only* on the provided web search context. Cite sources using bracket notation like [1], [2]. If the context is insufficient, state that clearly. Use markdown for formatting. Do not add external information. Current Time: {current_time}
266
-
267
- CONTEXT:
268
- ---
269
- {context_str}
270
- ---
271
 
272
- USER: {html.escape(query)}
273
-
274
- ASSISTANT:"""
275
-
276
- # Source formatting remains the same
277
  def format_sources_html(web_results: List[Dict[str, Any]]) -> str:
278
  """Formats search results into HTML for display."""
279
- # (Implementation remains the same as before)
280
  if not web_results: return "<div class='no-sources'>No sources found.</div>"
281
  items_html = ""
282
  for res in web_results:
283
- title_safe = html.escape(res.get("title", "Source"))
284
- snippet_safe = html.escape(res.get("snippet", "")[:150] + ("..." if len(res.get("snippet", "")) > 150 else ""))
285
- url = html.escape(res.get("url", "#"))
286
  items_html += f"""<div class='source-item'><div class='source-number'>[{res['id']}]</div><div class='source-content'><a href="{url}" target="_blank" class='source-title' title="{url}">{title_safe}</a><div class='source-snippet'>{snippet_safe}</div></div></div>"""
287
  return f"<div class='sources-container'>{items_html}</div>"
288
 
289
-
290
- # <<<--- ADD @spaces.GPU decorator AND MAKE SYNCHRONOUS --->>>
291
  @spaces.GPU(duration=LLM_GPU_DURATION)
292
  def generate_llm_answer(prompt: str) -> str:
293
  """Generates answer using the LLM (Synchronous, GPU-decorated)."""
294
- if not llm_model or not llm_tokenizer:
295
- print("[LLM Generate] LLM model or tokenizer not available.")
296
- return "Error: Language Model is not available."
297
-
298
  print(f"[LLM Generate] Requesting generation (sync, GPU) (prompt length {len(prompt)})...")
299
  start_time = time.time()
300
  try:
301
- # Ensure model is on the GPU (ZeroGPU should handle this)
302
- # It might be safer to explicitly move model IF ZeroGPU doesn't guarantee it.
303
- # Let's assume ZeroGPU handles the context for now.
304
- current_device = next(llm_model.parameters()).device
305
- print(f"[LLM Generate] Model currently on device: {current_device}") # Debug device
306
-
307
- inputs = llm_tokenizer(
308
- prompt, return_tensors="pt", padding=True, truncation=True,
309
- max_length=1024, return_attention_mask=True
310
- ).to(current_device) # Send input to model's device
311
-
312
  with torch.inference_mode(), torch.cuda.amp.autocast(enabled=(llm_model.dtype == torch.float16)):
313
- # Direct synchronous call
314
- outputs = llm_model.generate(
315
- inputs.input_ids,
316
- attention_mask=inputs.attention_mask,
317
- max_new_tokens=MAX_NEW_TOKENS,
318
- temperature=TEMPERATURE, top_p=TOP_P,
319
- pad_token_id=llm_tokenizer.eos_token_id,
320
- eos_token_id=llm_tokenizer.eos_token_id,
321
- do_sample=True, num_return_sequences=1
322
- )
323
-
324
- output_ids = outputs[0][inputs.input_ids.shape[1]:]
325
- answer_part = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
326
- if not answer_part: answer_part = "*Model generated an empty response.*"
327
-
328
- end_time = time.time()
329
- print(f"[LLM Generate] Generation complete in {end_time - start_time:.2f}s. Length: {len(answer_part)}")
330
  return answer_part
 
331
 
332
- except Exception as e:
333
- print(f"[LLM Generate] Error: {e}")
334
- print(traceback.format_exc())
335
- return f"Error during answer generation: Check logs."
336
-
337
-
338
- # <<<--- ADD @spaces.GPU decorator AND MAKE SYNCHRONOUS --->>>
339
  @spaces.GPU(duration=TTS_GPU_DURATION)
340
  def generate_tts_speech(text: str, voice_id: str = 'af') -> Optional[Tuple[int, np.ndarray]]:
341
- """Generates speech using TTS model (Synchronous, GPU-decorated)."""
342
- if not TTS_ENABLED or not tts_model or 'generate_tts_internal' not in globals():
343
- print("[TTS Generate] Skipping: TTS not ready.")
344
- return None
345
- if not text or not text.strip() or text.startswith("Error:") or text.startswith("*Model"):
346
- print("[TTS Generate] Skipping: Invalid or empty text.")
347
- return None
348
 
349
- print(f"[TTS Generate] Requesting speech (sync, GPU) (length {len(text)}, voice '{voice_id}')...")
350
  start_time = time.time()
351

352
  try:
 
353
  actual_voice_id = voice_id
354
  if voice_id not in voicepacks:
355
- print(f"[TTS Generate] Warning: Voice '{voice_id}' not loaded. Trying 'af'.")
356
  actual_voice_id = 'af'
357
- if 'af' not in voicepacks: print("[TTS Generate] Error: Default voice 'af' unavailable."); return None
358
-
359
- # Clean text (same cleaning logic as before)
360
- clean_text = re.sub(r'\[\d+\](\[\d+\])*', '', text)
361
- clean_text = re.sub(r'```.*?```', '', clean_text, flags=re.DOTALL)
362
- clean_text = re.sub(r'`[^`]*`', '', clean_text)
363
- clean_text = re.sub(r'^\s*[\*->]\s*', '', clean_text, flags=re.MULTILINE)
364
- clean_text = re.sub(r'[\*#_]', '', clean_text)
365
- clean_text = html.unescape(clean_text)
366
- clean_text = ' '.join(clean_text.split())
367
 
 
 
 
368
  if not clean_text: print("[TTS Generate] Skipping: Text empty after cleaning."); return None
369
 
 
370
  if len(clean_text) > MAX_TTS_CHARS:
371
  print(f"[TTS Generate] Truncating cleaned text from {len(clean_text)} to {MAX_TTS_CHARS} chars.")
372
- clean_text = clean_text[:MAX_TTS_CHARS]
373
- last_punct = max(clean_text.rfind(p) for p in '.?!; ')
374
  if last_punct != -1: clean_text = clean_text[:last_punct+1]
375
  clean_text += "..."
376
 
377
- print(f"[TTS Generate] Generating audio for: '{clean_text[:100]}...'")
 
 
378
  gen_func = globals()['generate_tts_internal']
379
- voice_pack_data = voicepacks[actual_voice_id]
380
 
381
- # *** Crucial for ZeroGPU: Move TTS model and voicepack to CUDA within the decorated function ***
382
- current_device = 'cuda' # Assume GPU is attached by decorator
383
  try:
384
- print(f"[TTS Generate] Moving TTS model to {current_device}...")
 
385
  tts_model.to(current_device)
386
- # Move voicepack data (might be a dict of tensors)
387
- if isinstance(voice_pack_data, dict):
388
- moved_voice_pack = {k: v.to(current_device) if isinstance(v, torch.Tensor) else v for k, v in voice_pack_data.items()}
389
- elif isinstance(voice_pack_data, torch.Tensor):
390
- moved_voice_pack = voice_pack_data.to(current_device)
391
- else:
392
- moved_voice_pack = voice_pack_data # Assume not tensors if not dict/tensor
393
- print(f"[TTS Generate] TTS model and voicepack on {current_device}.")
394
-
395
- # Direct synchronous call on GPU
396
- audio_data, _ = gen_func(tts_model, clean_text, moved_voice_pack, 'afr')
397
-
 
 
 
 
398
  finally:
399
- # *** Optional but recommended: Move model back to CPU to free GPU memory if needed ***
400
- # ZeroGPU might handle this, but explicit move-back can be safer if running locally too
401
  try:
402
  print("[TTS Generate] Moving TTS model back to CPU...")
403
- tts_model.to('cpu')
404
- # No need to move voicepack back, it's loaded to CPU initially
405
- except Exception as move_back_err:
406
- print(f"[TTS Generate] Warning: Could not move TTS model back to CPU: {move_back_err}")
407
-
408
-
409
- # Process output (remains same)
410
- if isinstance(audio_data, torch.Tensor): audio_np = audio_data.detach().cpu().numpy()
411
- elif isinstance(audio_data, np.ndarray): audio_np = audio_data
412
- else: print("[TTS Generate] Warning: Unexpected audio data type."); return None
 
413
  audio_np = audio_np.flatten().astype(np.float32)
414
-
415
- end_time = time.time()
416
- print(f"[TTS Generate] Audio generated in {end_time - start_time:.2f}s. Shape: {audio_np.shape}")
417
- return (TTS_SAMPLE_RATE, audio_np)
 
 
418
 
419
  except Exception as e:
420
- print(f"[TTS Generate] Error: {str(e)}")
421
- print(traceback.format_exc())
422
- return None
423
 
424
- # Voice ID mapping remains same
425
  def get_voice_id_from_display(voice_display_name: str) -> str:
 
426
  return VOICE_CHOICES.get(voice_display_name, 'af')
427
 
428
-
429
- # --- Gradio Interaction Logic (SYNCHRONOUS) ---
430
  ChatHistoryType = List[Dict[str, Optional[str]]]
431
 
432
  def handle_interaction(
433
  query: str,
434
  history: ChatHistoryType,
435
  selected_voice_display_name: str
436
- ) -> Tuple[ChatHistoryType, str, str, Optional[Tuple[int, np.ndarray]], Any]: # Return type matches outputs
437
  """Synchronous function to handle user queries for ZeroGPU."""
438
- print(f"\n--- Handling Query (Sync) ---")
439
- query = query.strip()
440
  print(f"Query: '{query}', Voice: '{selected_voice_display_name}'")
 
441
 
442
- if not query:
443
- print("Empty query received.")
444
- # Return initial state immediately
445
- return history, "*Please enter a non-empty query.*", "<div class='no-sources'>Enter a query to search.</div>", None, gr.Button(value="Search", interactive=True)
446
-
447
- # Initial state updates (won't be seen until the end in Gradio)
448
- current_history: ChatHistoryType = history + [{"role": "user", "content": query}]
449
- current_history.append({"role": "assistant", "content": "*Processing... Please wait.*"}) # Placeholder
450
- status_update = "*Processing... Please wait.*"
451
- sources_html = "<div class='searching'><span>Searching & Processing...</span></div>"
452
- audio_data = None
453
- button_update = gr.Button(value="Processing...", interactive=False) # Disabled during processing
454
 
455
- # --- Start Blocking Operations ---
456
  try:
457
- # 1. Perform Web Search (Sync)
458
- print("[Handler] Performing web search...")
459
- web_results = get_web_results_sync(query)
460
- sources_html = format_sources_html(web_results) # Update sources now
461
-
462
- # 2. Generate LLM Answer (Sync, Decorated)
463
- print("[Handler] Generating LLM answer...")
464
- status_update = "*Generating answer...*" # Update status text
465
- # (UI won't update here yet)
466
  llm_prompt = format_llm_prompt(query, web_results)
467
- final_answer = generate_llm_answer(llm_prompt) # This call triggers GPU attachment
468
- status_update = final_answer # Answer generated
469
 
470
- # 3. Generate TTS Speech (Sync, Decorated, Optional)
471
  tts_status_message = ""
472
- if TTS_ENABLED and not final_answer.startswith("Error"):
473
- print("[Handler] Generating TTS speech...")
474
- status_update += "\n\n*(Generating audio...)*" # Append status
475
- # (UI won't update here yet)
476
  voice_id = get_voice_id_from_display(selected_voice_display_name)
477
- audio_data = generate_tts_speech(final_answer, voice_id) # This call triggers GPU attachment
478
- if audio_data is None:
479
- tts_status_message = "\n\n*(Audio generation failed)*"
480
- elif not TTS_ENABLED:
481
- if tts_setup_thread.is_alive(): tts_status_message = "\n\n*(TTS initializing...)*"
482
- else: tts_status_message = "\n\n*(TTS unavailable)*"
483
-
484
- # Combine final answer with status
485
  final_answer_with_status = final_answer + tts_status_message
486
  status_update = final_answer_with_status
487
- current_history[-1]["content"] = final_answer_with_status # Update history
488
 
489
- button_update = gr.Button(value="Search", interactive=True) # Re-enable button
490
  print("--- Query Handling Complete (Sync) ---")
491
 
492
  except Exception as e:
493
- print(f"[Handler] Error during processing: {e}")
494
- print(traceback.format_exc())
495
- error_message = f"*An error occurred: {e}*"
496
- current_history[-1]["content"] = error_message # Update history with error
497
- status_update = error_message
498
- sources_html = "<div class='error'>Request failed.</div>"
499
- audio_data = None
500
- button_update = gr.Button(value="Search", interactive=True) # Re-enable button on error
501
-
502
- # Return the final state tuple for all outputs
503
- return current_history, status_update, sources_html, audio_data, button_update
504
 
 
 
505
 
506
  # --- Gradio UI Definition ---
507
- # (CSS remains the same)
508
  css = """
509
  /* ... [Your existing refined CSS] ... */
510
  .gradio-container { max-width: 1200px !important; background-color: #f7f7f8 !important; }
@@ -523,17 +375,17 @@ css = """
523
  .search-box button:hover { background: #1d4ed8 !important; }
524
  .search-box button:disabled { background: #9ca3af !important; cursor: not-allowed; }
525
  .results-container { background: transparent; padding: 0; margin-top: 1.5rem; }
526
- .answer-box { /* Now used for status/final text */ background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 1rem; color: #1f2937; margin-bottom: 0.5rem; box-shadow: 0 2px 8px rgba(0,0,0,0.05); min-height: 50px;}
527
  .answer-box p { color: #374151; line-height: 1.7; margin:0;}
528
  .answer-box code { background: #f3f4f6; border-radius: 4px; padding: 2px 4px; color: #4b5563; font-size: 0.9em; }
529
  .sources-box { background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 1.5rem; }
530
  .sources-box h3 { margin-top: 0; margin-bottom: 1rem; color: #111827; font-size: 1.2rem; }
531
  .sources-container { margin-top: 0; }
532
- .source-item { display: flex; padding: 10px 0; margin: 0; border-bottom: 1px solid #f3f4f6; transition: background-color 0.2s; }
533
  .source-item:last-child { border-bottom: none; }
534
  .source-number { font-weight: bold; margin-right: 12px; color: #6b7280; width: 20px; text-align: right; flex-shrink: 0;}
535
- .source-content { flex: 1; min-width: 0;} /* Allow content to shrink */
536
- .source-title { color: #2563eb; font-weight: 500; text-decoration: none; display: block; margin-bottom: 4px; transition: all 0.2s; font-size: 0.95em; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;}
537
  .source-title:hover { color: #1d4ed8; text-decoration: underline; }
538
  .source-snippet { color: #4b5563; font-size: 0.9em; line-height: 1.5; }
539
  .chat-history { max-height: 500px; overflow-y: auto; background: #f9fafb; border: 1px solid #e5e7eb; border-radius: 8px; scrollbar-width: thin; scrollbar-color: #d1d5db #f9fafb; }
@@ -542,25 +394,13 @@ css = """
542
  .chat-history::-webkit-scrollbar-track { background: #f9fafb; }
543
  .chat-history::-webkit-scrollbar-thumb { background-color: #d1d5db; border-radius: 20px; }
544
  .examples-container { background: #f9fafb; border-radius: 8px; padding: 1rem; margin-top: 1rem; border: 1px solid #e5e7eb; }
545
- .examples-container button { background: white !important; border: 1px solid #d1d5db !important; color: #374151 !important; transition: all 0.2s; margin: 4px !important; font-size: 0.9em !important; padding: 6px 12px !important; border-radius: 4px !important; }
546
  .examples-container button:hover { background: #f3f4f6 !important; border-color: #adb5bd !important; }
547
  .markdown-content { color: #374151 !important; font-size: 1rem; line-height: 1.7; }
548
- .markdown-content h1, .markdown-content h2, .markdown-content h3 { color: #111827 !important; margin-top: 1.2em !important; margin-bottom: 0.6em !important; font-weight: 600; }
549
- .markdown-content h1 { font-size: 1.6em !important; border-bottom: 1px solid #e5e7eb; padding-bottom: 0.3em; }
550
- .markdown-content h2 { font-size: 1.4em !important; border-bottom: 1px solid #e5e7eb; padding-bottom: 0.3em;}
551
- .markdown-content h3 { font-size: 1.2em !important; }
552
- .markdown-content a { color: #2563eb !important; text-decoration: none !important; transition: all 0.2s; }
553
- .markdown-content a:hover { color: #1d4ed8 !important; text-decoration: underline !important; }
554
- .markdown-content code { background: #f3f4f6 !important; padding: 2px 6px !important; border-radius: 4px !important; font-family: monospace !important; color: #4b5563; font-size: 0.9em; }
555
- .markdown-content pre { background: #f3f4f6 !important; padding: 12px !important; border-radius: 8px !important; overflow-x: auto !important; border: 1px solid #e5e7eb;}
556
- .markdown-content pre code { background: transparent !important; padding: 0 !important; border: none !important; font-size: 0.9em;}
557
- .markdown-content blockquote { border-left: 4px solid #d1d5db !important; padding-left: 1em !important; margin-left: 0 !important; color: #6b7280 !important; }
558
- .markdown-content table { border-collapse: collapse !important; width: 100% !important; margin: 1em 0; }
559
- .markdown-content th, .markdown-content td { padding: 8px 12px !important; border: 1px solid #d1d5db !important; text-align: left;}
560
- .markdown-content th { background: #f9fafb !important; font-weight: 600; }
561
  .voice-selector { margin: 0; padding: 0; height: 100%; }
562
  .voice-selector div[data-testid="dropdown"] { height: 100% !important; border-radius: 0 !important;}
563
- .voice-selector select { background: white !important; color: #374151 !important; border: 1px solid #d1d5db !important; border-left: none !important; border-right: none !important; border-radius: 0 !important; height: 100% !important; padding: 0 10px !important; transition: all 0.2s; appearance: none !important; -webkit-appearance: none !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%236b7280' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important; background-position: right 0.5rem center !important; background-repeat: no-repeat !important; background-size: 1.5em 1.5em !important; padding-right: 2.5rem !important; }
564
  .voice-selector select:focus { border-color: #2563eb !important; box-shadow: none !important; z-index: 1; position: relative;}
565
  .audio-player { margin-top: 1rem; background: #f9fafb !important; border-radius: 8px !important; padding: 0.5rem !important; border: 1px solid #e5e7eb;}
566
  .audio-player audio { width: 100% !important; }
@@ -570,119 +410,35 @@ css = """
570
  .no-sources { padding: 1rem; text-align: center; color: #6b7280; background: #f9fafb; border-radius: 8px; border: 1px solid #e5e7eb;}
571
  @keyframes pulse { 0% { opacity: 0.7; } 50% { opacity: 1; } 100% { opacity: 0.7; } }
572
  .searching span { animation: pulse 1.5s infinite ease-in-out; display: inline-block; }
573
- /* Dark Mode Styles */
574
  .dark .gradio-container { background-color: #111827 !important; }
575
- .dark #header { background: linear-gradient(135deg, #1f2937, #374151); }
576
- .dark #header h3 { color: #9ca3af; }
577
- .dark .search-container { background: #1f2937; border-color: #374151; }
578
- .dark .search-box input[type="text"] { background: #374151 !important; border-color: #4b5563 !important; color: #e5e7eb !important; }
579
- .dark .search-box input[type="text"]:focus { border-color: #3b82f6 !important; background: #4b5563 !important; box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.3) !important; }
580
- .dark .search-box input[type="text"]::placeholder { color: #9ca3af !important; }
581
- .dark .search-box button { background: #3b82f6 !important; }
582
- .dark .search-box button:hover { background: #2563eb !important; }
583
- .dark .search-box button:disabled { background: #4b5563 !important; }
584
- .dark .answer-box { background: #1f2937; border-color: #374151; color: #e5e7eb; }
585
- .dark .answer-box p { color: #d1d5db; }
586
- .dark .answer-box code { background: #374151; color: #9ca3af; }
587
- .dark .sources-box { background: #1f2937; border-color: #374151; }
588
- .dark .sources-box h3 { color: #f9fafb; }
589
- .dark .source-item { border-bottom-color: #374151; }
590
- .dark .source-item:hover { background-color: #374151; }
591
- .dark .source-number { color: #9ca3af; }
592
- .dark .source-title { color: #60a5fa; }
593
- .dark .source-title:hover { color: #93c5fd; }
594
- .dark .source-snippet { color: #d1d5db; }
595
- .dark .chat-history { background: #374151; border-color: #4b5563; scrollbar-color: #4b5563 #374151; color: #d1d5db;}
596
- .dark .chat-history::-webkit-scrollbar-track { background: #374151; }
597
- .dark .chat-history::-webkit-scrollbar-thumb { background-color: #4b5563; }
598
- .dark .examples-container { background: #374151; border-color: #4b5563; }
599
- .dark .examples-container button { background: #1f2937 !important; border-color: #4b5563 !important; color: #d1d5db !important; }
600
- .dark .examples-container button:hover { background: #4b5563 !important; border-color: #6b7280 !important; }
601
- .dark .markdown-content { color: #d1d5db !important; }
602
- .dark .markdown-content h1, .dark .markdown-content h2, .dark .markdown-content h3 { color: #f9fafb !important; border-bottom-color: #4b5563; }
603
- .dark .markdown-content a { color: #60a5fa !important; }
604
- .dark .markdown-content a:hover { color: #93c5fd !important; }
605
- .dark .markdown-content code { background: #374151 !important; color: #9ca3af; }
606
- .dark .markdown-content pre { background: #374151 !important; border-color: #4b5563;}
607
- .dark .markdown-content pre code { background: transparent !important; }
608
- .dark .markdown-content blockquote { border-left-color: #4b5563 !important; color: #9ca3af !important; }
609
- .dark .markdown-content th, .dark .markdown-content td { border-color: #4b5563 !important; }
610
- .dark .markdown-content th { background: #374151 !important; }
611
- .dark .voice-selector select { background: #1f2937 !important; color: #d1d5db !important; border-color: #4b5563 !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%239ca3af' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important;}
612
- .dark .voice-selector select:focus { border-color: #3b82f6 !important; }
613
- .dark .audio-player { background: #374151 !important; border-color: #4b5563;}
614
- .dark .audio-player audio::-webkit-media-controls-panel { background-color: #374151; }
615
- .dark .audio-player audio::-webkit-media-controls-play-button { color: #d1d5db; }
616
- .dark .audio-player audio::-webkit-media-controls-current-time-display { color: #9ca3af; }
617
- .dark .audio-player audio::-webkit-media-controls-time-remaining-display { color: #9ca3af; }
618
- .dark .searching { background: #1e3a8a; color: #93c5fd; border-color: #3b82f6; }
619
- .dark .error { background: #7f1d1d; color: #fca5a5; border-color: #ef4444; }
620
- .dark .no-sources { background: #374151; color: #9ca3af; border-color: #4b5563;}
621
  """
622
 
623
  with gr.Blocks(title="AI Search Assistant (ZeroGPU Sync)", css=css, theme=gr.themes.Default(primary_hue="blue")) as demo:
624
  chat_history_state = gr.State([])
625
-
626
  with gr.Column():
627
- with gr.Column(elem_id="header"):
628
- gr.Markdown("# ๐Ÿ” AI Search Assistant (ZeroGPU Version)")
629
- gr.Markdown("### Powered by DeepSeek & Real-time Web Results with Voice")
630
- gr.Markdown("*(UI will block during processing for ZeroGPU compatibility)*")
631
-
632
  with gr.Column(elem_classes="search-container"):
633
  with gr.Row(elem_classes="search-box"):
634
  search_input = gr.Textbox(label="", placeholder="Ask anything...", scale=5, container=False)
635
  voice_select = gr.Dropdown(choices=list(VOICE_CHOICES.keys()), value=list(VOICE_CHOICES.keys())[0], label="", scale=1, min_width=180, container=False, elem_classes="voice-selector")
636
  search_btn = gr.Button("Search", variant="primary", scale=0, min_width=100)
637
-
638
  with gr.Row(elem_classes="results-container"):
639
  with gr.Column(scale=3):
640
- chatbot_display = gr.Chatbot(
641
- label="Conversation", bubble_full_width=True, height=500,
642
- elem_classes="chat-history", type="messages", show_label=False,
643
- avatar_images=(None, os.path.join(KOKORO_PATH, "icon.png") if os.path.exists(os.path.join(KOKORO_PATH, "icon.png")) else "https://huggingface.co/spaces/gradio/chatbot-streaming/resolve/main/avatar.png")
644
- )
645
- # This Markdown will only show the *final* status/answer text
646
- answer_status_output = gr.Markdown(value="*Enter a query to start.*", elem_classes="answer-box markdown-content")
647
  audio_player = gr.Audio(label="Voice Response", type="numpy", autoplay=False, show_label=False, elem_classes="audio-player")
648
-
649
  with gr.Column(scale=2):
650
- with gr.Column(elem_classes="sources-box"):
651
- gr.Markdown("### Sources")
652
- sources_output_html = gr.HTML(value="<div class='no-sources'>Sources will appear here.</div>")
653
-
654
- with gr.Row(elem_classes="examples-container"):
655
- gr.Examples(
656
- examples=[ "Latest news about renewable energy", "Explain Large Language Models (LLMs)",
657
- "Symptoms and prevention tips for the flu", "Compare Python and JavaScript",
658
- "Summarize the Paris Agreement", ],
659
- inputs=search_input, label="Try these examples:",
660
- )
661
-
662
- # --- Event Handling Setup (Synchronous) ---
663
  event_inputs = [search_input, chat_history_state, voice_select]
664
- event_outputs = [ chatbot_display, answer_status_output, sources_output_html,
665
- audio_player, search_btn ]
666
-
667
- # Connect the SYNCHRONOUS handle_interaction function directly
668
- search_btn.click(
669
- fn=handle_interaction, # Use the synchronous handler
670
- inputs=event_inputs,
671
- outputs=event_outputs
672
- )
673
- search_input.submit(
674
- fn=handle_interaction, # Use the synchronous handler
675
- inputs=event_inputs,
676
- outputs=event_outputs
677
- )
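A note on this wiring: with a synchronous handler, Gradio expects exactly one return value per component listed in outputs, in order, and both the click and submit triggers can share a single fn/inputs/outputs triple. A minimal self-contained sketch of the same pattern (component names here are illustrative, not from the app):

import gradio as gr

def echo_handler(query: str):
    # One return value per output component, in the same order as `outputs`.
    return f"You asked: {query}", gr.Button(value="Search", interactive=True)

with gr.Blocks() as sketch:
    box = gr.Textbox(placeholder="Ask anything...")
    status = gr.Markdown()
    btn = gr.Button("Search")
    btn.click(fn=echo_handler, inputs=[box], outputs=[status, btn])
    box.submit(fn=echo_handler, inputs=[box], outputs=[status, btn])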
678
 
679
- # --- Main Execution ---
680
  if __name__ == "__main__":
681
  print("Starting Gradio application (Synchronous for ZeroGPU)...")
682
- # Ensure TTS setup thread has a chance to start
683
- time.sleep(1) # Small delay might help see initial TTS logs
684
- demo.queue(max_size=20).launch(
685
- debug=True,
686
- share=True,
687
- )
688
  print("Gradio application stopped.")
 
10
  import numpy as np
11
  from typing import List, Dict, Tuple, Any, Optional, Union
12
  from functools import lru_cache
13
+ # No asyncio needed
14
  import threading
15
+ # No ThreadPoolExecutor needed
16
  import warnings
17
  import traceback # For detailed error logging
18
  import re # For text cleaning
 
30
  TEMPERATURE = 0.7
31
  TOP_P = 0.95
32
  KOKORO_PATH = 'Kokoro-82M'
33
+ LLM_GPU_DURATION = 120 # Seconds
34
+ TTS_GPU_DURATION = 60 # Seconds
 
35
 
36
  # --- Initialization ---
 
37
  warnings.filterwarnings("ignore", category=UserWarning, message="TypedStorage is deprecated")
38
  warnings.filterwarnings("ignore", message="Backend 'inductor' is not available.")
39
 
40
  # --- LLM Initialization ---
41
  llm_model: Optional[AutoModelForCausalLM] = None
42
  llm_tokenizer: Optional[AutoTokenizer] = None
 
 
43
  try:
44
  print("[LLM Init] Initializing Language Model...")
45
  llm_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
46
  llm_tokenizer.pad_token = llm_tokenizer.eos_token
47
+ llm_device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
48
  torch_dtype = torch.float16 if llm_device == "cuda" else torch.float32
 
49
  device_map = "auto"
50
  print(f"[LLM Init] Preparing model load (target device via ZeroGPU: cuda, dtype={torch_dtype})")
 
51
  llm_model = AutoModelForCausalLM.from_pretrained(
52
+ MODEL_NAME, device_map=device_map, low_cpu_mem_usage=True, torch_dtype=torch_dtype,
 
 
 
53
  )
54
+ print(f"[LLM Init] LLM loaded configuration successfully.")
55
  llm_model.eval()
 
56
  except Exception as e:
57
  print(f"[LLM Init] FATAL: Error initializing LLM model: {str(e)}")
58
+ print(traceback.format_exc()); llm_model = None; llm_tokenizer = None
 
 
59
  print("[LLM Init] LLM features will be unavailable.")
60
 
 
61
  # --- TTS Initialization ---
62
+ VOICE_CHOICES = { '🇺🇸 Female (Default)': 'af', '🇺🇸 Bella': 'af_bella', '🇺🇸 Sarah': 'af_sarah', '🇺🇸 Nicole': 'af_nicole' }
63
  TTS_ENABLED = False
64
  tts_model: Optional[Any] = None
65
  voicepacks: Dict[str, Any] = {}
 
70
  print(f"Running command: {' '.join(cmd)}")
71
  try:
72
  result = subprocess.run(cmd, check=check, capture_output=True, text=True, cwd=cwd, timeout=timeout)
73
+ # Print output more selectively
74
  if not check or result.returncode != 0:
75
+ if result.stdout: print(f" Stdout: {result.stdout.strip()}")
76
+ if result.stderr: print(f" Stderr: {result.stderr.strip()}")
77
  elif result.returncode == 0 and ('clone' in cmd or 'pull' in cmd or 'install' in cmd):
78
+ print(f" Command successful.")
79
  return result
80
+ except FileNotFoundError: print(f" Error: Command not found - {cmd[0]}"); raise
81
+ except subprocess.TimeoutExpired: print(f" Error: Command timed out - {' '.join(cmd)}"); raise
 
 
 
 
82
  except subprocess.CalledProcessError as e:
83
  print(f" Error running command: {' '.join(e.cmd)} (Code: {e.returncode})")
84
  if e.stdout: print(f" Stdout: {e.stdout.strip()}")
 
86
  raise
87
 
88
  def setup_tts_task():
89
+ """Initializes Kokoro TTS model and dependencies (runs in background)."""
90
  global TTS_ENABLED, tts_model, voicepacks, tts_device
91
  print("[TTS Setup] Starting background initialization...")
92
+ tts_device_target = "cuda" # Target device when GPU is attached by decorator
93
+ print(f"[TTS Setup] Target device for TTS model (via @spaces.GPU): {tts_device_target}")
 
 
 
 
94
  can_sudo = shutil.which('sudo') is not None
95
  apt_cmd_prefix = ['sudo'] if can_sudo else []
96
  absolute_kokoro_path = os.path.abspath(KOKORO_PATH)
 
97
  try:
98
  # 1. Clone/Update Repo
99
  if not os.path.exists(absolute_kokoro_path):
100
+ print(f"[TTS Setup] Cloning repository to {absolute_kokoro_path}...")
101
+ try: _run_subprocess(['git', 'lfs', 'install', '--system', '--skip-repo'])
102
+ except Exception as lfs_err: print(f"[TTS Setup] Warning: git lfs install failed: {lfs_err}")
103
+ _run_subprocess(['git', 'clone', 'https://huggingface.co/hexgrad/Kokoro-82M', absolute_kokoro_path])
104
+ try: _run_subprocess(['git', 'lfs', 'pull'], cwd=absolute_kokoro_path)
105
+ except Exception as lfs_pull_err: print(f"[TTS Setup] Warning: git lfs pull failed: {lfs_pull_err}")
106
+ else: print(f"[TTS Setup] Directory {absolute_kokoro_path} already exists.")
 
 
107
 
108
  # 2. Install espeak
109
  print("[TTS Setup] Checking/Installing espeak...")
110
+ try:
111
+ _run_subprocess(apt_cmd_prefix + ['apt-get', 'update', '-qq'])
112
+ _run_subprocess(apt_cmd_prefix + ['apt-get', 'install', '-y', '-qq', 'espeak-ng'])
113
+ print("[TTS Setup] espeak-ng installed or already present.")
114
  except Exception:
115
+ print("[TTS Setup] espeak-ng installation failed, trying espeak...")
116
+ try: _run_subprocess(apt_cmd_prefix + ['apt-get', 'install', '-y', '-qq', 'espeak']); print("[TTS Setup] espeak installed or already present.")
117
+ except Exception as espeak_err: print(f"[TTS Setup] ERROR: Failed to install espeak: {espeak_err}. TTS disabled."); return
 
 
 
 
118
 
119
  # 3. Load Kokoro Model and Voices
120
  sys_path_updated = False
121
  if os.path.exists(absolute_kokoro_path):
122
+ print(f"[TTS Setup] Checking contents of: {absolute_kokoro_path}");
123
+ try: print(f"[TTS Setup] Contents: {os.listdir(absolute_kokoro_path)}")
124
+ except OSError as list_err: print(f"[TTS Setup] Warning: Could not list directory contents: {list_err}")
125
+ if absolute_kokoro_path not in sys.path: sys.path.insert(0, absolute_kokoro_path); sys_path_updated = True; print(f"[TTS Setup] Temporarily added {absolute_kokoro_path} to sys.path.")
126
+ try:
127
+ print("[TTS Setup] Attempting to import Kokoro modules...")
128
+ from models import build_model
129
+ from kokoro import generate as generate_tts_internal
130
+ print("[TTS Setup] Kokoro modules imported successfully.")
131
+ globals()['build_model'] = build_model; globals()['generate_tts_internal'] = generate_tts_internal
132
+ model_file = os.path.join(absolute_kokoro_path, 'kokoro-v0_19.pth')
133
+ if not os.path.exists(model_file): print(f"[TTS Setup] ERROR: Model file {model_file} not found. TTS disabled."); return
134
+ print(f"[TTS Setup] Loading TTS model config from {model_file} (to CPU first)...")
135
+ tts_model = build_model(model_file, 'cpu'); tts_model.eval(); print("[TTS Setup] TTS model structure loaded (CPU).")
136
+ loaded_voices = 0
137
+ for voice_name, voice_id in VOICE_CHOICES.items():
138
+ vp_path = os.path.join(absolute_kokoro_path, 'voices', f'{voice_id}.pt')
139
+ if os.path.exists(vp_path):
140
+ try: voicepacks[voice_id] = torch.load(vp_path, map_location='cpu'); loaded_voices += 1; print(f"[TTS Setup] Loaded voice: {voice_id} ({voice_name}) to CPU")
141
+ except Exception as e: print(f"[TTS Setup] Warning: Failed to load voice {voice_id}: {str(e)}")
142
+ else: print(f"[TTS Setup] Info: Voice file {vp_path} not found.")
143
+ if loaded_voices == 0: print("[TTS Setup] ERROR: No voicepacks loaded. TTS disabled."); tts_model = None; return
144
+ TTS_ENABLED = True; print(f"[TTS Setup] Initialization successful. {loaded_voices} voices loaded. TTS Enabled: {TTS_ENABLED}")
145
+ except ImportError as ie: print(f"[TTS Setup] ERROR: Failed to import Kokoro modules: {ie}."); print(traceback.format_exc())
146
+ except Exception as load_err: print(f"[TTS Setup] ERROR: Exception during TTS loading: {load_err}. TTS disabled."); print(traceback.format_exc())
147
+ finally:
148
+ if sys_path_updated: # Cleanup sys.path
149
+ try:
150
+ if sys.path[0] == absolute_kokoro_path: sys.path.pop(0)
151
+ elif absolute_kokoro_path in sys.path: sys.path.remove(absolute_kokoro_path)
152
+ print(f"[TTS Setup] Cleaned up sys.path.")
153
+ except Exception as cleanup_err: print(f"[TTS Setup] Warning: Error cleaning sys.path: {cleanup_err}")
154
+ else: print(f"[TTS Setup] ERROR: Directory {absolute_kokoro_path} not found. TTS disabled.")
155
+ except Exception as e: print(f"[TTS Setup] ERROR: Unexpected error during setup: {str(e)}"); print(traceback.format_exc()); TTS_ENABLED = False; tts_model = None; voicepacks.clear()
156

157
  print("Starting TTS setup thread...")
158
  tts_setup_thread = threading.Thread(target=setup_tts_task, daemon=True)
159
  tts_setup_thread.start()
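Because setup_tts_task() runs on a daemon thread, TTS_ENABLED flips to True asynchronously; callers should treat it as eventually-true rather than immediate. A minimal sketch of a readiness wait built on the names above (the helper itself is ours, not from the app):

import time

def wait_for_tts(timeout_s: float = 60.0) -> bool:
    # Poll the flag set at the end of setup_tts_task(); bail out early if the
    # setup thread exited without enabling TTS, or once the deadline passes.
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        if TTS_ENABLED:
            return True
        if not tts_setup_thread.is_alive():
            return False
        time.sleep(0.5)
    return False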
160
 
161
+ # --- Core Logic Functions (Synchronous + @spaces.GPU) ---
 
 
 
162
  @lru_cache(maxsize=128)
163
  def get_web_results_sync(query: str, max_results: int = MAX_SEARCH_RESULTS) -> List[Dict[str, Any]]:
164
  """Synchronous web search function with caching."""
 
165
  print(f"[Web Search] Searching (sync): '{query}' (max_results={max_results})")
166
  try:
167
  with DDGS() as ddgs:
168
  results = list(ddgs.text(query, max_results=max_results, safesearch='moderate', timelimit='y'))
169
  print(f"[Web Search] Found {len(results)} results.")
170
+ formatted = [{"id": i + 1, "title": res.get("title", "No Title"), "snippet": res.get("body", "No Snippet"), "url": res.get("href", "#")} for i, res in enumerate(results)]
 
 
 
171
  return formatted
172
+ except Exception as e: print(f"[Web Search] Error: {e}"); return []
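Worth noting: @lru_cache(maxsize=128) keys on the exact (query, max_results) argument tuple, so a repeated identical query never re-hits the network for the life of the process, and stale results persist until eviction. The standard functools wrapper exposes inspection and invalidation:

print(get_web_results_sync.cache_info())   # CacheInfo(hits=..., misses=..., maxsize=128, currsize=...)
get_web_results_sync.cache_clear()         # drop all cached search results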
 
173
 
 
174
  def format_llm_prompt(query: str, context: List[Dict[str, Any]]) -> str:
175
  """Formats the prompt for the LLM."""
 
176
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
177
+ context_str = "\n\n".join([f"[{res['id']}] {html.escape(res['title'])}\n{html.escape(res['snippet'])}" for res in context]) if context else "No relevant web context found."
178
+ return f"""SYSTEM: You are a helpful AI assistant. Answer the user's query based *only* on the provided web search context. Cite sources using bracket notation like [1], [2]. If the context is insufficient, state that clearly. Use markdown for formatting. Do not add external information. Current Time: {current_time}\n\nCONTEXT:\n---\n{context_str}\n---\n\nUSER: {html.escape(query)}\n\nASSISTANT:"""
 
 
 
 
 
 
 
179
 
 
 
 
 
 
180
  def format_sources_html(web_results: List[Dict[str, Any]]) -> str:
181
  """Formats search results into HTML for display."""
 
182
  if not web_results: return "<div class='no-sources'>No sources found.</div>"
183
  items_html = ""
184
  for res in web_results:
185
+ title_safe = html.escape(res.get("title", "Source")); snippet_safe = html.escape(res.get("snippet", "")[:150] + ("..." if len(res.get("snippet", "")) > 150 else "")); url = html.escape(res.get("url", "#"))
 
 
186
  items_html += f"""<div class='source-item'><div class='source-number'>[{res['id']}]</div><div class='source-content'><a href="{url}" target="_blank" class='source-title' title="{url}">{title_safe}</a><div class='source-snippet'>{snippet_safe}</div></div></div>"""
187
  return f"<div class='sources-container'>{items_html}</div>"
188
 
 
 
189
  @spaces.GPU(duration=LLM_GPU_DURATION)
190
  def generate_llm_answer(prompt: str) -> str:
191
  """Generates answer using the LLM (Synchronous, GPU-decorated)."""
192
+ if not llm_model or not llm_tokenizer: print("[LLM Generate] LLM unavailable."); return "Error: Language Model unavailable."
193
  print(f"[LLM Generate] Requesting generation (sync, GPU) (prompt length {len(prompt)})...")
194
  start_time = time.time()
195
  try:
196
+ # ZeroGPU context should place model on GPU here
197
+ current_device = next(llm_model.parameters()).device; print(f"[LLM Generate] Model device: {current_device}")
198
+ inputs = llm_tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=1024, return_attention_mask=True).to(current_device)
199
  with torch.inference_mode(), torch.cuda.amp.autocast(enabled=(llm_model.dtype == torch.float16)):
200
+ outputs = llm_model.generate(inputs.input_ids, attention_mask=inputs.attention_mask, max_new_tokens=MAX_NEW_TOKENS, temperature=TEMPERATURE, top_p=TOP_P, pad_token_id=llm_tokenizer.eos_token_id, eos_token_id=llm_tokenizer.eos_token_id, do_sample=True, num_return_sequences=1)
201
+ output_ids = outputs[0][inputs.input_ids.shape[1]:]; answer_part = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
202
+ if not answer_part: answer_part = "*Model generated an empty response.*"
203
+ end_time = time.time(); print(f"[LLM Generate] Complete in {end_time - start_time:.2f}s.")
204
  return answer_part
205
+ except Exception as e: print(f"[LLM Generate] Error: {e}"); print(traceback.format_exc()); return f"Error generating answer."
206
 
 
 
 
 
 
 
 
207
  @spaces.GPU(duration=TTS_GPU_DURATION)
208
  def generate_tts_speech(text: str, voice_id: str = 'af') -> Optional[Tuple[int, np.ndarray]]:
209
+ """Generates speech using TTS model (Synchronous, GPU-decorated) with debugging."""
210
+ # 1. Check initial state
211
+ if not TTS_ENABLED: print("[TTS Generate] Skipping: TTS is not enabled."); return None
212
+ if not tts_model: print("[TTS Generate] Skipping: TTS model object is None."); return None
213
+ if 'generate_tts_internal' not in globals(): print("[TTS Generate] Skipping: generate_tts_internal not found."); return None
 
 
214
 
215
+ print(f"[TTS Generate] Requesting speech (sync, GPU) for text (len {len(text)}), req voice '{voice_id}'...")
216
  start_time = time.time()
217
 
218
+ # 2. Check input text validity
219
+ if not text or not text.strip() or text.startswith("Error:") or text.startswith("*Model"):
220
+ print(f"[TTS Generate] Skipping: Invalid/empty text: '{text[:100]}...'")
221
+ return None
222
+
223
  try:
224
+ # 3. Verify and select voice pack
225
  actual_voice_id = voice_id
226
  if voice_id not in voicepacks:
227
+ print(f"[TTS Generate] Warn: Voice '{voice_id}' missing. Trying 'af'. Available: {list(voicepacks.keys())}")
228
  actual_voice_id = 'af'
229
+ if 'af' not in voicepacks: print("[TTS Generate] Error: Default voice 'af' missing."); return None
230
+ print(f"[TTS Generate] Using voice_id: {actual_voice_id}")
231
+ voice_pack_data = voicepacks[actual_voice_id]
232
+ if voice_pack_data is None: print(f"[TTS Generate] Error: Voice pack data for '{actual_voice_id}' is None."); return None
 
 
 
 
 
 
233
 
234
+ # 4. Clean text
235
+ clean_text = re.sub(r'\[\d+\](\[\d+\])*', '', text); clean_text = re.sub(r'```.*?```', '', clean_text, flags=re.DOTALL); clean_text = re.sub(r'`[^`]*`', '', clean_text); clean_text = re.sub(r'^\s*[\*->]\s*', '', clean_text, flags=re.MULTILINE); clean_text = re.sub(r'[\*#_]', '', clean_text); clean_text = html.unescape(clean_text); clean_text = ' '.join(clean_text.split())
236
+ print(f"[TTS Generate] Cleaned text (first 100): '{clean_text[:100]}...'")
237
  if not clean_text: print("[TTS Generate] Skipping: Text empty after cleaning."); return None
238
 
239
+ # 5. Truncate text
240
  if len(clean_text) > MAX_TTS_CHARS:
241
  print(f"[TTS Generate] Truncating cleaned text from {len(clean_text)} to {MAX_TTS_CHARS} chars.")
242
+ clean_text = clean_text[:MAX_TTS_CHARS]; last_punct = max(clean_text.rfind(p) for p in '.?!; ')
 
243
  if last_punct != -1: clean_text = clean_text[:last_punct+1]
244
  clean_text += "..."
245
 
246
+ # 6. Prepare for GPU execution
247
+ current_device = 'cuda' # Assume GPU attached by decorator
248
+ moved_voice_pack = None
249
  gen_func = globals()['generate_tts_internal']
250
+ print(f"[TTS Generate] Preparing for generation on device '{current_device}'...")
251
 
 
 
252
  try:
253
+ # 7. Move model and data to GPU
254
+ print(f" TTS model device before move: {tts_model.device if hasattr(tts_model, 'device') else 'N/A'}")
255
  tts_model.to(current_device)
256
+ print(f" TTS model device after move: {tts_model.device}")
257
+ print(" Moving voice pack data to CUDA...")
258
+ if isinstance(voice_pack_data, dict): moved_voice_pack = {k: v.to(current_device) if isinstance(v, torch.Tensor) else v for k, v in voice_pack_data.items()}
259
+ elif isinstance(voice_pack_data, torch.Tensor): moved_voice_pack = voice_pack_data.to(current_device)
260
+ else: moved_voice_pack = voice_pack_data
261
+ print(" Voice pack data moved (or assumed not tensor).")
262
+
263
+ # 8. Call the internal TTS function
264
+ print(f"[TTS Generate] Calling Kokoro generate function (language code 'eng')...")
265
+ # --- Using language code 'eng' ---
266
+ audio_data, sr = gen_func(tts_model, clean_text, moved_voice_pack, 'eng')
267
+ print(f"[TTS Generate] Kokoro function returned. Type: {type(audio_data)}, Sample Rate: {sr}")
268
+
269
+ except Exception as kokoro_err:
270
+ print(f"[TTS Generate] **** ERROR during Kokoro generate call ****: {kokoro_err}")
271
+ print(traceback.format_exc()); return None
272
  finally:
273
+ # Move model back to CPU
 
274
  try:
275
  print("[TTS Generate] Moving TTS model back to CPU...")
276
+ if tts_model is not None: tts_model.to('cpu')
277
+ except Exception as move_back_err: print(f"[TTS Generate] Warn: Could not move TTS model back to CPU: {move_back_err}")
278
+
279
+ # 9. Process output audio data
280
+ if audio_data is None: print("[TTS Generate] Kokoro function returned None."); return None
281
+ print(f"[TTS Generate] Processing audio output. Type: {type(audio_data)}")
282
+ if isinstance(audio_data, torch.Tensor):
283
+ print(f" Original Tensor shape: {audio_data.shape}, dtype: {audio_data.dtype}, device: {audio_data.device}"); audio_np = audio_data.detach().cpu().numpy()
284
+ elif isinstance(audio_data, np.ndarray):
285
+ print(f" Original Numpy shape: {audio_data.shape}, dtype: {audio_data.dtype}"); audio_np = audio_data
286
+ else: print("[TTS Generate] Error: Unexpected audio data type from Kokoro."); return None
287
  audio_np = audio_np.flatten().astype(np.float32)
288
+ print(f"[TTS Generate] Final Numpy Array shape: {audio_np.shape}, dtype: {audio_np.dtype}, min: {np.min(audio_np):.2f}, max: {np.max(audio_np):.2f}")
289
+ if np.max(np.abs(audio_np)) < 1e-4: print("[TTS Generate] Warning: Generated audio appears silent.")
290
+ end_time = time.time(); print(f"[TTS Generate] Audio generated successfully in {end_time - start_time:.2f}s.")
291
+ actual_sr = sr if isinstance(sr, int) and sr > 0 else TTS_SAMPLE_RATE
292
+ print(f"[TTS Generate] Returning audio tuple with SR={actual_sr}.")
293
+ return (actual_sr, audio_np)
294
 
295
  except Exception as e:
296
+ print(f"[TTS Generate] **** UNEXPECTED ERROR in generate_tts_speech ****: {str(e)}")
297
+ print(traceback.format_exc()); return None
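The to-GPU/back-to-CPU dance above generalizes to a small helper; a minimal sketch that walks a voicepack-style object (dict of tensors, bare tensor, or anything else) and relocates only the tensors (the helper name is ours, not from the app):

import torch
from typing import Any

def move_tensors(obj: Any, device: str) -> Any:
    # Recursively relocate tensors; leave non-tensor values untouched.
    if isinstance(obj, torch.Tensor):
        return obj.to(device)
    if isinstance(obj, dict):
        return {k: move_tensors(v, device) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return type(obj)(move_tensors(v, device) for v in obj)
    return obj

# e.g. moved_pack = move_tensors(voicepacks['af'], 'cuda') before generation;
# the model itself returns to CPU in the finally block, as above.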
 
298
 
 
299
  def get_voice_id_from_display(voice_display_name: str) -> str:
300
+ """Maps display name to voice ID."""
301
  return VOICE_CHOICES.get(voice_display_name, 'af')
302
 
303
+ # --- Gradio Interaction Logic (Synchronous) ---
 
304
  ChatHistoryType = List[Dict[str, Optional[str]]]
305
 
306
  def handle_interaction(
307
  query: str,
308
  history: ChatHistoryType,
309
  selected_voice_display_name: str
310
+ ) -> Tuple[ChatHistoryType, str, str, Optional[Tuple[int, np.ndarray]], Any]:
311
  """Synchronous function to handle user queries for ZeroGPU."""
312
+ print(f"\n--- Handling Query (Sync) ---"); query = query.strip()
 
313
  print(f"Query: '{query}', Voice: '{selected_voice_display_name}'")
314
+ if not query: print("Empty query."); return history, "*Please enter a query.*", "<div class='no-sources'>Enter a query.</div>", None, gr.Button(value="Search", interactive=True)
315
 
316
+ current_history: ChatHistoryType = history + [{"role": "user", "content": query}, {"role": "assistant", "content": "*Processing...*"}]
317
+ status_update = "*Processing... Please wait.*"; sources_html = "<div class='searching'><span>Searching...</span></div>"; audio_data = None
318
+ button_update = gr.Button(value="Processing...", interactive=False); final_answer = ""
 
 
 
 
 
 
 
 
 
319
 
 
320
     try:
+        print("[Handler] Web search..."); start_t = time.time()
+        web_results = get_web_results_sync(query); print(f"[Handler] Web search took {time.time()-start_t:.2f}s")
+        sources_html = format_sources_html(web_results)
+
+        print("[Handler] LLM generation..."); start_t = time.time()
         llm_prompt = format_llm_prompt(query, web_results)
+        final_answer = generate_llm_answer(llm_prompt); print(f"[Handler] LLM generation took {time.time()-start_t:.2f}s")
+        status_update = final_answer
+
         tts_status_message = ""
+        print(f"[Handler] TTS Check: Enabled={TTS_ENABLED}, Model?={tts_model is not None}")
+        if TTS_ENABLED and tts_model is not None and not final_answer.startswith("Error"):
+            print("[Handler] TTS generation..."); start_t = time.time()
             voice_id = get_voice_id_from_display(selected_voice_display_name)
+            audio_data = generate_tts_speech(final_answer, voice_id)  # Call decorated function
+            print(f"[Handler] TTS generation took {time.time()-start_t:.2f}s")
+            print(f"[Handler] Received audio_data: type={type(audio_data)}, shape={(audio_data[1].shape if audio_data else 'N/A')}")
+            if audio_data is None: tts_status_message = "\n\n*(Audio generation failed)*"
+        elif not TTS_ENABLED or tts_model is None:
+            tts_status_message = "\n\n*(TTS unavailable)*" if not tts_setup_thread.is_alive() else "\n\n*(TTS initializing...)*"
+        else: tts_status_message = "\n\n*(Audio skipped due to answer error)*"
+
         final_answer_with_status = final_answer + tts_status_message
         status_update = final_answer_with_status
+        current_history[-1]["content"] = final_answer_with_status  # Update final history item
+        button_update = gr.Button(value="Search", interactive=True)
         print("--- Query Handling Complete (Sync) ---")

     except Exception as e:
+        print(f"[Handler] Error: {e}"); print(traceback.format_exc())
+        error_message = f"*Error: {e}*"; current_history[-1]["content"] = error_message
+        status_update = error_message; sources_html = "<div class='error'>Request failed.</div>"; audio_data = None
+        button_update = gr.Button(value="Search", interactive=True)
+
+    print(f"[Handler] Returning: hist_len={len(current_history)}, status_len={len(status_update)}, sources_len={len(sources_html)}, audio?={audio_data is not None}, button_interact={button_update.interactive}")
+    return current_history, status_update, sources_html, audio_data, button_update
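+# The 5-tuple returned above maps positionally onto `event_outputs` in the UI section
+# below: (chatbot_display, answer_status_output, sources_output_html, audio_player, search_btn).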
 
 # --- Gradio UI Definition ---

 css = """
 /* ... [Your existing refined CSS] ... */
 .gradio-container { max-width: 1200px !important; background-color: #f7f7f8 !important; }

 .search-box button:hover { background: #1d4ed8 !important; }
 .search-box button:disabled { background: #9ca3af !important; cursor: not-allowed; }
 .results-container { background: transparent; padding: 0; margin-top: 1.5rem; }
+.answer-box { background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 1rem; color: #1f2937; margin-bottom: 0.5rem; box-shadow: 0 2px 8px rgba(0,0,0,0.05); min-height: 50px;}
 .answer-box p { color: #374151; line-height: 1.7; margin:0;}
 .answer-box code { background: #f3f4f6; border-radius: 4px; padding: 2px 4px; color: #4b5563; font-size: 0.9em; }
 .sources-box { background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 1.5rem; }
 .sources-box h3 { margin-top: 0; margin-bottom: 1rem; color: #111827; font-size: 1.2rem; }
 .sources-container { margin-top: 0; }
+.source-item { display: flex; padding: 10px 0; margin: 0; border-bottom: 1px solid #f3f4f6; }
 .source-item:last-child { border-bottom: none; }
 .source-number { font-weight: bold; margin-right: 12px; color: #6b7280; width: 20px; text-align: right; flex-shrink: 0;}
+.source-content { flex: 1; min-width: 0;}
+.source-title { color: #2563eb; font-weight: 500; text-decoration: none; display: block; margin-bottom: 4px; font-size: 0.95em; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;}
 .source-title:hover { color: #1d4ed8; text-decoration: underline; }
 .source-snippet { color: #4b5563; font-size: 0.9em; line-height: 1.5; }
 .chat-history { max-height: 500px; overflow-y: auto; background: #f9fafb; border: 1px solid #e5e7eb; border-radius: 8px; scrollbar-width: thin; scrollbar-color: #d1d5db #f9fafb; }

 .chat-history::-webkit-scrollbar-track { background: #f9fafb; }
 .chat-history::-webkit-scrollbar-thumb { background-color: #d1d5db; border-radius: 20px; }
 .examples-container { background: #f9fafb; border-radius: 8px; padding: 1rem; margin-top: 1rem; border: 1px solid #e5e7eb; }
+.examples-container button { background: white !important; border: 1px solid #d1d5db !important; color: #374151 !important; margin: 4px !important; font-size: 0.9em !important; padding: 6px 12px !important; border-radius: 4px !important; cursor: pointer;}
 .examples-container button:hover { background: #f3f4f6 !important; border-color: #adb5bd !important; }
 .markdown-content { color: #374151 !important; font-size: 1rem; line-height: 1.7; }
+/* ... other markdown styles ... */
 .voice-selector { margin: 0; padding: 0; height: 100%; }
 .voice-selector div[data-testid="dropdown"] { height: 100% !important; border-radius: 0 !important;}
+.voice-selector select { background: white !important; color: #374151 !important; border: 1px solid #d1d5db !important; border-left: none !important; border-right: none !important; border-radius: 0 !important; height: 100% !important; padding: 0 10px !important; appearance: none !important; -webkit-appearance: none !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%236b7280' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important; background-position: right 0.5rem center !important; background-repeat: no-repeat !important; background-size: 1.5em 1.5em !important; padding-right: 2.5rem !important; }
 .voice-selector select:focus { border-color: #2563eb !important; box-shadow: none !important; z-index: 1; position: relative;}
 .audio-player { margin-top: 1rem; background: #f9fafb !important; border-radius: 8px !important; padding: 0.5rem !important; border: 1px solid #e5e7eb;}
 .audio-player audio { width: 100% !important; }

 .no-sources { padding: 1rem; text-align: center; color: #6b7280; background: #f9fafb; border-radius: 8px; border: 1px solid #e5e7eb;}
 @keyframes pulse { 0% { opacity: 0.7; } 50% { opacity: 1; } 100% { opacity: 0.7; } }
 .searching span { animation: pulse 1.5s infinite ease-in-out; display: inline-block; }
+/* Dark Mode Styles (optional) */
 .dark .gradio-container { background-color: #111827 !important; }
+/* ... other dark mode rules ... */
 """
 
 with gr.Blocks(title="AI Search Assistant (ZeroGPU Sync)", css=css, theme=gr.themes.Default(primary_hue="blue")) as demo:
     chat_history_state = gr.State([])

     with gr.Column():
+        with gr.Column(elem_id="header"): gr.Markdown("# 🔍 AI Search Assistant (ZeroGPU)\n### (UI blocks during processing)")

         with gr.Column(elem_classes="search-container"):
             with gr.Row(elem_classes="search-box"):
                 search_input = gr.Textbox(label="", placeholder="Ask anything...", scale=5, container=False)
                 voice_select = gr.Dropdown(choices=list(VOICE_CHOICES.keys()), value=list(VOICE_CHOICES.keys())[0], label="", scale=1, min_width=180, container=False, elem_classes="voice-selector")
                 search_btn = gr.Button("Search", variant="primary", scale=0, min_width=100)
 
         with gr.Row(elem_classes="results-container"):
             with gr.Column(scale=3):
+                chatbot_display = gr.Chatbot(label="Conversation", bubble_full_width=True, height=500, elem_classes="chat-history", type="messages", show_label=False, avatar_images=(None, os.path.join(KOKORO_PATH, "icon.png") if os.path.exists(os.path.join(KOKORO_PATH, "icon.png")) else "https://huggingface.co/spaces/gradio/chatbot-streaming/resolve/main/avatar.png"))
+                answer_status_output = gr.Markdown(value="*Enter query to start.*", elem_classes="answer-box markdown-content")  # Shows final text
                 audio_player = gr.Audio(label="Voice Response", type="numpy", autoplay=False, show_label=False, elem_classes="audio-player")
             with gr.Column(scale=2):
+                with gr.Column(elem_classes="sources-box"): gr.Markdown("### Sources"); sources_output_html = gr.HTML(value="<div class='no-sources'>Sources appear here.</div>")
+                with gr.Row(elem_classes="examples-container"): gr.Examples(examples=["Latest AI news", "Explain LLMs", "Flu symptoms/prevention", "Python vs JS", "Paris Agreement"], inputs=search_input, label="Try examples:")

     event_inputs = [search_input, chat_history_state, voice_select]
+    event_outputs = [chatbot_display, answer_status_output, sources_output_html, audio_player, search_btn]
+    search_btn.click(fn=handle_interaction, inputs=event_inputs, outputs=event_outputs)
+    search_input.submit(fn=handle_interaction, inputs=event_inputs, outputs=event_outputs)
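+    # Possible refinement (sketch only, not wired up here): disable the button immediately
+    # on click, then run the slow synchronous handler via Gradio's event chaining, so the
+    # UI gives instant feedback even though handle_interaction blocks:
+    #   search_btn.click(
+    #       fn=lambda: gr.Button(value="Processing...", interactive=False),
+    #       inputs=None, outputs=[search_btn],
+    #   ).then(fn=handle_interaction, inputs=event_inputs, outputs=event_outputs)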
 
 
 if __name__ == "__main__":
     print("Starting Gradio application (Synchronous for ZeroGPU)...")
+    time.sleep(1)  # Brief head start for the background TTS setup thread (setup continues after launch)
+    demo.queue(max_size=20).launch(debug=True, share=True)
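+    # Note: on Hugging Face Spaces, share=True is unnecessary (the Space is already
+    # served publicly); it mainly matters when running this script locally.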
 
 
 
 
  print("Gradio application stopped.")