Spaces:

sagar007
/

DeepSeekR1_Search

Runtime error

App Files Files Community

sagar007 committed on Mar 27

Commit

75a1aab

verified ·

1 Parent(s): aa6ca85

Update app.py

Browse files

Files changed (1) hide show

app.py +198 -442

app.py CHANGED Viewed

@@ -10,9 +10,9 @@ import subprocess
 import numpy as np
 from typing import List, Dict, Tuple, Any, Optional, Union
 from functools import lru_cache
-# No asyncio needed for synchronous version
 import threading
-# No ThreadPoolExecutor needed for synchronous version
 import warnings
 import traceback # For detailed error logging
 import re # For text cleaning
@@ -30,58 +30,36 @@ MAX_NEW_TOKENS = 300
 TEMPERATURE = 0.7
 TOP_P = 0.95
 KOKORO_PATH = 'Kokoro-82M'
-# Define expected durations for ZeroGPU decorator
-LLM_GPU_DURATION = 120 # Seconds (adjust based on expected LLM generation time)
-TTS_GPU_DURATION = 45  # Seconds (adjust based on expected TTS generation time)
 # --- Initialization ---
-# Suppress specific warnings
 warnings.filterwarnings("ignore", category=UserWarning, message="TypedStorage is deprecated")
 warnings.filterwarnings("ignore", message="Backend 'inductor' is not available.")
 # --- LLM Initialization ---
 llm_model: Optional[AutoModelForCausalLM] = None
 llm_tokenizer: Optional[AutoTokenizer] = None
-llm_device = "cpu"
 try:
     print("[LLM Init] Initializing Language Model...")
     llm_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     llm_tokenizer.pad_token = llm_tokenizer.eos_token
-    # For ZeroGPU, we assume GPU will be available when needed, load with cuda preference
-    # If running locally without GPU, it might try CPU based on device_map="auto" fallback
-    llm_device = "cuda" if torch.cuda.is_available() else "cpu" # Check initial availability info
     torch_dtype = torch.float16 if llm_device == "cuda" else torch.float32
-    # device_map="auto" is generally okay, ZeroGPU handles the actual assignment during decorated function call
     device_map = "auto"
     print(f"[LLM Init] Preparing model load (target device via ZeroGPU: cuda, dtype={torch_dtype})")
     llm_model = AutoModelForCausalLM.from_pretrained(
-        MODEL_NAME,
-        device_map=device_map, # Let accelerate/ZeroGPU handle placement
-        low_cpu_mem_usage=True,
-        torch_dtype=torch_dtype,
     )
-    print(f"[LLM Init] LLM loaded configuration successfully. Ready for GPU assignment via @spaces.GPU.")
     llm_model.eval()
 except Exception as e:
     print(f"[LLM Init] FATAL: Error initializing LLM model: {str(e)}")
-    print(traceback.format_exc())
-    llm_model = None
-    llm_tokenizer = None
     print("[LLM Init] LLM features will be unavailable.")
 # --- TTS Initialization ---
-# (TTS setup remains the same, runs in background)
-VOICE_CHOICES = {
-    '🇺🇸 Female (Default)': 'af',
-    '🇺🇸 Bella': 'af_bella',
-    '🇺🇸 Sarah': 'af_sarah',
-    '🇺🇸 Nicole': 'af_nicole'
-}
 TTS_ENABLED = False
 tts_model: Optional[Any] = None
 voicepacks: Dict[str, Any] = {}
@@ -92,18 +70,15 @@ def _run_subprocess(cmd: List[str], check: bool = True, cwd: Optional[str] = Non
     print(f"Running command: {' '.join(cmd)}")
     try:
         result = subprocess.run(cmd, check=check, capture_output=True, text=True, cwd=cwd, timeout=timeout)
         if not check or result.returncode != 0:
-             if result.stdout: print(f"  Stdout: {result.stdout.strip()}")
-             if result.stderr: print(f"  Stderr: {result.stderr.strip()}")
         elif result.returncode == 0 and ('clone' in cmd or 'pull' in cmd or 'install' in cmd):
-             print(f"  Command successful.")
         return result
-    except FileNotFoundError:
-        print(f"  Error: Command not found - {cmd[0]}")
-        raise
-    except subprocess.TimeoutExpired:
-        print(f"  Error: Command timed out - {' '.join(cmd)}")
-        raise
     except subprocess.CalledProcessError as e:
         print(f"  Error running command: {' '.join(e.cmd)} (Code: {e.returncode})")
         if e.stdout: print(f"  Stdout: {e.stdout.strip()}")
@@ -111,400 +86,277 @@ def _run_subprocess(cmd: List[str], check: bool = True, cwd: Optional[str] = Non
         raise
 def setup_tts_task():
-    """Initializes Kokoro TTS model and dependencies."""
     global TTS_ENABLED, tts_model, voicepacks, tts_device
     print("[TTS Setup] Starting background initialization...")
-    # TTS device determination depends on where generate_tts_speech will run.
-    # If decorated with @spaces.GPU, it will use CUDA when called.
-    tts_device = "cuda" # Assume it will run on GPU via decorator
-    print(f"[TTS Setup] Target device for TTS model (via @spaces.GPU): {tts_device}")
     can_sudo = shutil.which('sudo') is not None
     apt_cmd_prefix = ['sudo'] if can_sudo else []
     absolute_kokoro_path = os.path.abspath(KOKORO_PATH)
     try:
         # 1. Clone/Update Repo
         if not os.path.exists(absolute_kokoro_path):
-             print(f"[TTS Setup] Cloning repository to {absolute_kokoro_path}...")
-             # (Cloning logic as before)
-             try: _run_subprocess(['git', 'lfs', 'install', '--system', '--skip-repo'])
-             except Exception as lfs_err: print(f"[TTS Setup] Warning: git lfs install failed: {lfs_err}")
-             _run_subprocess(['git', 'clone', 'https://huggingface.co/hexgrad/Kokoro-82M', absolute_kokoro_path])
-             try: _run_subprocess(['git', 'lfs', 'pull'], cwd=absolute_kokoro_path)
-             except Exception as lfs_pull_err: print(f"[TTS Setup] Warning: git lfs pull failed: {lfs_pull_err}")
-        else:
-             print(f"[TTS Setup] Directory {absolute_kokoro_path} already exists.")
         # 2. Install espeak
         print("[TTS Setup] Checking/Installing espeak...")
-        try: # (espeak install logic as before)
-             _run_subprocess(apt_cmd_prefix + ['apt-get', 'update', '-qq'])
-             _run_subprocess(apt_cmd_prefix + ['apt-get', 'install', '-y', '-qq', 'espeak-ng'])
-             print("[TTS Setup] espeak-ng installed or already present.")
         except Exception:
-             print("[TTS Setup] espeak-ng installation failed, trying espeak...")
-             try:
-                 _run_subprocess(apt_cmd_prefix + ['apt-get', 'install', '-y', '-qq', 'espeak'])
-                 print("[TTS Setup] espeak installed or already present.")
-             except Exception as espeak_err:
-                 print(f"[TTS Setup] ERROR: Failed to install espeak: {espeak_err}. TTS disabled.")
-                 return
         # 3. Load Kokoro Model and Voices
         sys_path_updated = False
         if os.path.exists(absolute_kokoro_path):
-             print(f"[TTS Setup] Checking contents of: {absolute_kokoro_path}")
-             try: print(f"[TTS Setup] Contents: {os.listdir(absolute_kokoro_path)}")
-             except OSError as list_err: print(f"[TTS Setup] Warning: Could not list directory contents: {list_err}")
-             if absolute_kokoro_path not in sys.path:
-                 sys.path.insert(0, absolute_kokoro_path)
-                 sys_path_updated = True
-                 print(f"[TTS Setup] Temporarily added {absolute_kokoro_path} to sys.path.")
-             try:
-                 print("[TTS Setup] Attempting to import Kokoro modules...")
-                 from models import build_model
-                 from kokoro import generate as generate_tts_internal
-                 print("[TTS Setup] Kokoro modules imported successfully.")
-                 globals()['build_model'] = build_model
-                 globals()['generate_tts_internal'] = generate_tts_internal
-                 model_file = os.path.join(absolute_kokoro_path, 'kokoro-v0_19.pth')
-                 if not os.path.exists(model_file):
-                     print(f"[TTS Setup] ERROR: Model file {model_file} not found. TTS disabled.")
-                     return
-                 # Load model onto CPU initially, ZeroGPU decorator will handle moving/using GPU
-                 print(f"[TTS Setup] Loading TTS model config from {model_file} (target device: {tts_device} via @spaces.GPU)...")
-                 # Load onto CPU first to avoid issues before GPU is attached.
-                 # The build_model function might need adjustment if it forces device placement.
-                 # Assuming build_model can load structure then decorator handles device use.
-                 # If build_model *requires* device at load, this might need adjustment.
-                 tts_model = build_model(model_file, 'cpu') # <<< Load to CPU first
-                 tts_model.eval()
-                 print("[TTS Setup] TTS model structure loaded (CPU).")
-                 # Load voices onto CPU
-                 loaded_voices = 0
-                 for voice_name, voice_id in VOICE_CHOICES.items():
-                     voice_file_path = os.path.join(absolute_kokoro_path, 'voices', f'{voice_id}.pt')
-                     if os.path.exists(voice_file_path):
-                         try:
-                             print(f"[TTS Setup] Loading voice: {voice_id} ({voice_name}) to CPU")
-                             voicepacks[voice_id] = torch.load(voice_file_path, map_location='cpu') # <<< Load to CPU
-                             loaded_voices += 1
-                         except Exception as e: print(f"[TTS Setup] Warning: Failed to load voice {voice_id}: {str(e)}")
-                     else: print(f"[TTS Setup] Info: Voice file {voice_file_path} not found.")
-                 if loaded_voices == 0:
-                     print("[TTS Setup] ERROR: No voicepacks loaded. TTS disabled.")
-                     tts_model = None; return
-                 TTS_ENABLED = True
-                 print(f"[TTS Setup] Initialization successful. {loaded_voices} voices loaded. TTS Enabled: {TTS_ENABLED}")
-             except ImportError as ie:
-                 print(f"[TTS Setup] ERROR: Failed to import Kokoro modules: {ie}.")
-                 print(traceback.format_exc())
-             except Exception as load_err:
-                 print(f"[TTS Setup] ERROR: Exception during TTS model/voice loading: {load_err}. TTS disabled.")
-                 print(traceback.format_exc())
-             finally:
-                  if sys_path_updated: # Cleanup sys.path
-                      try:
-                          if sys.path[0] == absolute_kokoro_path: sys.path.pop(0)
-                          elif absolute_kokoro_path in sys.path: sys.path.remove(absolute_kokoro_path)
-                          print(f"[TTS Setup] Cleaned up sys.path.")
-                      except Exception as cleanup_err: print(f"[TTS Setup] Warning: Error cleaning sys.path: {cleanup_err}")
-        else:
-            print(f"[TTS Setup] ERROR: Directory {absolute_kokoro_path} not found. TTS disabled.")
-    except Exception as e:
-        print(f"[TTS Setup] ERROR: Unexpected error during setup: {str(e)}")
-        print(traceback.format_exc())
-        TTS_ENABLED = False; tts_model = None; voicepacks.clear()
-# Start TTS setup thread
 print("Starting TTS setup thread...")
 tts_setup_thread = threading.Thread(target=setup_tts_task, daemon=True)
 tts_setup_thread.start()
-# --- Core Logic Functions (SYNCHRONOUS + @spaces.GPU) ---
-# Web search remains synchronous
 @lru_cache(maxsize=128)
 def get_web_results_sync(query: str, max_results: int = MAX_SEARCH_RESULTS) -> List[Dict[str, Any]]:
     """Synchronous web search function with caching."""
-    # (Implementation remains the same as before)
     print(f"[Web Search] Searching (sync): '{query}' (max_results={max_results})")
     try:
         with DDGS() as ddgs:
             results = list(ddgs.text(query, max_results=max_results, safesearch='moderate', timelimit='y'))
             print(f"[Web Search] Found {len(results)} results.")
-            formatted = [{
-                "id": i + 1, "title": res.get("title", "No Title"),
-                "snippet": res.get("body", "No Snippet"), "url": res.get("href", "#"),
-            } for i, res in enumerate(results)]
             return formatted
-    except Exception as e:
-        print(f"[Web Search] Error: {e}"); return []
-# Prompt formatting remains the same
 def format_llm_prompt(query: str, context: List[Dict[str, Any]]) -> str:
     """Formats the prompt for the LLM."""
-    # (Implementation remains the same as before)
     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    context_str = "\n\n".join(
-        [f"[{res['id']}] {html.escape(res['title'])}\n{html.escape(res['snippet'])}" for res in context]
-    ) if context else "No relevant web context found."
-    return f"""SYSTEM: You are a helpful AI assistant. Answer the user's query based *only* on the provided web search context. Cite sources using bracket notation like [1], [2]. If the context is insufficient, state that clearly. Use markdown for formatting. Do not add external information. Current Time: {current_time}
-CONTEXT:
----
-{context_str}
----
-USER: {html.escape(query)}
-ASSISTANT:"""
-# Source formatting remains the same
 def format_sources_html(web_results: List[Dict[str, Any]]) -> str:
     """Formats search results into HTML for display."""
-    # (Implementation remains the same as before)
     if not web_results: return "<div class='no-sources'>No sources found.</div>"
     items_html = ""
     for res in web_results:
-        title_safe = html.escape(res.get("title", "Source"))
-        snippet_safe = html.escape(res.get("snippet", "")[:150] + ("..." if len(res.get("snippet", "")) > 150 else ""))
-        url = html.escape(res.get("url", "#"))
         items_html += f"""<div class='source-item'><div class='source-number'>[{res['id']}]</div><div class='source-content'><a href="{url}" target="_blank" class='source-title' title="{url}">{title_safe}</a><div class='source-snippet'>{snippet_safe}</div></div></div>"""
     return f"<div class='sources-container'>{items_html}</div>"
-# <<<--- ADD @spaces.GPU decorator AND MAKE SYNCHRONOUS --->>>
 @spaces.GPU(duration=LLM_GPU_DURATION)
 def generate_llm_answer(prompt: str) -> str:
     """Generates answer using the LLM (Synchronous, GPU-decorated)."""
-    if not llm_model or not llm_tokenizer:
-        print("[LLM Generate] LLM model or tokenizer not available.")
-        return "Error: Language Model is not available."
     print(f"[LLM Generate] Requesting generation (sync, GPU) (prompt length {len(prompt)})...")
     start_time = time.time()
     try:
-        # Ensure model is on the GPU (ZeroGPU should handle this)
-        # It might be safer to explicitly move model IF ZeroGPU doesn't guarantee it.
-        # Let's assume ZeroGPU handles the context for now.
-        current_device = next(llm_model.parameters()).device
-        print(f"[LLM Generate] Model currently on device: {current_device}") # Debug device
-        inputs = llm_tokenizer(
-            prompt, return_tensors="pt", padding=True, truncation=True,
-            max_length=1024, return_attention_mask=True
-        ).to(current_device) # Send input to model's device
         with torch.inference_mode(), torch.cuda.amp.autocast(enabled=(llm_model.dtype == torch.float16)):
-            # Direct synchronous call
-            outputs = llm_model.generate(
-                inputs.input_ids,
-                attention_mask=inputs.attention_mask,
-                max_new_tokens=MAX_NEW_TOKENS,
-                temperature=TEMPERATURE, top_p=TOP_P,
-                pad_token_id=llm_tokenizer.eos_token_id,
-                eos_token_id=llm_tokenizer.eos_token_id,
-                do_sample=True, num_return_sequences=1
-            )
-        output_ids = outputs[0][inputs.input_ids.shape[1]:]
-        answer_part = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
-        if not answer_part: answer_part = "*Model generated an empty response.*"
-        end_time = time.time()
-        print(f"[LLM Generate] Generation complete in {end_time - start_time:.2f}s. Length: {len(answer_part)}")
         return answer_part
-    except Exception as e:
-        print(f"[LLM Generate] Error: {e}")
-        print(traceback.format_exc())
-        return f"Error during answer generation: Check logs."
-# <<<--- ADD @spaces.GPU decorator AND MAKE SYNCHRONOUS --->>>
 @spaces.GPU(duration=TTS_GPU_DURATION)
 def generate_tts_speech(text: str, voice_id: str = 'af') -> Optional[Tuple[int, np.ndarray]]:
-    """Generates speech using TTS model (Synchronous, GPU-decorated)."""
-    if not TTS_ENABLED or not tts_model or 'generate_tts_internal' not in globals():
-        print("[TTS Generate] Skipping: TTS not ready.")
-        return None
-    if not text or not text.strip() or text.startswith("Error:") or text.startswith("*Model"):
-        print("[TTS Generate] Skipping: Invalid or empty text.")
-        return None
-    print(f"[TTS Generate] Requesting speech (sync, GPU) (length {len(text)}, voice '{voice_id}')...")
     start_time = time.time()
     try:
         actual_voice_id = voice_id
         if voice_id not in voicepacks:
-            print(f"[TTS Generate] Warning: Voice '{voice_id}' not loaded. Trying 'af'.")
             actual_voice_id = 'af'
-            if 'af' not in voicepacks: print("[TTS Generate] Error: Default voice 'af' unavailable."); return None
-        # Clean text (same cleaning logic as before)
-        clean_text = re.sub(r'\[\d+\](\[\d+\])*', '', text)
-        clean_text = re.sub(r'```.*?```', '', clean_text, flags=re.DOTALL)
-        clean_text = re.sub(r'`[^`]*`', '', clean_text)
-        clean_text = re.sub(r'^\s*[\*->]\s*', '', clean_text, flags=re.MULTILINE)
-        clean_text = re.sub(r'[\*#_]', '', clean_text)
-        clean_text = html.unescape(clean_text)
-        clean_text = ' '.join(clean_text.split())
         if not clean_text: print("[TTS Generate] Skipping: Text empty after cleaning."); return None
         if len(clean_text) > MAX_TTS_CHARS:
             print(f"[TTS Generate] Truncating cleaned text from {len(clean_text)} to {MAX_TTS_CHARS} chars.")
-            clean_text = clean_text[:MAX_TTS_CHARS]
-            last_punct = max(clean_text.rfind(p) for p in '.?!; ')
             if last_punct != -1: clean_text = clean_text[:last_punct+1]
             clean_text += "..."
-        print(f"[TTS Generate] Generating audio for: '{clean_text[:100]}...'")
         gen_func = globals()['generate_tts_internal']
-        voice_pack_data = voicepacks[actual_voice_id]
-        # *** Crucial for ZeroGPU: Move TTS model and voicepack to CUDA within the decorated function ***
-        current_device = 'cuda' # Assume GPU is attached by decorator
         try:
-            print(f"[TTS Generate] Moving TTS model to {current_device}...")
             tts_model.to(current_device)
-            # Move voicepack data (might be a dict of tensors)
-            if isinstance(voice_pack_data, dict):
-                 moved_voice_pack = {k: v.to(current_device) if isinstance(v, torch.Tensor) else v for k, v in voice_pack_data.items()}
-            elif isinstance(voice_pack_data, torch.Tensor):
-                 moved_voice_pack = voice_pack_data.to(current_device)
-            else:
-                 moved_voice_pack = voice_pack_data # Assume not tensors if not dict/tensor
-            print(f"[TTS Generate] TTS model and voicepack on {current_device}.")
-            # Direct synchronous call on GPU
-            audio_data, _ = gen_func(tts_model, clean_text, moved_voice_pack, 'afr')
         finally:
-            # *** Optional but recommended: Move model back to CPU to free GPU memory if needed ***
-            # ZeroGPU might handle this, but explicit move-back can be safer if running locally too
             try:
                  print("[TTS Generate] Moving TTS model back to CPU...")
-                 tts_model.to('cpu')
-                 # No need to move voicepack back, it's loaded to CPU initially
-            except Exception as move_back_err:
-                 print(f"[TTS Generate] Warning: Could not move TTS model back to CPU: {move_back_err}")
-        # Process output (remains same)
-        if isinstance(audio_data, torch.Tensor): audio_np = audio_data.detach().cpu().numpy()
-        elif isinstance(audio_data, np.ndarray): audio_np = audio_data
-        else: print("[TTS Generate] Warning: Unexpected audio data type."); return None
         audio_np = audio_np.flatten().astype(np.float32)
-        end_time = time.time()
-        print(f"[TTS Generate] Audio generated in {end_time - start_time:.2f}s. Shape: {audio_np.shape}")
-        return (TTS_SAMPLE_RATE, audio_np)
     except Exception as e:
-        print(f"[TTS Generate] Error: {str(e)}")
-        print(traceback.format_exc())
-        return None
-# Voice ID mapping remains same
 def get_voice_id_from_display(voice_display_name: str) -> str:
     return VOICE_CHOICES.get(voice_display_name, 'af')
-# --- Gradio Interaction Logic (SYNCHRONOUS) ---
 ChatHistoryType = List[Dict[str, Optional[str]]]
 def handle_interaction(
     query: str,
     history: ChatHistoryType,
     selected_voice_display_name: str
-) -> Tuple[ChatHistoryType, str, str, Optional[Tuple[int, np.ndarray]], Any]: # Return type matches outputs
     """Synchronous function to handle user queries for ZeroGPU."""
-    print(f"\n--- Handling Query (Sync) ---")
-    query = query.strip()
     print(f"Query: '{query}', Voice: '{selected_voice_display_name}'")
-    if not query:
-        print("Empty query received.")
-        # Return initial state immediately
-        return history, "*Please enter a non-empty query.*", "<div class='no-sources'>Enter a query to search.</div>", None, gr.Button(value="Search", interactive=True)
-    # Initial state updates (won't be seen until the end in Gradio)
-    current_history: ChatHistoryType = history + [{"role": "user", "content": query}]
-    current_history.append({"role": "assistant", "content": "*Processing... Please wait.*"}) # Placeholder
-    status_update = "*Processing... Please wait.*"
-    sources_html = "<div class='searching'><span>Searching & Processing...</span></div>"
-    audio_data = None
-    button_update = gr.Button(value="Processing...", interactive=False) # Disabled during processing
-    # --- Start Blocking Operations ---
     try:
-        # 1. Perform Web Search (Sync)
-        print("[Handler] Performing web search...")
-        web_results = get_web_results_sync(query)
-        sources_html = format_sources_html(web_results) # Update sources now
-        # 2. Generate LLM Answer (Sync, Decorated)
-        print("[Handler] Generating LLM answer...")
-        status_update = "*Generating answer...*" # Update status text
-        # (UI won't update here yet)
         llm_prompt = format_llm_prompt(query, web_results)
-        final_answer = generate_llm_answer(llm_prompt) # This call triggers GPU attachment
-        status_update = final_answer # Answer generated
-        # 3. Generate TTS Speech (Sync, Decorated, Optional)
         tts_status_message = ""
-        if TTS_ENABLED and not final_answer.startswith("Error"):
-            print("[Handler] Generating TTS speech...")
-            status_update += "\n\n*(Generating audio...)*" # Append status
-            # (UI won't update here yet)
             voice_id = get_voice_id_from_display(selected_voice_display_name)
-            audio_data = generate_tts_speech(final_answer, voice_id) # This call triggers GPU attachment
-            if audio_data is None:
-                tts_status_message = "\n\n*(Audio generation failed)*"
-        elif not TTS_ENABLED:
-             if tts_setup_thread.is_alive(): tts_status_message = "\n\n*(TTS initializing...)*"
-             else: tts_status_message = "\n\n*(TTS unavailable)*"
-        # Combine final answer with status
         final_answer_with_status = final_answer + tts_status_message
         status_update = final_answer_with_status
-        current_history[-1]["content"] = final_answer_with_status # Update history
-        button_update = gr.Button(value="Search", interactive=True) # Re-enable button
         print("--- Query Handling Complete (Sync) ---")
     except Exception as e:
-        print(f"[Handler] Error during processing: {e}")
-        print(traceback.format_exc())
-        error_message = f"*An error occurred: {e}*"
-        current_history[-1]["content"] = error_message # Update history with error
-        status_update = error_message
-        sources_html = "<div class='error'>Request failed.</div>"
-        audio_data = None
-        button_update = gr.Button(value="Search", interactive=True) # Re-enable button on error
-    # Return the final state tuple for all outputs
-    return current_history, status_update, sources_html, audio_data, button_update
 # --- Gradio UI Definition ---
-# (CSS remains the same)
 css = """
 /* ... [Your existing refined CSS] ... */
 .gradio-container { max-width: 1200px !important; background-color: #f7f7f8 !important; }
@@ -523,17 +375,17 @@ css = """
 .search-box button:hover { background: #1d4ed8 !important; }
 .search-box button:disabled { background: #9ca3af !important; cursor: not-allowed; }
 .results-container { background: transparent; padding: 0; margin-top: 1.5rem; }
-.answer-box { /* Now used for status/final text */ background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 1rem; color: #1f2937; margin-bottom: 0.5rem; box-shadow: 0 2px 8px rgba(0,0,0,0.05); min-height: 50px;}
 .answer-box p { color: #374151; line-height: 1.7; margin:0;}
 .answer-box code { background: #f3f4f6; border-radius: 4px; padding: 2px 4px; color: #4b5563; font-size: 0.9em; }
 .sources-box { background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 1.5rem; }
 .sources-box h3 { margin-top: 0; margin-bottom: 1rem; color: #111827; font-size: 1.2rem; }
 .sources-container { margin-top: 0; }
-.source-item { display: flex; padding: 10px 0; margin: 0; border-bottom: 1px solid #f3f4f6; transition: background-color 0.2s; }
 .source-item:last-child { border-bottom: none; }
 .source-number { font-weight: bold; margin-right: 12px; color: #6b7280; width: 20px; text-align: right; flex-shrink: 0;}
-.source-content { flex: 1; min-width: 0;} /* Allow content to shrink */
-.source-title { color: #2563eb; font-weight: 500; text-decoration: none; display: block; margin-bottom: 4px; transition: all 0.2s; font-size: 0.95em; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;}
 .source-title:hover { color: #1d4ed8; text-decoration: underline; }
 .source-snippet { color: #4b5563; font-size: 0.9em; line-height: 1.5; }
 .chat-history { max-height: 500px; overflow-y: auto; background: #f9fafb; border: 1px solid #e5e7eb; border-radius: 8px; scrollbar-width: thin; scrollbar-color: #d1d5db #f9fafb; }
@@ -542,25 +394,13 @@ css = """
 .chat-history::-webkit-scrollbar-track { background: #f9fafb; }
 .chat-history::-webkit-scrollbar-thumb { background-color: #d1d5db; border-radius: 20px; }
 .examples-container { background: #f9fafb; border-radius: 8px; padding: 1rem; margin-top: 1rem; border: 1px solid #e5e7eb; }
-.examples-container button { background: white !important; border: 1px solid #d1d5db !important; color: #374151 !important; transition: all 0.2s; margin: 4px !important; font-size: 0.9em !important; padding: 6px 12px !important; border-radius: 4px !important; }
 .examples-container button:hover { background: #f3f4f6 !important; border-color: #adb5bd !important; }
 .markdown-content { color: #374151 !important; font-size: 1rem; line-height: 1.7; }
-.markdown-content h1, .markdown-content h2, .markdown-content h3 { color: #111827 !important; margin-top: 1.2em !important; margin-bottom: 0.6em !important; font-weight: 600; }
-.markdown-content h1 { font-size: 1.6em !important; border-bottom: 1px solid #e5e7eb; padding-bottom: 0.3em; }
-.markdown-content h2 { font-size: 1.4em !important; border-bottom: 1px solid #e5e7eb; padding-bottom: 0.3em;}
-.markdown-content h3 { font-size: 1.2em !important; }
-.markdown-content a { color: #2563eb !important; text-decoration: none !important; transition: all 0.2s; }
-.markdown-content a:hover { color: #1d4ed8 !important; text-decoration: underline !important; }
-.markdown-content code { background: #f3f4f6 !important; padding: 2px 6px !important; border-radius: 4px !important; font-family: monospace !important; color: #4b5563; font-size: 0.9em; }
-.markdown-content pre { background: #f3f4f6 !important; padding: 12px !important; border-radius: 8px !important; overflow-x: auto !important; border: 1px solid #e5e7eb;}
-.markdown-content pre code { background: transparent !important; padding: 0 !important; border: none !important; font-size: 0.9em;}
-.markdown-content blockquote { border-left: 4px solid #d1d5db !important; padding-left: 1em !important; margin-left: 0 !important; color: #6b7280 !important; }
-.markdown-content table { border-collapse: collapse !important; width: 100% !important; margin: 1em 0; }
-.markdown-content th, .markdown-content td { padding: 8px 12px !important; border: 1px solid #d1d5db !important; text-align: left;}
-.markdown-content th { background: #f9fafb !important; font-weight: 600; }
 .voice-selector { margin: 0; padding: 0; height: 100%; }
 .voice-selector div[data-testid="dropdown"] { height: 100% !important; border-radius: 0 !important;}
-.voice-selector select { background: white !important; color: #374151 !important; border: 1px solid #d1d5db !important; border-left: none !important; border-right: none !important; border-radius: 0 !important; height: 100% !important; padding: 0 10px !important; transition: all 0.2s; appearance: none !important; -webkit-appearance: none !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%236b7280' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important; background-position: right 0.5rem center !important; background-repeat: no-repeat !important; background-size: 1.5em 1.5em !important; padding-right: 2.5rem !important; }
 .voice-selector select:focus { border-color: #2563eb !important; box-shadow: none !important; z-index: 1; position: relative;}
 .audio-player { margin-top: 1rem; background: #f9fafb !important; border-radius: 8px !important; padding: 0.5rem !important; border: 1px solid #e5e7eb;}
 .audio-player audio { width: 100% !important; }
@@ -570,119 +410,35 @@ css = """
 .no-sources { padding: 1rem; text-align: center; color: #6b7280; background: #f9fafb; border-radius: 8px; border: 1px solid #e5e7eb;}
 @keyframes pulse { 0% { opacity: 0.7; } 50% { opacity: 1; } 100% { opacity: 0.7; } }
 .searching span { animation: pulse 1.5s infinite ease-in-out; display: inline-block; }
-/* Dark Mode Styles */
 .dark .gradio-container { background-color: #111827 !important; }
-.dark #header { background: linear-gradient(135deg, #1f2937, #374151); }
-.dark #header h3 { color: #9ca3af; }
-.dark .search-container { background: #1f2937; border-color: #374151; }
-.dark .search-box input[type="text"] { background: #374151 !important; border-color: #4b5563 !important; color: #e5e7eb !important; }
-.dark .search-box input[type="text"]:focus { border-color: #3b82f6 !important; background: #4b5563 !important; box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.3) !important; }
-.dark .search-box input[type="text"]::placeholder { color: #9ca3af !important; }
-.dark .search-box button { background: #3b82f6 !important; }
-.dark .search-box button:hover { background: #2563eb !important; }
-.dark .search-box button:disabled { background: #4b5563 !important; }
-.dark .answer-box { background: #1f2937; border-color: #374151; color: #e5e7eb; }
-.dark .answer-box p { color: #d1d5db; }
-.dark .answer-box code { background: #374151; color: #9ca3af; }
-.dark .sources-box { background: #1f2937; border-color: #374151; }
-.dark .sources-box h3 { color: #f9fafb; }
-.dark .source-item { border-bottom-color: #374151; }
-.dark .source-item:hover { background-color: #374151; }
-.dark .source-number { color: #9ca3af; }
-.dark .source-title { color: #60a5fa; }
-.dark .source-title:hover { color: #93c5fd; }
-.dark .source-snippet { color: #d1d5db; }
-.dark .chat-history { background: #374151; border-color: #4b5563; scrollbar-color: #4b5563 #374151; color: #d1d5db;}
-.dark .chat-history::-webkit-scrollbar-track { background: #374151; }
-.dark .chat-history::-webkit-scrollbar-thumb { background-color: #4b5563; }
-.dark .examples-container { background: #374151; border-color: #4b5563; }
-.dark .examples-container button { background: #1f2937 !important; border-color: #4b5563 !important; color: #d1d5db !important; }
-.dark .examples-container button:hover { background: #4b5563 !important; border-color: #6b7280 !important; }
-.dark .markdown-content { color: #d1d5db !important; }
-.dark .markdown-content h1, .dark .markdown-content h2, .dark .markdown-content h3 { color: #f9fafb !important; border-bottom-color: #4b5563; }
-.dark .markdown-content a { color: #60a5fa !important; }
-.dark .markdown-content a:hover { color: #93c5fd !important; }
-.dark .markdown-content code { background: #374151 !important; color: #9ca3af; }
-.dark .markdown-content pre { background: #374151 !important; border-color: #4b5563;}
-.dark .markdown-content pre code { background: transparent !important; }
-.dark .markdown-content blockquote { border-left-color: #4b5563 !important; color: #9ca3af !important; }
-.dark .markdown-content th, .dark .markdown-content td { border-color: #4b5563 !important; }
-.dark .markdown-content th { background: #374151 !important; }
-.dark .voice-selector select { background: #1f2937 !important; color: #d1d5db !important; border-color: #4b5563 !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%239ca3af' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important;}
-.dark .voice-selector select:focus { border-color: #3b82f6 !important; }
-.dark .audio-player { background: #374151 !important; border-color: #4b5563;}
-.dark .audio-player audio::-webkit-media-controls-panel { background-color: #374151; }
-.dark .audio-player audio::-webkit-media-controls-play-button { color: #d1d5db; }
-.dark .audio-player audio::-webkit-media-controls-current-time-display { color: #9ca3af; }
-.dark .audio-player audio::-webkit-media-controls-time-remaining-display { color: #9ca3af; }
-.dark .searching { background: #1e3a8a; color: #93c5fd; border-color: #3b82f6; }
-.dark .error { background: #7f1d1d; color: #fca5a5; border-color: #ef4444; }
-.dark .no-sources { background: #374151; color: #9ca3af; border-color: #4b5563;}
 """
 with gr.Blocks(title="AI Search Assistant (ZeroGPU Sync)", css=css, theme=gr.themes.Default(primary_hue="blue")) as demo:
     chat_history_state = gr.State([])
     with gr.Column():
-        with gr.Column(elem_id="header"):
-            gr.Markdown("# 🔍 AI Search Assistant (ZeroGPU Version)")
-            gr.Markdown("### Powered by DeepSeek & Real-time Web Results with Voice")
-            gr.Markdown("*(UI will block during processing for ZeroGPU compatibility)*")
         with gr.Column(elem_classes="search-container"):
             with gr.Row(elem_classes="search-box"):
                 search_input = gr.Textbox(label="", placeholder="Ask anything...", scale=5, container=False)
                 voice_select = gr.Dropdown(choices=list(VOICE_CHOICES.keys()), value=list(VOICE_CHOICES.keys())[0], label="", scale=1, min_width=180, container=False, elem_classes="voice-selector")
                 search_btn = gr.Button("Search", variant="primary", scale=0, min_width=100)
             with gr.Row(elem_classes="results-container"):
                 with gr.Column(scale=3):
-                    chatbot_display = gr.Chatbot(
-                        label="Conversation", bubble_full_width=True, height=500,
-                        elem_classes="chat-history", type="messages", show_label=False,
-                        avatar_images=(None, os.path.join(KOKORO_PATH, "icon.png") if os.path.exists(os.path.join(KOKORO_PATH, "icon.png")) else "https://huggingface.co/spaces/gradio/chatbot-streaming/resolve/main/avatar.png")
-                    )
-                    # This Markdown will only show the *final* status/answer text
-                    answer_status_output = gr.Markdown(value="*Enter a query to start.*", elem_classes="answer-box markdown-content")
                     audio_player = gr.Audio(label="Voice Response", type="numpy", autoplay=False, show_label=False, elem_classes="audio-player")
                 with gr.Column(scale=2):
-                    with gr.Column(elem_classes="sources-box"):
-                        gr.Markdown("### Sources")
-                        sources_output_html = gr.HTML(value="<div class='no-sources'>Sources will appear here.</div>")
-            with gr.Row(elem_classes="examples-container"):
-                 gr.Examples(
-                    examples=[ "Latest news about renewable energy", "Explain Large Language Models (LLMs)",
-                               "Symptoms and prevention tips for the flu", "Compare Python and JavaScript",
-                               "Summarize the Paris Agreement", ],
-                    inputs=search_input, label="Try these examples:",
-                )
-    # --- Event Handling Setup (Synchronous) ---
     event_inputs = [search_input, chat_history_state, voice_select]
-    event_outputs = [ chatbot_display, answer_status_output, sources_output_html,
-                      audio_player, search_btn ]
-    # Connect the SYNCHRONOUS handle_interaction function directly
-    search_btn.click(
-        fn=handle_interaction, # Use the synchronous handler
-        inputs=event_inputs,
-        outputs=event_outputs
-    )
-    search_input.submit(
-        fn=handle_interaction, # Use the synchronous handler
-        inputs=event_inputs,
-        outputs=event_outputs
-    )
-# --- Main Execution ---
 if __name__ == "__main__":
     print("Starting Gradio application (Synchronous for ZeroGPU)...")
-    # Ensure TTS setup thread has a chance to start
-    time.sleep(1) # Small delay might help see initial TTS logs
-    demo.queue(max_size=20).launch(
-        debug=True,
-        share=True,
-    )
     print("Gradio application stopped.")

 import numpy as np
 from typing import List, Dict, Tuple, Any, Optional, Union
 from functools import lru_cache
+# No asyncio needed
 import threading
+# No ThreadPoolExecutor needed
 import warnings
 import traceback # For detailed error logging
 import re # For text cleaning
 TEMPERATURE = 0.7
 TOP_P = 0.95
 KOKORO_PATH = 'Kokoro-82M'
+LLM_GPU_DURATION = 120 # Seconds
+TTS_GPU_DURATION = 60  # Seconds
 # --- Initialization ---
 warnings.filterwarnings("ignore", category=UserWarning, message="TypedStorage is deprecated")
 warnings.filterwarnings("ignore", message="Backend 'inductor' is not available.")
 # --- LLM Initialization ---
 llm_model: Optional[AutoModelForCausalLM] = None
 llm_tokenizer: Optional[AutoTokenizer] = None
 # Load the tokenizer and causal LM once at import time. Any failure is logged and
 # leaves both globals set to None so the request handlers can report the LLM as unavailable.
 try:
     print("[LLM Init] Initializing Language Model...")
     llm_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     # Reuse EOS as the padding token so padded batches can be tokenized.
     llm_tokenizer.pad_token = llm_tokenizer.eos_token
     if torch.cuda.is_available():
         llm_device = "cuda"
     else:
         llm_device = "cpu"
     if llm_device == "cuda":
         torch_dtype = torch.float16
     else:
         torch_dtype = torch.float32
     device_map = "auto"
     print(f"[LLM Init] Preparing model load (target device via ZeroGPU: cuda, dtype={torch_dtype})")
     llm_model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
         device_map=device_map,
         low_cpu_mem_usage=True,
         torch_dtype=torch_dtype,
     )
     print("[LLM Init] LLM loaded configuration successfully.")
     llm_model.eval()
 except Exception as init_err:
     print(f"[LLM Init] FATAL: Error initializing LLM model: {init_err}")
     print(traceback.format_exc())
     llm_model = None
     llm_tokenizer = None
     print("[LLM Init] LLM features will be unavailable.")
 # --- TTS Initialization ---
+VOICE_CHOICES = { '🇺🇸 Female (Default)': 'af', '🇺🇸 Bella': 'af_bella', '🇺🇸 Sarah': 'af_sarah', '🇺🇸 Nicole': 'af_nicole' }
 TTS_ENABLED = False
 tts_model: Optional[Any] = None
 voicepacks: Dict[str, Any] = {}
     print(f"Running command: {' '.join(cmd)}")
     try:
         result = subprocess.run(cmd, check=check, capture_output=True, text=True, cwd=cwd, timeout=timeout)
+        # Print output more selectively
         if not check or result.returncode != 0:
+            if result.stdout: print(f"  Stdout: {result.stdout.strip()}")
+            if result.stderr: print(f"  Stderr: {result.stderr.strip()}")
         elif result.returncode == 0 and ('clone' in cmd or 'pull' in cmd or 'install' in cmd):
+            print(f"  Command successful.")
         return result
+    except FileNotFoundError: print(f"  Error: Command not found - {cmd[0]}"); raise
+    except subprocess.TimeoutExpired: print(f"  Error: Command timed out - {' '.join(cmd)}"); raise
     except subprocess.CalledProcessError as e:
         print(f"  Error running command: {' '.join(e.cmd)} (Code: {e.returncode})")
         if e.stdout: print(f"  Stdout: {e.stdout.strip()}")
         raise
 def setup_tts_task():
     """Initialize the Kokoro TTS model, voice packs and system dependencies.

     Meant to run in a background thread. Steps: clone the Kokoro repository if it
     is missing, install espeak-ng (falling back to espeak) through apt-get, import
     the Kokoro modules from the checkout, load the model and every voice listed in
     VOICE_CHOICES onto the CPU, and finally set TTS_ENABLED.

     Nothing is raised to the caller: every failure is logged, and the TTS globals
     are left in the "disabled" state (TTS_ENABLED False, tts_model None).
     """
     global TTS_ENABLED, tts_model, voicepacks
     print("[TTS Setup] Starting background initialization...")
     tts_device_target = "cuda"  # Target device when GPU is attached by decorator
     print(f"[TTS Setup] Target device for TTS model (via @spaces.GPU): {tts_device_target}")
     apt_cmd_prefix = ['sudo'] if shutil.which('sudo') is not None else []
     absolute_kokoro_path = os.path.abspath(KOKORO_PATH)
     try:
         # 1. Clone/Update Repo
         if os.path.exists(absolute_kokoro_path):
             print(f"[TTS Setup] Directory {absolute_kokoro_path} already exists.")
         else:
             print(f"[TTS Setup] Cloning repository to {absolute_kokoro_path}...")
             # git-lfs setup is best-effort: a failure is logged and the clone is still attempted.
             try:
                 _run_subprocess(['git', 'lfs', 'install', '--system', '--skip-repo'])
             except Exception as lfs_err:
                 print(f"[TTS Setup] Warning: git lfs install failed: {lfs_err}")
             _run_subprocess(['git', 'clone', 'https://huggingface.co/hexgrad/Kokoro-82M', absolute_kokoro_path])
             try:
                 _run_subprocess(['git', 'lfs', 'pull'], cwd=absolute_kokoro_path)
             except Exception as lfs_pull_err:
                 print(f"[TTS Setup] Warning: git lfs pull failed: {lfs_pull_err}")
         # 2. Install espeak
         print("[TTS Setup] Checking/Installing espeak...")
         try:
             _run_subprocess(apt_cmd_prefix + ['apt-get', 'update', '-qq'])
             _run_subprocess(apt_cmd_prefix + ['apt-get', 'install', '-y', '-qq', 'espeak-ng'])
             print("[TTS Setup] espeak-ng installed or already present.")
         except Exception:
             print("[TTS Setup] espeak-ng installation failed, trying espeak...")
             try:
                 _run_subprocess(apt_cmd_prefix + ['apt-get', 'install', '-y', '-qq', 'espeak'])
                 print("[TTS Setup] espeak installed or already present.")
             except Exception as espeak_err:
                 print(f"[TTS Setup] ERROR: Failed to install espeak: {espeak_err}. TTS disabled.")
                 return
         # 3. Load Kokoro Model and Voices
         if not os.path.exists(absolute_kokoro_path):
             print(f"[TTS Setup] ERROR: Directory {absolute_kokoro_path} not found. TTS disabled.")
             return
         print(f"[TTS Setup] Checking contents of: {absolute_kokoro_path}")
         try:
             print(f"[TTS Setup] Contents: {os.listdir(absolute_kokoro_path)}")
         except OSError as list_err:
             print(f"[TTS Setup] Warning: Could not list directory contents: {list_err}")
         # The Kokoro modules are imported straight from the checkout, so it must be on sys.path.
         sys_path_updated = False
         if absolute_kokoro_path not in sys.path:
             sys.path.insert(0, absolute_kokoro_path)
             sys_path_updated = True
             print(f"[TTS Setup] Temporarily added {absolute_kokoro_path} to sys.path.")
         try:
             print("[TTS Setup] Attempting to import Kokoro modules...")
             from models import build_model
             from kokoro import generate as generate_tts_internal
             print("[TTS Setup] Kokoro modules imported successfully.")
             # Published as module globals; generate_tts_speech looks generate_tts_internal up there.
             globals()['build_model'] = build_model
             globals()['generate_tts_internal'] = generate_tts_internal
             model_file = os.path.join(absolute_kokoro_path, 'kokoro-v0_19.pth')
             if not os.path.exists(model_file):
                 print(f"[TTS Setup] ERROR: Model file {model_file} not found. TTS disabled.")
                 return
             print(f"[TTS Setup] Loading TTS model config from {model_file} (to CPU first)...")
             tts_model = build_model(model_file, 'cpu')
             tts_model.eval()
             print("[TTS Setup] TTS model structure loaded (CPU).")
             loaded_voices = 0
             for voice_name, voice_id in VOICE_CHOICES.items():
                 vp_path = os.path.join(absolute_kokoro_path, 'voices', f'{voice_id}.pt')
                 if not os.path.exists(vp_path):
                     print(f"[TTS Setup] Info: Voice file {vp_path} not found.")
                     continue
                 try:
                     # NOTE(review): torch.load unpickles the file. The voice packs come from the
                     # cloned repository; consider weights_only=True if the installed torch supports it.
                     voicepacks[voice_id] = torch.load(vp_path, map_location='cpu')
                     loaded_voices += 1
                     print(f"[TTS Setup] Loaded voice: {voice_id} ({voice_name}) to CPU")
                 except Exception as e:
                     print(f"[TTS Setup] Warning: Failed to load voice {voice_id}: {str(e)}")
             if loaded_voices == 0:
                 print("[TTS Setup] ERROR: No voicepacks loaded. TTS disabled.")
                 tts_model = None
                 return
             TTS_ENABLED = True
             print(f"[TTS Setup] Initialization successful. {loaded_voices} voices loaded. TTS Enabled: {TTS_ENABLED}")
         except ImportError as ie:
             print(f"[TTS Setup] ERROR: Failed to import Kokoro modules: {ie}.")
             print(traceback.format_exc())
             # Leave the globals in the same disabled state as every other failure path.
             TTS_ENABLED = False
             tts_model = None
             voicepacks.clear()
         except Exception as load_err:
             print(f"[TTS Setup] ERROR: Exception during TTS loading: {load_err}. TTS disabled.")
             print(traceback.format_exc())
             # A model that was built but failed later must not stay half-initialized.
             TTS_ENABLED = False
             tts_model = None
             voicepacks.clear()
         finally:
             if sys_path_updated:  # Cleanup sys.path
                 try:
                     if absolute_kokoro_path in sys.path:
                         sys.path.remove(absolute_kokoro_path)
                     print("[TTS Setup] Cleaned up sys.path.")
                 except Exception as cleanup_err:
                     print(f"[TTS Setup] Warning: Error cleaning sys.path: {cleanup_err}")
     except Exception as e:
         print(f"[TTS Setup] ERROR: Unexpected error during setup: {str(e)}")
         print(traceback.format_exc())
         TTS_ENABLED = False
         tts_model = None
         voicepacks.clear()
 print("Starting TTS setup thread...")
 tts_setup_thread = threading.Thread(target=setup_tts_task, daemon=True)
 tts_setup_thread.start()
+# --- Core Logic Functions (Synchronous + @spaces.GPU) ---
 @lru_cache(maxsize=128)
 def _search_web_cached(query: str, max_results: int) -> Tuple[Dict[str, Any], ...]:
     """Run the DuckDuckGo text search and return the normalized results.

     Exceptions propagate on purpose: lru_cache only stores return values, so a
     failed search is retried on the next call instead of being memoized.
     """
     print(f"[Web Search] Searching (sync): '{query}' (max_results={max_results})")
     with DDGS() as ddgs:
         results = list(ddgs.text(query, max_results=max_results, safesearch='moderate', timelimit='y'))
     print(f"[Web Search] Found {len(results)} results.")
     return tuple(
         {
             "id": position,
             "title": res.get("title", "No Title"),
             "snippet": res.get("body", "No Snippet"),
             "url": res.get("href", "#"),
         }
         for position, res in enumerate(results, start=1)
     )
 def get_web_results_sync(query: str, max_results: int = MAX_SEARCH_RESULTS) -> List[Dict[str, Any]]:
     """Synchronous web search function with caching.

     Returns a list of dicts with the keys "id" (1-based), "title", "snippet" and
     "url". Successful searches are cached per (query, max_results); a search that
     raises is logged and yields an empty list without being cached. Each call
     returns fresh dict copies so callers cannot mutate the cached entries.
     """
     try:
         return [dict(item) for item in _search_web_cached(query, max_results)]
     except Exception as e:
         print(f"[Web Search] Error: {e}")
         return []
 # Keep the lru_cache introspection helpers reachable under the public name.
 get_web_results_sync.cache_info = _search_web_cached.cache_info
 get_web_results_sync.cache_clear = _search_web_cached.cache_clear
 def format_llm_prompt(query: str, context: List[Dict[str, Any]]) -> str:
     """Build the plain-text prompt sent to the LLM.

     Args:
         query: The user's question, inserted verbatim after "USER:".
         context: Search results; each dict must provide "id", "title" and "snippet".

     Returns:
         The prompt string, ending in "ASSISTANT:" so the model continues from there.

     The text is deliberately not HTML-escaped: the prompt is plain text, and
     escaping would feed the model entities such as "&#x27;" or "&amp;" in place of
     the original characters. HTML escaping is done where text is rendered as
     HTML (see format_sources_html).
     """
     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     if context:
         context_str = "\n\n".join(
             f"[{res['id']}] {res['title']}\n{res['snippet']}" for res in context
         )
     else:
         context_str = "No relevant web context found."
     return f"""SYSTEM: You are a helpful AI assistant. Answer the user's query based *only* on the provided web search context. Cite sources using bracket notation like [1], [2]. If the context is insufficient, state that clearly. Use markdown for formatting. Do not add external information. Current Time: {current_time}\n\nCONTEXT:\n---\n{context_str}\n---\n\nUSER: {query}\n\nASSISTANT:"""
 def format_sources_html(web_results: List[Dict[str, Any]]) -> str:
     """Formats search results into HTML for display.

     Args:
         web_results: Result dicts with the keys "id", "title", "snippet" and "url".
             Missing or None values fall back to defaults instead of raising.

     Returns:
         An HTML fragment: one "source-item" div per result inside a
         "sources-container" div, or a "no-sources" placeholder for empty input.

     All result text is treated as untrusted: it is HTML-escaped, snippets are cut
     to 150 characters, and only http(s) URLs are emitted as link targets.
     """
     if not web_results:
         return "<div class='no-sources'>No sources found.</div>"
     items = []
     for position, res in enumerate(web_results, start=1):
         number_safe = html.escape(str(res.get("id", position)))
         title_safe = html.escape(str(res.get("title") or "Source"))
         snippet = str(res.get("snippet") or "")
         # Truncate before escaping so an HTML entity is never cut in half.
         if len(snippet) > 150:
             snippet = snippet[:150] + "..."
         snippet_safe = html.escape(snippet)
         raw_url = str(res.get("url") or "#").strip()
         # Allow-list the scheme: anything else (e.g. "javascript:") becomes an inert link.
         if not raw_url.lower().startswith(("http://", "https://")):
             raw_url = "#"
         url = html.escape(raw_url)
         # rel="noopener noreferrer" keeps the opened page from reaching back via window.opener.
         items.append(
             f"""<div class='source-item'><div class='source-number'>[{number_safe}]</div><div class='source-content'><a href="{url}" target="_blank" rel="noopener noreferrer" class='source-title' title="{url}">{title_safe}</a><div class='source-snippet'>{snippet_safe}</div></div></div>"""
         )
     return f"<div class='sources-container'>{''.join(items)}</div>"
 @spaces.GPU(duration=LLM_GPU_DURATION)
 def generate_llm_answer(prompt: str) -> str:
     """Generate the assistant's answer for `prompt` (synchronous, GPU-decorated).

     Returns the decoded continuation with the prompt tokens removed. When the
     model or tokenizer is missing, or generation raises, an "Error..." string is
     returned instead of propagating the exception.
     """
     if not (llm_model and llm_tokenizer):
         print("[LLM Generate] LLM unavailable.")
         return "Error: Language Model unavailable."
     print(f"[LLM Generate] Requesting generation (sync, GPU) (prompt length {len(prompt)})...")
     started = time.time()
     try:
         # ZeroGPU context should place model on GPU here
         model_device = next(llm_model.parameters()).device
         print(f"[LLM Generate] Model device: {model_device}")
         encoded = llm_tokenizer(
             prompt,
             return_tensors="pt",
             padding=True,
             truncation=True,
             max_length=1024,
             return_attention_mask=True,
         ).to(model_device)
         with torch.inference_mode(), torch.cuda.amp.autocast(enabled=(llm_model.dtype == torch.float16)):
             generated = llm_model.generate(
                 encoded.input_ids,
                 attention_mask=encoded.attention_mask,
                 max_new_tokens=MAX_NEW_TOKENS,
                 temperature=TEMPERATURE,
                 top_p=TOP_P,
                 pad_token_id=llm_tokenizer.eos_token_id,
                 eos_token_id=llm_tokenizer.eos_token_id,
                 do_sample=True,
                 num_return_sequences=1,
             )
         # Keep only the newly generated tokens that follow the prompt.
         prompt_token_count = encoded.input_ids.shape[1]
         new_token_ids = generated[0][prompt_token_count:]
         answer = llm_tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
         if not answer:
             answer = "*Model generated empty response.*"
         elapsed = time.time() - started
         print(f"[LLM Generate] Complete in {elapsed:.2f}s.")
         return answer
     except Exception as gen_err:
         print(f"[LLM Generate] Error: {gen_err}")
         print(traceback.format_exc())
         return "Error generating answer."
 def _clean_text_for_tts(text: str) -> str:
     """Strip citations, code and markdown markup so the text reads naturally aloud."""
     cleaned = re.sub(r'\[\d+\](\[\d+\])*', '', text)  # citation markers such as [1][2]
     cleaned = re.sub(r'```.*?```', '', cleaned, flags=re.DOTALL)  # fenced code blocks
     cleaned = re.sub(r'`[^`]*`', '', cleaned)  # inline code
     # Leading bullet/quote markers. '-' is last in the class so it is a literal:
     # written as [\*->] it formed the range '*'..'>' and also ate leading digits.
     cleaned = re.sub(r'^\s*[*>-]\s*', '', cleaned, flags=re.MULTILINE)
     cleaned = re.sub(r'[\*#_]', '', cleaned)  # remaining emphasis/heading characters
     cleaned = html.unescape(cleaned)
     return ' '.join(cleaned.split())  # collapse all whitespace runs
 def _truncate_for_tts(text: str, limit: int) -> str:
     """Cut `text` to at most `limit` characters, back to the last punctuation or space, plus "..."."""
     if len(text) <= limit:
         return text
     print(f"[TTS Generate] Truncating cleaned text from {len(text)} to {limit} chars.")
     text = text[:limit]
     last_punct = max(text.rfind(p) for p in '.?!; ')
     if last_punct != -1:
         text = text[:last_punct + 1]
     return text + "..."
 @spaces.GPU(duration=TTS_GPU_DURATION)
 def generate_tts_speech(text: str, voice_id: str = 'af') -> Optional[Tuple[int, np.ndarray]]:
     """Generates speech using TTS model (Synchronous, GPU-decorated) with debugging.

     Args:
         text: The answer text; citations, code and markdown markup are removed first.
         voice_id: Key into `voicepacks`; falls back to 'af' when it is not loaded.

     Returns:
         (sample_rate, float32 mono samples), or None when TTS is unavailable, the
         text is unusable, or generation fails. Never raises.
     """
     # 1. Check initial state
     if not TTS_ENABLED:
         print("[TTS Generate] Skipping: TTS is not enabled.")
         return None
     if not tts_model:
         print("[TTS Generate] Skipping: TTS model object is None.")
         return None
     if 'generate_tts_internal' not in globals():
         print("[TTS Generate] Skipping: generate_tts_internal not found.")
         return None
     # 2. Check input text validity (before anything calls len() on it)
     if not text or not text.strip() or text.startswith(("Error:", "*Model")):
         print(f"[TTS Generate] Skipping: Invalid/empty text: '{(text or '')[:100]}...'")
         return None
     print(f"[TTS Generate] Requesting speech (sync, GPU) for text (len {len(text)}), req voice '{voice_id}'...")
     start_time = time.time()
     try:
         # 3. Verify and select voice pack
         actual_voice_id = voice_id
         if voice_id not in voicepacks:
             print(f"[TTS Generate] Warn: Voice '{voice_id}' missing. Trying 'af'. Available: {list(voicepacks.keys())}")
             actual_voice_id = 'af'
             if 'af' not in voicepacks:
                 print("[TTS Generate] Error: Default voice 'af' missing.")
                 return None
         print(f"[TTS Generate] Using voice_id: {actual_voice_id}")
         voice_pack_data = voicepacks[actual_voice_id]
         if voice_pack_data is None:
             print(f"[TTS Generate] Error: Voice pack data for '{actual_voice_id}' is None.")
             return None
         # 4. Clean text
         clean_text = _clean_text_for_tts(text)
         print(f"[TTS Generate] Cleaned text (first 100): '{clean_text[:100]}...'")
         if not clean_text:
             print("[TTS Generate] Skipping: Text empty after cleaning.")
             return None
         # 5. Truncate text
         clean_text = _truncate_for_tts(clean_text, MAX_TTS_CHARS)
         # 6. Prepare for GPU execution
         current_device = 'cuda'  # Assume GPU attached by decorator
         gen_func = globals()['generate_tts_internal']
         print(f"[TTS Generate] Preparing for generation on device '{current_device}'...")
         try:
             # 7. Move model and data to GPU
             # getattr keeps these debug prints from aborting generation when the model has no `.device`.
             print(f"  TTS model device before move: {getattr(tts_model, 'device', 'N/A')}")
             tts_model.to(current_device)
             print(f"  TTS model device after move: {getattr(tts_model, 'device', 'N/A')}")
             print("  Moving voice pack data to CUDA...")
             if isinstance(voice_pack_data, dict):
                 moved_voice_pack = {
                     k: v.to(current_device) if isinstance(v, torch.Tensor) else v
                     for k, v in voice_pack_data.items()
                 }
             elif isinstance(voice_pack_data, torch.Tensor):
                 moved_voice_pack = voice_pack_data.to(current_device)
             else:
                 moved_voice_pack = voice_pack_data
             print("  Voice pack data moved (or assumed not tensor).")
             # 8. Call the internal TTS function
             print(f"[TTS Generate] Calling Kokoro generate function (language code 'eng')...")
             # NOTE(review): confirm that 'eng' is a language code the Kokoro generate function
             # accepts and that its second return value is a sample rate; a non-int `sr` is
             # replaced by TTS_SAMPLE_RATE further down.
             audio_data, sr = gen_func(tts_model, clean_text, moved_voice_pack, 'eng')
             print(f"[TTS Generate] Kokoro function returned. Type: {type(audio_data)}, Sample Rate: {sr}")
         except Exception as kokoro_err:
             print(f"[TTS Generate] **** ERROR during Kokoro generate call ****: {kokoro_err}")
             print(traceback.format_exc())
             return None
         finally:
             # Move model back to CPU
             try:
                 print("[TTS Generate] Moving TTS model back to CPU...")
                 if tts_model is not None:
                     tts_model.to('cpu')
             except Exception as move_back_err:
                 print(f"[TTS Generate] Warn: Could not move TTS model back to CPU: {move_back_err}")
         # 9. Process output audio data
         if audio_data is None:
             print("[TTS Generate] Kokoro function returned None.")
             return None
         print(f"[TTS Generate] Processing audio output. Type: {type(audio_data)}")
         if isinstance(audio_data, torch.Tensor):
             print(f"  Original Tensor shape: {audio_data.shape}, dtype: {audio_data.dtype}, device: {audio_data.device}")
             audio_np = audio_data.detach().cpu().numpy()
         elif isinstance(audio_data, np.ndarray):
             print(f"  Original Numpy shape: {audio_data.shape}, dtype: {audio_data.dtype}")
             audio_np = audio_data
         else:
             print("[TTS Generate] Error: Unexpected audio data type from Kokoro.")
             return None
         audio_np = audio_np.flatten().astype(np.float32)
         # np.min/np.max raise on an empty array, so report that case explicitly.
         if audio_np.size == 0:
             print("[TTS Generate] Error: Generated audio is empty.")
             return None
         print(f"[TTS Generate] Final Numpy Array shape: {audio_np.shape}, dtype: {audio_np.dtype}, min: {np.min(audio_np):.2f}, max: {np.max(audio_np):.2f}")
         if np.max(np.abs(audio_np)) < 1e-4:
             print("[TTS Generate] Warning: Generated audio appears silent.")
         end_time = time.time()
         print(f"[TTS Generate] Audio generated successfully in {end_time - start_time:.2f}s.")
         actual_sr = sr if isinstance(sr, int) and sr > 0 else TTS_SAMPLE_RATE
         print(f"[TTS Generate] Returning audio tuple with SR={actual_sr}.")
         return (actual_sr, audio_np)
     except Exception as e:
         print(f"[TTS Generate] **** UNEXPECTED ERROR in generate_tts_speech ****: {str(e)}")
         print(traceback.format_exc())
         return None
 def get_voice_id_from_display(voice_display_name: str) -> str:
     """Translate a dropdown label into its voice ID, defaulting to 'af' for unknown labels."""
     if voice_display_name in VOICE_CHOICES:
         return VOICE_CHOICES[voice_display_name]
     return 'af'
+# --- Gradio Interaction Logic (Synchronous) ---
 ChatHistoryType = List[Dict[str, Optional[str]]]
 def handle_interaction(
     query: str,
     history: ChatHistoryType,
     selected_voice_display_name: str
 ) -> Tuple[ChatHistoryType, str, str, Optional[Tuple[int, np.ndarray]], Any]:
     """Handle one user query synchronously: web search, LLM answer, optional speech.

     Returns a 5-tuple of (chat history, status/answer markdown, sources HTML,
     audio tuple or None, search-button update). Errors are caught and reported
     in the returned values rather than raised.
     """
     print(f"\n--- Handling Query (Sync) ---")
     query = query.strip()
     print(f"Query: '{query}', Voice: '{selected_voice_display_name}'")
     if not query:
         print("Empty query.")
         return (
             history,
             "*Please enter query.*",
             "<div class='no-sources'>Enter query.</div>",
             None,
             gr.Button(value="Search", interactive=True),
         )
     # Add the user turn plus a placeholder assistant turn that is filled in below.
     current_history: ChatHistoryType = history + [
         {"role": "user", "content": query},
         {"role": "assistant", "content": "*Processing...*"},
     ]
     status_update = "*Processing... Please wait.*"
     sources_html = "<div class='searching'><span>Searching...</span></div>"
     audio_data = None
     button_update = gr.Button(value="Processing...", interactive=False)
     final_answer = ""
     try:
         print("[Handler] Web search...")
         start_t = time.time()
         web_results = get_web_results_sync(query)
         print(f"[Handler] Web search took {time.time()-start_t:.2f}s")
         sources_html = format_sources_html(web_results)
         print("[Handler] LLM generation...")
         start_t = time.time()
         llm_prompt = format_llm_prompt(query, web_results)
         final_answer = generate_llm_answer(llm_prompt)
         print(f"[Handler] LLM generation took {time.time()-start_t:.2f}s")
         status_update = final_answer
         tts_status_message = ""
         print(f"[Handler] TTS Check: Enabled={TTS_ENABLED}, Model?={tts_model is not None}")
         tts_ready = TTS_ENABLED and tts_model is not None
         if not tts_ready:
             # A finished setup thread means TTS failed to load; a live one means it is still loading.
             if tts_setup_thread.is_alive():
                 tts_status_message = "\n\n*(TTS initializing...)*"
             else:
                 tts_status_message = "\n\n*(TTS unavailable)*"
         elif final_answer.startswith("Error"):
             tts_status_message = "\n\n*(Audio skipped due to answer error)*"
         else:
             print("[Handler] TTS generation...")
             start_t = time.time()
             voice_id = get_voice_id_from_display(selected_voice_display_name)
             audio_data = generate_tts_speech(final_answer, voice_id)  # Call decorated function
             print(f"[Handler] TTS generation took {time.time()-start_t:.2f}s")
             print(f"[Handler] Received audio_data: type={type(audio_data)}, shape={(audio_data[1].shape if audio_data else 'N/A')}")
             if audio_data is None:
                 tts_status_message = "\n\n*(Audio generation failed)*"
         final_answer_with_status = final_answer + tts_status_message
         status_update = final_answer_with_status
         current_history[-1]["content"] = final_answer_with_status  # Update final history item
         button_update = gr.Button(value="Search", interactive=True)
         print("--- Query Handling Complete (Sync) ---")
     except Exception as e:
         print(f"[Handler] Error: {e}")
         print(traceback.format_exc())
         error_message = f"*Error: {e}*"
         current_history[-1]["content"] = error_message
         status_update = error_message
         sources_html = "<div class='error'>Request failed.</div>"
         audio_data = None
         button_update = gr.Button(value="Search", interactive=True)
     print(f"[Handler] Returning: hist_len={len(current_history)}, status_len={len(status_update)}, sources_len={len(sources_html)}, audio?={audio_data is not None}, button_interact={button_update.interactive}")
     return current_history, status_update, sources_html, audio_data, button_update
 # --- Gradio UI Definition ---
 # Stylesheet handed to gr.Blocks(css=...) below. The selectors target the
 # classes attached to components through elem_classes in the layout
 # (search-box, results-container, answer-box, markdown-content, sources-box,
 # chat-history, examples-container, voice-selector, audio-player) and the
 # 'no-sources' placeholder used as the initial value of the sources HTML.
 # The source-* and 'searching' classes are presumably emitted by the
 # HTML/status helpers defined elsewhere in the file -- verify there.
 # Rules under '.dark' apply only when the page carries a 'dark' class.
 css = """
 /* ... [Your existing refined CSS] ... */
 .gradio-container { max-width: 1200px !important; background-color: #f7f7f8 !important; }
 .search-box button:hover { background: #1d4ed8 !important; }
 .search-box button:disabled { background: #9ca3af !important; cursor: not-allowed; }
 .results-container { background: transparent; padding: 0; margin-top: 1.5rem; }
+.answer-box { background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 1rem; color: #1f2937; margin-bottom: 0.5rem; box-shadow: 0 2px 8px rgba(0,0,0,0.05); min-height: 50px;}
 .answer-box p { color: #374151; line-height: 1.7; margin:0;}
 .answer-box code { background: #f3f4f6; border-radius: 4px; padding: 2px 4px; color: #4b5563; font-size: 0.9em; }
 .sources-box { background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 1.5rem; }
 .sources-box h3 { margin-top: 0; margin-bottom: 1rem; color: #111827; font-size: 1.2rem; }
 .sources-container { margin-top: 0; }
+.source-item { display: flex; padding: 10px 0; margin: 0; border-bottom: 1px solid #f3f4f6; }
 .source-item:last-child { border-bottom: none; }
 .source-number { font-weight: bold; margin-right: 12px; color: #6b7280; width: 20px; text-align: right; flex-shrink: 0;}
+.source-content { flex: 1; min-width: 0;}
+.source-title { color: #2563eb; font-weight: 500; text-decoration: none; display: block; margin-bottom: 4px; font-size: 0.95em; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;}
 .source-title:hover { color: #1d4ed8; text-decoration: underline; }
 .source-snippet { color: #4b5563; font-size: 0.9em; line-height: 1.5; }
 .chat-history { max-height: 500px; overflow-y: auto; background: #f9fafb; border: 1px solid #e5e7eb; border-radius: 8px; scrollbar-width: thin; scrollbar-color: #d1d5db #f9fafb; }
 .chat-history::-webkit-scrollbar-track { background: #f9fafb; }
 .chat-history::-webkit-scrollbar-thumb { background-color: #d1d5db; border-radius: 20px; }
 .examples-container { background: #f9fafb; border-radius: 8px; padding: 1rem; margin-top: 1rem; border: 1px solid #e5e7eb; }
+.examples-container button { background: white !important; border: 1px solid #d1d5db !important; color: #374151 !important; margin: 4px !important; font-size: 0.9em !important; padding: 6px 12px !important; border-radius: 4px !important; cursor: pointer;}
 .examples-container button:hover { background: #f3f4f6 !important; border-color: #adb5bd !important; }
 .markdown-content { color: #374151 !important; font-size: 1rem; line-height: 1.7; }
+/* ... other markdown styles ... */
 .voice-selector { margin: 0; padding: 0; height: 100%; }
 .voice-selector div[data-testid="dropdown"] { height: 100% !important; border-radius: 0 !important;}
+.voice-selector select { background: white !important; color: #374151 !important; border: 1px solid #d1d5db !important; border-left: none !important; border-right: none !important; border-radius: 0 !important; height: 100% !important; padding: 0 10px !important; appearance: none !important; -webkit-appearance: none !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%236b7280' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important; background-position: right 0.5rem center !important; background-repeat: no-repeat !important; background-size: 1.5em 1.5em !important; padding-right: 2.5rem !important; }
 .voice-selector select:focus { border-color: #2563eb !important; box-shadow: none !important; z-index: 1; position: relative;}
 .audio-player { margin-top: 1rem; background: #f9fafb !important; border-radius: 8px !important; padding: 0.5rem !important; border: 1px solid #e5e7eb;}
 .audio-player audio { width: 100% !important; }
 .no-sources { padding: 1rem; text-align: center; color: #6b7280; background: #f9fafb; border-radius: 8px; border: 1px solid #e5e7eb;}
 @keyframes pulse { 0% { opacity: 0.7; } 50% { opacity: 1; } 100% { opacity: 0.7; } }
 .searching span { animation: pulse 1.5s infinite ease-in-out; display: inline-block; }
+/* Dark Mode Styles (optional) */
 .dark .gradio-container { background-color: #111827 !important; }
+/* ... other dark mode rules ... */
 """
 # Build the Blocks app: a header, a search row (query box, voice dropdown,
 # Search button), a results row (conversation / answer text / audio on the
 # left, sources on the right) and a row of example queries. Components are
 # created inside nested context managers, so their order here is the layout.
 with gr.Blocks(title="AI Search Assistant (ZeroGPU Sync)", css=css, theme=gr.themes.Default(primary_hue="blue")) as demo:
     # Conversation history holder, initialised to an empty list and passed to
     # the handler as its second input.
     # NOTE(review): chat_history_state is listed in event_inputs but not in
     # event_outputs, so the stored value only changes if the handler mutates
     # the list in place -- confirm that history is meant to persist this way.
     chat_history_state = gr.State([])
     with gr.Column():
+        with gr.Column(elem_id="header"): gr.Markdown("# 🔍 AI Search Assistant (ZeroGPU)\n### (UI blocks during processing)")
         with gr.Column(elem_classes="search-container"):
             with gr.Row(elem_classes="search-box"):
                 search_input = gr.Textbox(label="", placeholder="Ask anything...", scale=5, container=False)
                 # Choices are the keys of VOICE_CHOICES; the first key is the default.
                 voice_select = gr.Dropdown(choices=list(VOICE_CHOICES.keys()), value=list(VOICE_CHOICES.keys())[0], label="", scale=1, min_width=180, container=False, elem_classes="voice-selector")
                 search_btn = gr.Button("Search", variant="primary", scale=0, min_width=100)
             with gr.Row(elem_classes="results-container"):
                 with gr.Column(scale=3):
                     # Assistant avatar: the icon.png under KOKORO_PATH when that
                     # file exists, otherwise the remote fallback image URL.
+                    chatbot_display = gr.Chatbot(label="Conversation", bubble_full_width=True, height=500, elem_classes="chat-history", type="messages", show_label=False, avatar_images=(None, os.path.join(KOKORO_PATH, "icon.png") if os.path.exists(os.path.join(KOKORO_PATH, "icon.png")) else "https://huggingface.co/spaces/gradio/chatbot-streaming/resolve/main/avatar.png"))
+                    answer_status_output = gr.Markdown(value="*Enter query to start.*", elem_classes="answer-box markdown-content") # Shows final text
                     audio_player = gr.Audio(label="Voice Response", type="numpy", autoplay=False, show_label=False, elem_classes="audio-player")
                 with gr.Column(scale=2):
+                    with gr.Column(elem_classes="sources-box"): gr.Markdown("### Sources"); sources_output_html = gr.HTML(value="<div class='no-sources'>Sources appear here.</div>")
             # Clicking an example fills search_input with that text.
+            with gr.Row(elem_classes="examples-container"): gr.Examples(examples=["Latest AI news", "Explain LLMs", "Flu symptoms/prevention", "Python vs JS", "Paris Agreement"], inputs=search_input, label="Try examples:")
     # Handler arguments, in order: query text, history state, selected voice.
     event_inputs = [search_input, chat_history_state, voice_select]
     # Handler results, in order: chat messages, answer/status markdown,
     # sources HTML, audio data, and an update for the Search button itself.
+    event_outputs = [ chatbot_display, answer_status_output, sources_output_html, audio_player, search_btn ]
     # The button click and pressing Enter in the textbox run the same handler.
+    search_btn.click(fn=handle_interaction, inputs=event_inputs, outputs=event_outputs)
+    search_input.submit(fn=handle_interaction, inputs=event_inputs, outputs=event_outputs)
 # Script entry point: start the queued Gradio app when the file is run directly.
 if __name__ == "__main__":
     print("Starting Gradio application (Synchronous for ZeroGPU)...")
     # NOTE(review): a fixed one-second pause does not guarantee that the TTS
     # setup has finished; joining the setup thread would be deterministic.
     # The setup code is not visible here -- confirm before changing.
+    time.sleep(1) # Wait for TTS setup thread
     # Enable the request queue (max_size=20) and launch with debug=True and share=True.
     # NOTE(review): share=True requests a public share link; Gradio reports
     # this option as unsupported when running on Hugging Face Spaces --
     # confirm whether it is still wanted for this deployment.
+    demo.queue(max_size=20).launch(debug=True, share=True)
     # Presumably reached only once the launch() call above returns.
     print("Gradio application stopped.")