Spaces:

sagar007
/

DeepSeekR1_Search

Runtime error

App Files Community

sagar007 committed on Mar 27

Commit

6560c55

verified ·

1 Parent(s): ffc273f

Update app.py

Browse files

Files changed (1) hide show

app.py +232 -209

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
-# import spaces # Removed as @spaces.GPU is not used with async
 from duckduckgo_search import DDGS
 import time
 import torch
@@ -16,8 +16,9 @@ from concurrent.futures import ThreadPoolExecutor
 import warnings
 import traceback # For detailed error logging
 import re # For text cleaning
-import shutil # For checking sudo
 import html # For escaping HTML
 # --- Configuration ---
 MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
@@ -27,11 +28,11 @@ MAX_TTS_CHARS = 1000 # Max characters for a single TTS chunk
 MAX_NEW_TOKENS = 300
 TEMPERATURE = 0.7
 TOP_P = 0.95
-KOKORO_PATH = 'Kokoro-82M' # Path to TTS model directory
 # --- Initialization ---
-# Use a ThreadPoolExecutor for blocking I/O or CPU-bound tasks
-executor = ThreadPoolExecutor(max_workers=os.cpu_count() or 4) # Use available cores
 # Suppress specific warnings
 warnings.filterwarnings("ignore", category=UserWarning, message="TypedStorage is deprecated")
@@ -40,41 +41,42 @@ warnings.filterwarnings("ignore", message="Backend 'inductor' is not available."
 # --- LLM Initialization ---
 llm_model: Optional[AutoModelForCausalLM] = None
 llm_tokenizer: Optional[AutoTokenizer] = None
-llm_device = "cpu" # Default device
 try:
-    print("Initializing LLM...")
     llm_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     llm_tokenizer.pad_token = llm_tokenizer.eos_token
     if torch.cuda.is_available():
         llm_device = "cuda"
         torch_dtype = torch.float16
-        device_map = "auto" # Let accelerate handle distribution
-        print(f"CUDA detected. Loading model with device_map='{device_map}', dtype={torch_dtype}")
     else:
         llm_device = "cpu"
-        torch_dtype = torch.float32 # float32 for CPU
         device_map = {"": "cpu"}
-        print(f"CUDA not found. Loading model on CPU with dtype={torch_dtype}")
     llm_model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
         device_map=device_map,
         low_cpu_mem_usage=True,
         torch_dtype=torch_dtype,
-        # attn_implementation="flash_attention_2" # Optional: Uncomment if flash-attn is installed and compatible GPU
     )
-    print(f"LLM loaded successfully. Device map: {llm_model.hf_device_map if hasattr(llm_model, 'hf_device_map') else 'N/A'}")
-    llm_model.eval() # Set to evaluation mode
 except Exception as e:
-    print(f"FATAL: Error initializing LLM model: {str(e)}")
     print(traceback.format_exc())
-    # Depending on environment, you might exit or just disable LLM features
     llm_model = None
     llm_tokenizer = None
-    print("LLM features will be unavailable.")
 # --- TTS Initialization ---
@@ -85,147 +87,189 @@ VOICE_CHOICES = {
     '🇺🇸 Nicole': 'af_nicole'
 }
 TTS_ENABLED = False
-tts_model: Optional[Any] = None # Define type more specifically if Kokoro provides it
-voicepacks: Dict[str, Any] = {}  # Cache voice packs
-tts_device = "cpu" # Default device for TTS model
-# Use a lock for thread-safe access during initialization if needed, though Thread ensures sequential execution
-# tts_init_lock = threading.Lock()
-def _run_subprocess(cmd: List[str], check: bool = True, cwd: Optional[str] = None) -> subprocess.CompletedProcess:
-    """Helper to run subprocess and capture output."""
     print(f"Running command: {' '.join(cmd)}")
     try:
-        result = subprocess.run(cmd, check=check, capture_output=True, text=True, cwd=cwd)
-        if result.stdout: print(f"Stdout: {result.stdout.strip()}")
-        if result.stderr: print(f"Stderr: {result.stderr.strip()}")
         return result
     except FileNotFoundError:
-        print(f"Error: Command not found - {cmd[0]}")
         raise
     except subprocess.CalledProcessError as e:
-        print(f"Error running command: {' '.join(e.cmd)}")
-        if e.stdout: print(f"Stdout: {e.stdout.strip()}")
-        if e.stderr: print(f"Stderr: {e.stderr.strip()}")
         raise
 def setup_tts_task():
     """Initializes Kokoro TTS model and dependencies."""
     global TTS_ENABLED, tts_model, voicepacks, tts_device
     print("[TTS Setup] Starting background initialization...")
-    # Determine TTS device
     tts_device = "cuda" if torch.cuda.is_available() else "cpu"
     print(f"[TTS Setup] Target device: {tts_device}")
     can_sudo = shutil.which('sudo') is not None
     apt_cmd_prefix = ['sudo'] if can_sudo else []
     try:
         # 1. Clone Kokoro Repo if needed
-        if not os.path.exists(KOKORO_PATH):
-            print(f"[TTS Setup] Cloning repository to {KOKORO_PATH}...")
             try:
                 _run_subprocess(['git', 'lfs', 'install', '--system', '--skip-repo'])
             except Exception as lfs_err:
-                print(f"[TTS Setup] Warning: git lfs install command failed: {lfs_err}. Continuing clone...")
-            _run_subprocess(['git', 'clone', 'https://huggingface.co/hexgrad/Kokoro-82M', KOKORO_PATH])
             try:
                  print("[TTS Setup] Running git lfs pull...")
-                 _run_subprocess(['git', 'lfs', 'pull'], cwd=KOKORO_PATH)
             except Exception as lfs_pull_err:
                  print(f"[TTS Setup] Warning: git lfs pull failed: {lfs_pull_err}")
         else:
-            print(f"[TTS Setup] Directory {KOKORO_PATH} already exists.")
         # 2. Install espeak dependency
         print("[TTS Setup] Checking/Installing espeak...")
         try:
             _run_subprocess(apt_cmd_prefix + ['apt-get', 'update', '-qq'])
             _run_subprocess(apt_cmd_prefix + ['apt-get', 'install', '-y', '-qq', 'espeak-ng'])
             print("[TTS Setup] espeak-ng installed or already present.")
         except Exception:
-            print("[TTS Setup] espeak-ng failed, trying espeak...")
             try:
                 _run_subprocess(apt_cmd_prefix + ['apt-get', 'install', '-y', '-qq', 'espeak'])
                 print("[TTS Setup] espeak installed or already present.")
             except Exception as espeak_err:
                 print(f"[TTS Setup] ERROR: Failed to install both espeak-ng and espeak: {espeak_err}. TTS disabled.")
-                return # Critical dependency missing
         # 3. Load Kokoro Model and Voices
-        if os.path.exists(KOKORO_PATH):
-            sys_path_updated = False
-            if KOKORO_PATH not in sys.path:
-                sys.path.append(KOKORO_PATH)
                 sys_path_updated = True
             try:
                 from models import build_model
                 from kokoro import generate as generate_tts_internal
-                globals()['build_model'] = build_model # Make available globally
                 globals()['generate_tts_internal'] = generate_tts_internal
-                model_file = os.path.join(KOKORO_PATH, 'kokoro-v0_19.pth')
                 if not os.path.exists(model_file):
                     print(f"[TTS Setup] ERROR: Model file {model_file} not found. TTS disabled.")
                     return
                 print(f"[TTS Setup] Loading TTS model from {model_file} onto {tts_device}...")
                 tts_model = build_model(model_file, tts_device)
-                tts_model.eval() # Set to eval mode
                 print("[TTS Setup] TTS model loaded.")
                 # Load voices
                 loaded_voices = 0
                 for voice_name, voice_id in VOICE_CHOICES.items():
-                    voice_file_path = os.path.join(KOKORO_PATH, 'voices', f'{voice_id}.pt')
                     if os.path.exists(voice_file_path):
                         try:
                             print(f"[TTS Setup] Loading voice: {voice_id} ({voice_name})")
-                            # map_location ensures it loads to the correct device
                             voicepacks[voice_id] = torch.load(voice_file_path, map_location=tts_device)
                             loaded_voices += 1
                         except Exception as e:
                             print(f"[TTS Setup] Warning: Failed to load voice {voice_id}: {str(e)}")
                     else:
-                        print(f"[TTS Setup] Info: Voice file {voice_file_path} not found, skipping.")
                 if loaded_voices == 0:
                     print("[TTS Setup] ERROR: No voicepacks could be loaded. TTS disabled.")
-                    tts_model = None # Unload model if no voices
                     return
                 TTS_ENABLED = True
                 print(f"[TTS Setup] Initialization successful. {loaded_voices} voices loaded. TTS Enabled: {TTS_ENABLED}")
             except ImportError as ie:
-                print(f"[TTS Setup] ERROR: Failed to import Kokoro modules: {ie}. Check clone and path. TTS disabled.")
             except Exception as load_err:
-                print(f"[TTS Setup] ERROR: Failed loading TTS model/voices: {load_err}. TTS disabled.")
                 print(traceback.format_exc())
             finally:
-                 # Clean up sys.path if modified
-                 if sys_path_updated and KOKORO_PATH in sys.path:
-                     sys.path.remove(KOKORO_PATH)
         else:
-            print(f"[TTS Setup] ERROR: {KOKORO_PATH} directory not found. TTS disabled.")
     except Exception as e:
         print(f"[TTS Setup] ERROR: Unexpected error during setup: {str(e)}")
         print(traceback.format_exc())
-        # Ensure TTS is marked as disabled
-        TTS_ENABLED = False
         tts_model = None
         voicepacks.clear()
-# Start TTS setup in a background thread
 print("Starting TTS setup thread...")
 tts_setup_thread = threading.Thread(target=setup_tts_task, daemon=True)
 tts_setup_thread.start()
-# --- Core Functions ---
 @lru_cache(maxsize=128)
 def get_web_results_sync(query: str, max_results: int = MAX_SEARCH_RESULTS) -> List[Dict[str, Any]]:
@@ -244,33 +288,27 @@ def get_web_results_sync(query: str, max_results: int = MAX_SEARCH_RESULTS) -> L
             return formatted
     except Exception as e:
         print(f"[Web Search] Error: {e}")
-        print(traceback.format_exc())
         return []
 def format_llm_prompt(query: str, context: List[Dict[str, Any]]) -> str:
     """Formats the prompt for the LLM, including context and instructions."""
     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     context_str = "\n\n".join(
-        [f"[{res['id']}] {res['title']}\n{res['snippet']}" for res in context]
     ) if context else "No relevant web context found."
-    return f"""You are a helpful AI assistant. Answer the user's query based *only* on the provided web search context.
-Instructions:
-- Synthesize information from the context to answer concisely.
-- Cite sources using bracket notation like [1], [2], etc., corresponding to the context IDs.
-- If the context is insufficient, state that clearly. Do not add external information.
-- Use markdown for formatting.
-Current Time: {current_time}
-Web Context:
 ---
 {context_str}
 ---
-User Query: {query}
-Answer:"""
 def format_sources_html(web_results: List[Dict[str, Any]]) -> str:
     """Formats search results into HTML for display."""
@@ -280,7 +318,7 @@ def format_sources_html(web_results: List[Dict[str, Any]]) -> str:
     for res in web_results:
         title_safe = html.escape(res.get("title", "Source"))
         snippet_safe = html.escape(res.get("snippet", "")[:150] + ("..." if len(res.get("snippet", "")) > 150 else ""))
-        url = res.get("url", "#")
         items_html += f"""
         <div class='source-item'>
             <div class='source-number'>[{res['id']}]</div>
@@ -295,7 +333,8 @@ def format_sources_html(web_results: List[Dict[str, Any]]) -> str:
 async def generate_llm_answer(prompt: str) -> str:
     """Generates answer using the loaded LLM (Async Wrapper)."""
     if not llm_model or not llm_tokenizer:
-        return "Error: LLM model is not available."
     print(f"[LLM Generate] Requesting generation (prompt length {len(prompt)})...")
     start_time = time.time()
@@ -305,12 +344,11 @@ async def generate_llm_answer(prompt: str) -> str:
             return_tensors="pt",
             padding=True,
             truncation=True,
-            max_length=1024, # Consider model's actual max length
             return_attention_mask=True
-        ).to(llm_model.device) # Ensure inputs are on the same device as model parts
         with torch.inference_mode(), torch.cuda.amp.autocast(enabled=(llm_model.dtype == torch.float16)):
-            # Run blocking model.generate in the executor thread pool
             outputs = await asyncio.get_event_loop().run_in_executor(
                 executor,
                 llm_model.generate,
@@ -325,20 +363,12 @@ async def generate_llm_answer(prompt: str) -> str:
                 num_return_sequences=1
             )
-        # Decode only newly generated tokens relative to input
         output_ids = outputs[0][inputs.input_ids.shape[1]:]
         answer_part = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
-        # Handle potential empty generation
         if not answer_part:
-             # Sometimes the split method above is needed if the model includes the prompt
-             full_output = llm_tokenizer.decode(outputs[0], skip_special_tokens=True)
-             answer_marker = "Answer:"
-             marker_index = full_output.rfind(answer_marker)
-             if marker_index != -1:
-                 answer_part = full_output[marker_index + len(answer_marker):].strip()
-             else:
-                 answer_part = "*Model generated an empty response.*" # Fallback message
         end_time = time.time()
         print(f"[LLM Generate] Generation complete in {end_time - start_time:.2f}s. Length: {len(answer_part)}")
@@ -347,22 +377,21 @@ async def generate_llm_answer(prompt: str) -> str:
     except Exception as e:
         print(f"[LLM Generate] Error: {e}")
         print(traceback.format_exc())
-        return f"Error during answer generation: {str(e)}"
 async def generate_tts_speech(text: str, voice_id: str = 'af') -> Optional[Tuple[int, np.ndarray]]:
     """Generates speech using the loaded TTS model (Async Wrapper)."""
     if not TTS_ENABLED or not tts_model or 'generate_tts_internal' not in globals():
         print("[TTS Generate] Skipping: TTS not ready.")
         return None
-    if not text or not text.strip():
-        print("[TTS Generate] Skipping: Empty text.")
         return None
     print(f"[TTS Generate] Requesting speech (length {len(text)}, voice '{voice_id}')...")
     start_time = time.time()
     try:
-        # Verify voicepack availability
         actual_voice_id = voice_id
         if voice_id not in voicepacks:
             print(f"[TTS Generate] Warning: Voice '{voice_id}' not loaded. Trying default 'af'.")
@@ -371,18 +400,23 @@ async def generate_tts_speech(text: str, voice_id: str = 'af') -> Optional[Tuple
                 print("[TTS Generate] Error: Default voice 'af' also not available.")
                 return None
-        # Clean text for TTS
-        clean_text = re.sub(r'\[\d+\](\[\d+\])*', '', text) # Remove citations like [1], [2][3]
-        clean_text = re.sub(r'[\*\#\`]', '', clean_text)      # Remove markdown symbols
         clean_text = ' '.join(clean_text.split())             # Normalize whitespace
-        if not clean_text: return None # Skip if empty after cleaning
-        # Truncate if necessary
         if len(clean_text) > MAX_TTS_CHARS:
-            print(f"[TTS Generate] Truncating text from {len(clean_text)} to {MAX_TTS_CHARS} chars.")
             clean_text = clean_text[:MAX_TTS_CHARS]
-            last_punct = max(clean_text.rfind(p) for p in '.?! ')
             if last_punct != -1: clean_text = clean_text[:last_punct+1]
             clean_text += "..."
@@ -390,17 +424,13 @@ async def generate_tts_speech(text: str, voice_id: str = 'af') -> Optional[Tuple
         gen_func = globals()['generate_tts_internal']
         voice_pack_data = voicepacks[actual_voice_id]
-        # Run blocking TTS generation in the executor thread pool
-        # Assuming 'afr' is the correct language code for Kokoro's default voices
         audio_data, _ = await asyncio.get_event_loop().run_in_executor(
-            executor,
-            gen_func,
-            tts_model,      # The loaded model object
-            clean_text,     # The cleaned text string
-            voice_pack_data,# The loaded voice pack tensor/dict
-            'afr'           # Language code (verify this is correct)
         )
         if isinstance(audio_data, torch.Tensor):
             audio_np = audio_data.detach().cpu().numpy()
         elif isinstance(audio_data, np.ndarray):
@@ -409,8 +439,7 @@ async def generate_tts_speech(text: str, voice_id: str = 'af') -> Optional[Tuple
             print("[TTS Generate] Warning: Unexpected audio data type.")
             return None
-        # Ensure audio is 1D float32
-        audio_np = audio_np.flatten().astype(np.float32)
         end_time = time.time()
         print(f"[TTS Generate] Audio generated in {end_time - start_time:.2f}s. Shape: {audio_np.shape}")
@@ -427,9 +456,7 @@ def get_voice_id_from_display(voice_display_name: str) -> str:
 # --- Gradio Interaction Logic ---
-# Define type for chat history using the 'messages' format
-ChatHistoryType = List[Dict[str, str]]
 async def handle_interaction(
     query: str,
@@ -438,94 +465,84 @@ async def handle_interaction(
 ):
     """Main async generator function to handle user queries and update Gradio UI."""
     print(f"\n--- Handling Query ---")
     print(f"Query: '{query}', Voice: '{selected_voice_display_name}'")
-    if not query or not query.strip():
         print("Empty query received.")
-        # Need to yield the current state for all outputs
-        yield history, "*Please enter a query.*", "<div class='no-sources'>Enter a query to search.</div>", None, gr.Button(value="Search", interactive=True)
         return
-    # Append user message to history
-    current_history = history + [{"role": "user", "content": query}]
     # Add placeholder for assistant response
-    current_history.append({"role": "assistant", "content": "*Searching...*"})
     # 1. Initial State: Searching
-    yield (
-        current_history,
-        "*Searching the web...*", # Update answer area
-        "<div class='searching'><span>Searching the web...</span></div>", # Update sources area
-        None, # No audio yet
-        gr.Button(value="Searching...", interactive=False) # Update button state
-    )
     # 2. Perform Web Search (in executor)
     web_results = await asyncio.get_event_loop().run_in_executor(
         executor, get_web_results_sync, query
     )
-    sources_html = format_sources_html(web_results)
     # Update state: Generating Answer
-    current_history[-1]["content"] = "*Generating answer...*" # Update assistant placeholder
-    yield (
-        current_history,
-        "*Generating answer...*", # Update answer area
-        sources_html,             # Show sources
-        None,
-        gr.Button(value="Generating...", interactive=False)
-    )
     # 3. Generate LLM Answer (async)
     llm_prompt = format_llm_prompt(query, web_results)
     final_answer = await generate_llm_answer(llm_prompt)
-    # Update assistant message in history with the final answer
     current_history[-1]["content"] = final_answer
     # Update state: Generating Audio (if applicable)
-    yield (
-        current_history,
-        final_answer, # Show final answer
-        sources_html,
-        None,
-        gr.Button(value="Audio...", interactive=False) if TTS_ENABLED else gr.Button(value="Search", interactive=True) # Enable search if TTS disabled
-    )
     # 4. Generate TTS Speech (async)
-    audio_output_data = None
     tts_status_message = ""
     if not TTS_ENABLED:
         if tts_setup_thread.is_alive():
              tts_status_message = "\n\n*(TTS initializing...)*"
         else:
-             tts_status_message = "\n\n*(TTS disabled or failed)*"
-    elif final_answer and not final_answer.startswith("Error"):
         voice_id = get_voice_id_from_display(selected_voice_display_name)
-        audio_output_data = await generate_tts_speech(final_answer, voice_id)
-        if audio_output_data is None:
             tts_status_message = "\n\n*(Audio generation failed)*"
     # 5. Final State: Show all results
     final_answer_with_status = final_answer + tts_status_message
-    current_history[-1]["content"] = final_answer_with_status # Update history with status msg too
     print("--- Query Handling Complete ---")
-    yield (
-        current_history,
-        final_answer_with_status, # Show answer + TTS status
-        sources_html,
-        audio_output_data,        # Output audio data (or None)
-        gr.Button(value="Search", interactive=True) # Re-enable button
-    )
 # --- Gradio UI Definition ---
-# (CSS remains largely the same - ensure it targets default Gradio classes if elem_classes was removed)
 css = """
-/* ... [Your existing refined CSS, but remove selectors using .gradio-examples if you were using it] ... */
-/* Example: Style examples container via its parent or default class if needed */
-/* .examples-container .gradio-examples { ... } */ /* This might still work depending on structure */
 .gradio-container { max-width: 1200px !important; background-color: #f7f7f8 !important; }
 #header { text-align: center; margin-bottom: 2rem; padding: 2rem 0; background: linear-gradient(135deg, #1a1b1e, #2d2e32); border-radius: 12px; color: white; box-shadow: 0 8px 32px rgba(0,0,0,0.2); }
 #header h1 { color: white; font-size: 2.5rem; margin-bottom: 0.5rem; text-shadow: 0 2px 4px rgba(0,0,0,0.3); }
@@ -542,8 +559,8 @@ css = """
 .search-box button:hover { background: #1d4ed8 !important; }
 .search-box button:disabled { background: #9ca3af !important; cursor: not-allowed; }
 .results-container { background: transparent; padding: 0; margin-top: 1.5rem; }
-.answer-box { background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 1.5rem; color: #1f2937; margin-bottom: 1.5rem; box-shadow: 0 2px 8px rgba(0,0,0,0.05); }
-.answer-box p { color: #374151; line-height: 1.7; }
 .answer-box code { background: #f3f4f6; border-radius: 4px; padding: 2px 4px; color: #4b5563; font-size: 0.9em; }
 .sources-box { background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 1.5rem; }
 .sources-box h3 { margin-top: 0; margin-bottom: 1rem; color: #111827; font-size: 1.2rem; }
@@ -555,13 +572,12 @@ css = """
 .source-title { color: #2563eb; font-weight: 500; text-decoration: none; display: block; margin-bottom: 4px; transition: all 0.2s; font-size: 0.95em; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;}
 .source-title:hover { color: #1d4ed8; text-decoration: underline; }
 .source-snippet { color: #4b5563; font-size: 0.9em; line-height: 1.5; }
-.chat-history { /* Style the chatbot container */ max-height: 400px; overflow-y: auto; background: #f9fafb; border: 1px solid #e5e7eb; border-radius: 8px; margin-top: 1rem; scrollbar-width: thin; scrollbar-color: #d1d5db #f9fafb; }
 .chat-history > div { padding: 1rem; } /* Add padding inside the chatbot display area */
 .chat-history::-webkit-scrollbar { width: 6px; }
 .chat-history::-webkit-scrollbar-track { background: #f9fafb; }
 .chat-history::-webkit-scrollbar-thumb { background-color: #d1d5db; border-radius: 20px; }
 .examples-container { background: #f9fafb; border-radius: 8px; padding: 1rem; margin-top: 1rem; border: 1px solid #e5e7eb; }
-/* Default styling for example buttons (since elem_classes might not work) */
 .examples-container button { background: white !important; border: 1px solid #d1d5db !important; color: #374151 !important; transition: all 0.2s; margin: 4px !important; font-size: 0.9em !important; padding: 6px 12px !important; border-radius: 4px !important; }
 .examples-container button:hover { background: #f3f4f6 !important; border-color: #adb5bd !important; }
 .markdown-content { color: #374151 !important; font-size: 1rem; line-height: 1.7; }
@@ -578,8 +594,8 @@ css = """
 .markdown-content table { border-collapse: collapse !important; width: 100% !important; margin: 1em 0; }
 .markdown-content th, .markdown-content td { padding: 8px 12px !important; border: 1px solid #d1d5db !important; text-align: left;}
 .markdown-content th { background: #f9fafb !important; font-weight: 600; }
-.accordion { background: #f9fafb !important; border: 1px solid #e5e7eb !important; border-radius: 8px !important; margin-top: 1rem !important; box-shadow: none !important; }
-.accordion > .label-wrap { padding: 10px 15px !important; }
 .voice-selector { margin: 0; padding: 0; height: 100%; }
 .voice-selector div[data-testid="dropdown"] { height: 100% !important; border-radius: 0 !important;}
 .voice-selector select { background: white !important; color: #374151 !important; border: 1px solid #d1d5db !important; border-left: none !important; border-right: none !important; border-radius: 0 !important; height: 100% !important; padding: 0 10px !important; transition: all 0.2s; appearance: none !important; -webkit-appearance: none !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%236b7280' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important; background-position: right 0.5rem center !important; background-repeat: no-repeat !important; background-size: 1.5em 1.5em !important; padding-right: 2.5rem !important; }
@@ -592,7 +608,7 @@ css = """
 .no-sources { padding: 1rem; text-align: center; color: #6b7280; background: #f9fafb; border-radius: 8px; border: 1px solid #e5e7eb;}
 @keyframes pulse { 0% { opacity: 0.7; } 50% { opacity: 1; } 100% { opacity: 0.7; } }
 .searching span { animation: pulse 1.5s infinite ease-in-out; display: inline-block; }
-/* Dark Mode Styles (Optional - keep if needed) */
 .dark .gradio-container { background-color: #111827 !important; }
 .dark #header { background: linear-gradient(135deg, #1f2937, #374151); }
 .dark #header h3 { color: #9ca3af; }
@@ -630,8 +646,8 @@ css = """
 .dark .markdown-content blockquote { border-left-color: #4b5563 !important; color: #9ca3af !important; }
 .dark .markdown-content th, .dark .markdown-content td { border-color: #4b5563 !important; }
 .dark .markdown-content th { background: #374151 !important; }
-.dark .accordion { background: #374151 !important; border-color: #4b5563 !important; }
-.dark .accordion > .label-wrap { color: #d1d5db !important; }
 .dark .voice-selector select { background: #1f2937 !important; color: #d1d5db !important; border-color: #4b5563 !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%239ca3af' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important;}
 .dark .voice-selector select:focus { border-color: #3b82f6 !important; }
 .dark .audio-player { background: #374151 !important; border-color: #4b5563;}
@@ -644,13 +660,11 @@ css = """
 .dark .no-sources { background: #374151; color: #9ca3af; border-color: #4b5563;}
 """
-import sys # Needed for sys.path manipulation in TTS setup
 with gr.Blocks(title="AI Search Assistant", css=css, theme=gr.themes.Default(primary_hue="blue")) as demo:
-    # Use gr.State to store the chat history in the 'messages' format
     chat_history_state = gr.State([])
-    with gr.Column(): # Main container
         # Header
         with gr.Column(elem_id="header"):
             gr.Markdown("# 🔍 AI Search Assistant")
@@ -658,27 +672,25 @@ with gr.Blocks(title="AI Search Assistant", css=css, theme=gr.themes.Default(pri
         # Search Area
         with gr.Column(elem_classes="search-container"):
-            with gr.Row(elem_classes="search-box", equal_height=False):
                 search_input = gr.Textbox(label="", placeholder="Ask anything...", scale=5, container=False)
                 voice_select = gr.Dropdown(choices=list(VOICE_CHOICES.keys()), value=list(VOICE_CHOICES.keys())[0], label="", scale=1, min_width=180, container=False, elem_classes="voice-selector")
                 search_btn = gr.Button("Search", variant="primary", scale=0, min_width=100)
             # Results Area
-            with gr.Row(elem_classes="results-container", equal_height=False):
-                # Left Column: Answer & History
                 with gr.Column(scale=3):
-                    # Chatbot display (uses 'messages' format now)
                     chatbot_display = gr.Chatbot(
                         label="Conversation",
                         bubble_full_width=True,
-                        height=500,
                         elem_classes="chat-history",
-                        type="messages", # Use the recommended type
-                        avatar_images=(None, os.path.join(KOKORO_PATH, "icon.png") if os.path.exists(os.path.join(KOKORO_PATH, "icon.png")) else None) # Optional: Add avatar for assistant
                     )
-                    # Separate Markdown for status/intermediate answer
                     answer_status_output = gr.Markdown(value="*Enter a query to start.*", elem_classes="answer-box markdown-content")
-                    # Audio Output
                     audio_player = gr.Audio(label="Voice Response", type="numpy", autoplay=False, show_label=False, elem_classes="audio-player")
                 # Right Column: Sources
@@ -689,7 +701,6 @@ with gr.Blocks(title="AI Search Assistant", css=css, theme=gr.themes.Default(pri
             # Examples Area
             with gr.Row(elem_classes="examples-container"):
-                 # REMOVED elem_classes from gr.Examples
                  gr.Examples(
                     examples=[
                         "Latest news about renewable energy",
@@ -700,47 +711,54 @@ with gr.Blocks(title="AI Search Assistant", css=css, theme=gr.themes.Default(pri
                     ],
                     inputs=search_input,
                     label="Try these examples:",
                 )
     # --- Event Handling Setup ---
-    # Define the inputs and outputs for the Gradio event triggers
     event_inputs = [search_input, chat_history_state, voice_select]
     event_outputs = [
-        chatbot_display,        # Updated chat history
-        answer_status_output,   # Status or final answer text
-        sources_output_html,    # Formatted sources
-        audio_player,           # Audio data
-        search_btn              # Button state (enabled/disabled)
     ]
-    # Create a wrapper to adapt the async generator for Gradio's streaming updates
     async def stream_interaction_updates(query, history, voice_display_name):
          try:
-             # Iterate through the states yielded by the handler
-             async for state_update in handle_interaction(query, history, voice_display_name):
-                 yield state_update # Yield the tuple of output values
          except Exception as e:
             print(f"[Gradio Stream] Error during interaction: {e}")
             print(traceback.format_exc())
-            # Yield a final error state to the UI
-            error_history = history + [{"role":"user", "content":query}, {"role":"assistant", "content":f"*Error: {e}*"}]
-            yield (
                 error_history,
                 f"An error occurred: {e}",
                 "<div class='error'>Request failed.</div>",
                 None,
-                gr.Button(value="Search", interactive=True)
             )
-         finally:
-            # Clear the text input after processing is complete (or errored out)
-            # We need to yield the final state *plus* the cleared input
-            # This requires adding search_input to the outputs list for the event triggers
-            # For now, let's not clear it automatically to avoid complexity.
-            # yield (*final_state_tuple, gr.Textbox(value="")) # Example if clearing input
-             print("[Gradio Stream] Interaction stream finished.")
-    # Connect the streaming function to the button click and input submit events
     search_btn.click(
         fn=stream_interaction_updates,
         inputs=event_inputs,
@@ -752,10 +770,15 @@ with gr.Blocks(title="AI Search Assistant", css=css, theme=gr.themes.Default(pri
         outputs=event_outputs
     )
 if __name__ == "__main__":
     print("Starting Gradio application...")
     demo.queue(max_size=20).launch(
         debug=True,
-        share=True,
-        # server_name="0.0.0.0" # Optional: Bind to all interfaces
-    )

+# --- Imports ---
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from duckduckgo_search import DDGS
 import time
 import torch
 import warnings
 import traceback # For detailed error logging
 import re # For text cleaning
+import shutil # For checking sudo/file operations
 import html # For escaping HTML
+import sys # For sys.path manipulation
 # --- Configuration ---
 MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
 MAX_NEW_TOKENS = 300
 TEMPERATURE = 0.7
 TOP_P = 0.95
+KOKORO_PATH = 'Kokoro-82M' # Relative path to TTS model directory
 # --- Initialization ---
+# Thread Pool Executor for blocking tasks
+executor = ThreadPoolExecutor(max_workers=os.cpu_count() or 4)
 # Suppress specific warnings
 warnings.filterwarnings("ignore", category=UserWarning, message="TypedStorage is deprecated")
 # --- LLM Initialization ---
 llm_model: Optional[AutoModelForCausalLM] = None
 llm_tokenizer: Optional[AutoTokenizer] = None
 llm_device = "cpu"
 try:
     print("[LLM Init] Initializing Language Model...")
     llm_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     # The EOS token doubles as the padding token.
     llm_tokenizer.pad_token = llm_tokenizer.eos_token
     # Pick device, dtype and placement together: fp32 pinned to CPU, or fp16 with automatic placement on CUDA.
     if not torch.cuda.is_available():
         llm_device = "cpu"
         torch_dtype = torch.float32
         device_map = {"": "cpu"}
         print(f"[LLM Init] CUDA not found. Loading model on CPU with dtype={torch_dtype}")
     else:
         llm_device = "cuda"
         torch_dtype = torch.float16
         device_map = "auto"
         print(f"[LLM Init] CUDA detected. Loading model with device_map='{device_map}', dtype={torch_dtype}")
     llm_model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
         device_map=device_map,
         low_cpu_mem_usage=True,
         torch_dtype=torch_dtype,
     )
     # Report the placement the model exposes, falling back to the requested map when it exposes none.
     effective_device_map = getattr(llm_model, 'hf_device_map', device_map)
     print(f"[LLM Init] LLM loaded successfully. Device map: {effective_device_map}")
     llm_model.eval()
 except Exception as e:
     # Any failure leaves both globals as None so later code can detect that the LLM is unavailable.
     print(f"[LLM Init] FATAL: Error initializing LLM model: {e}")
     print(traceback.format_exc())
     llm_model = None
     llm_tokenizer = None
     print("[LLM Init] LLM features will be unavailable.")
 # --- TTS Initialization ---
     '🇺🇸 Nicole': 'af_nicole'
 }
 TTS_ENABLED = False
+tts_model: Optional[Any] = None
+voicepacks: Dict[str, Any] = {}
+tts_device = "cpu"
+# Helper for running subprocesses
+def _run_subprocess(cmd: List[str], check: bool = True, cwd: Optional[str] = None, timeout: int = 300) -> subprocess.CompletedProcess:
+    """Runs a subprocess command, captures output, and handles errors."""
     print(f"Running command: {' '.join(cmd)}")
     try:
+        result = subprocess.run(cmd, check=check, capture_output=True, text=True, cwd=cwd, timeout=timeout)
+        # Only print output details if check failed or for specific successful commands
+        if not check or result.returncode != 0:
+            if result.stdout: print(f"  Stdout: {result.stdout.strip()}")
+            if result.stderr: print(f"  Stderr: {result.stderr.strip()}")
+        elif result.returncode == 0 and ('clone' in cmd or 'pull' in cmd or 'install' in cmd):
+            print(f"  Command successful.") # Concise success message
         return result
     except FileNotFoundError:
+        print(f"  Error: Command not found - {cmd[0]}")
+        raise
+    except subprocess.TimeoutExpired:
+        print(f"  Error: Command timed out - {' '.join(cmd)}")
         raise
     except subprocess.CalledProcessError as e:
+        print(f"  Error running command: {' '.join(e.cmd)} (Code: {e.returncode})")
+        if e.stdout: print(f"  Stdout: {e.stdout.strip()}")
+        if e.stderr: print(f"  Stderr: {e.stderr.strip()}")
         raise
+# TTS Setup Task (runs in background thread)
 def _ensure_kokoro_repo(repo_path: str) -> None:
     """Clone the Kokoro-82M repository into ``repo_path`` unless it already exists."""
     if os.path.exists(repo_path):
         print(f"[TTS Setup] Directory {repo_path} already exists.")
         return
     print(f"[TTS Setup] Cloning repository to {repo_path}...")
     try:
         _run_subprocess(['git', 'lfs', 'install', '--system', '--skip-repo'])
     except Exception as lfs_err:
         # Best effort: the clone below is still attempted without LFS hooks.
         print(f"[TTS Setup] Warning: git lfs install failed: {lfs_err}. Continuing...")
     _run_subprocess(['git', 'clone', 'https://huggingface.co/hexgrad/Kokoro-82M', repo_path])
     try:
         print("[TTS Setup] Running git lfs pull...")
         _run_subprocess(['git', 'lfs', 'pull'], cwd=repo_path)
     except Exception as lfs_pull_err:
         print(f"[TTS Setup] Warning: git lfs pull failed: {lfs_pull_err}")


 def _ensure_espeak() -> bool:
     """Return True when an espeak binary is usable, installing one via apt-get if needed."""
     print("[TTS Setup] Checking/Installing espeak...")
     # Skip the package manager when a binary is already on PATH, so an
     # environment that forbids apt-get at runtime does not lose TTS needlessly.
     for binary in ('espeak-ng', 'espeak'):
         if shutil.which(binary) is not None:
             print(f"[TTS Setup] {binary} installed or already present.")
             return True
     apt_cmd_prefix = ['sudo'] if shutil.which('sudo') is not None else []
     try:
         _run_subprocess(apt_cmd_prefix + ['apt-get', 'update', '-qq'])
         _run_subprocess(apt_cmd_prefix + ['apt-get', 'install', '-y', '-qq', 'espeak-ng'])
         print("[TTS Setup] espeak-ng installed or already present.")
         return True
     except Exception:
         print("[TTS Setup] espeak-ng installation failed, trying espeak...")
     try:
         # Fallback to legacy espeak
         _run_subprocess(apt_cmd_prefix + ['apt-get', 'install', '-y', '-qq', 'espeak'])
         print("[TTS Setup] espeak installed or already present.")
         return True
     except Exception as espeak_err:
         print(f"[TTS Setup] ERROR: Failed to install both espeak-ng and espeak: {espeak_err}. TTS disabled.")
         return False


 def _load_kokoro(repo_path: str) -> None:
     """Import the Kokoro modules from ``repo_path`` and load the model and voicepacks.

     On success sets the ``tts_model`` global, fills ``voicepacks`` and flips
     ``TTS_ENABLED`` to True. On any failure the model and voicepacks are
     released and ``TTS_ENABLED`` is left untouched (False).
     """
     global TTS_ENABLED, tts_model
     print(f"[TTS Setup] Checking contents of: {repo_path}")
     try:
         dir_contents = os.listdir(repo_path)
         print(f"[TTS Setup] Contents: {dir_contents}")
         if 'models.py' not in dir_contents or 'kokoro.py' not in dir_contents:
             print("[TTS Setup] Warning: Core Kokoro python files ('models.py', 'kokoro.py') might be missing!")
     except OSError as list_err:
         print(f"[TTS Setup] Warning: Could not list directory contents: {list_err}")

     # The repo is only put on sys.path for the duration of the imports.
     sys_path_updated = repo_path not in sys.path
     if sys_path_updated:
         sys.path.insert(0, repo_path)
         print(f"[TTS Setup] Temporarily added {repo_path} to sys.path.")
     try:
         print("[TTS Setup] Attempting to import Kokoro modules...")
         from models import build_model
         from kokoro import generate as generate_tts_internal
         print("[TTS Setup] Kokoro modules imported successfully.")
         # Other functions in this file look these up through globals().
         globals()['build_model'] = build_model
         globals()['generate_tts_internal'] = generate_tts_internal

         model_file = os.path.join(repo_path, 'kokoro-v0_19.pth')
         if not os.path.exists(model_file):
             print(f"[TTS Setup] ERROR: Model file {model_file} not found. TTS disabled.")
             return
         print(f"[TTS Setup] Loading TTS model from {model_file} onto {tts_device}...")
         tts_model = build_model(model_file, tts_device)
         # NOTE(review): build_model's return type is not visible here, so
         # eval() is only called when the object provides it -- confirm.
         eval_fn = getattr(tts_model, 'eval', None)
         if callable(eval_fn):
             eval_fn()
         print("[TTS Setup] TTS model loaded.")

         loaded_voices = 0
         for voice_name, voice_id in VOICE_CHOICES.items():
             voice_file_path = os.path.join(repo_path, 'voices', f'{voice_id}.pt')
             if not os.path.exists(voice_file_path):
                 print(f"[TTS Setup] Info: Voice file {voice_file_path} not found.")
                 continue
             try:
                 print(f"[TTS Setup] Loading voice: {voice_id} ({voice_name})")
                 # SECURITY NOTE: torch.load unpickles the file; the voicepacks
                 # are trusted only as far as the cloned repository is.
                 voicepacks[voice_id] = torch.load(voice_file_path, map_location=tts_device)
                 loaded_voices += 1
             except Exception as e:
                 print(f"[TTS Setup] Warning: Failed to load voice {voice_id}: {str(e)}")
         if loaded_voices == 0:
             print("[TTS Setup] ERROR: No voicepacks could be loaded. TTS disabled.")
             tts_model = None  # Free memory if no voices
             return
         TTS_ENABLED = True
         print(f"[TTS Setup] Initialization successful. {loaded_voices} voices loaded. TTS Enabled: {TTS_ENABLED}")
     except ImportError as ie:
         print(f"[TTS Setup] ERROR: Failed to import Kokoro modules: {ie}.")
         print(f"  Please ensure '{repo_path}' contains 'models.py' and 'kokoro.py'.")
         print(traceback.format_exc())
         tts_model = None
         voicepacks.clear()
     except Exception as load_err:
         print(f"[TTS Setup] ERROR: Exception during TTS model/voice loading: {load_err}. TTS disabled.")
         print(traceback.format_exc())
         # Do not keep a half-initialised model alive while TTS stays disabled.
         tts_model = None
         voicepacks.clear()
     finally:
         if sys_path_updated and repo_path in sys.path:
             try:
                 sys.path.remove(repo_path)
                 print(f"[TTS Setup] Removed {repo_path} from sys.path.")
             except Exception as cleanup_err:
                 print(f"[TTS Setup] Warning: Error removing path from sys.path: {cleanup_err}")


 def setup_tts_task():
     """Initializes Kokoro TTS model and dependencies.

     Steps: clone the Kokoro repository if it is missing, make sure an espeak
     binary is available, then import the Kokoro modules and load the model
     and voicepacks. The outcome is published through the module globals
     ``TTS_ENABLED``, ``tts_model``, ``voicepacks`` and ``tts_device``; nothing
     is returned and no exception escapes.
     """
     global TTS_ENABLED, tts_model, voicepacks, tts_device
     print("[TTS Setup] Starting background initialization...")
     tts_device = "cuda" if torch.cuda.is_available() else "cpu"
     print(f"[TTS Setup] Target device: {tts_device}")
     absolute_kokoro_path = os.path.abspath(KOKORO_PATH)  # Use absolute path
     try:
         # 1. Clone Kokoro Repo if needed
         _ensure_kokoro_repo(absolute_kokoro_path)
         # 2. Install espeak dependency
         if not _ensure_espeak():
             return  # Cannot proceed
         # 3. Load Kokoro Model and Voices
         if not os.path.exists(absolute_kokoro_path):
             print(f"[TTS Setup] ERROR: Directory {absolute_kokoro_path} not found. TTS disabled.")
             return
         _load_kokoro(absolute_kokoro_path)
     except Exception as e:
         print(f"[TTS Setup] ERROR: Unexpected error during setup: {str(e)}")
         print(traceback.format_exc())
         TTS_ENABLED = False  # Ensure disabled on any top-level error
         tts_model = None
         voicepacks.clear()
+# Start TTS setup in background
 print("Starting TTS setup thread...")
 tts_setup_thread = threading.Thread(target=setup_tts_task, daemon=True)
 tts_setup_thread.start()
+# --- Core Logic Functions ---
 @lru_cache(maxsize=128)
 def get_web_results_sync(query: str, max_results: int = MAX_SEARCH_RESULTS) -> List[Dict[str, Any]]:
             return formatted
     except Exception as e:
         print(f"[Web Search] Error: {e}")
+        # Avoid printing full traceback repeatedly for common network errors maybe
         return []
 def format_llm_prompt(query: str, context: List[Dict[str, Any]]) -> str:
     """Formats the prompt for the LLM, including context and instructions.

     Args:
         query: The user's question; it is HTML-escaped before insertion.
         context: Web search results. Each entry is indexed by the keys
             'id', 'title' and 'snippet', so all three must be present.

     Returns:
         A single prompt string with the system instructions, the current
         time, the context block, the user query and a trailing
         "ASSISTANT:" marker.

     Raises:
         KeyError: If a context entry lacks 'id', 'title' or 'snippet'.
     """
     # Timestamp comes from datetime.now() with no timezone argument.
     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     # One "[id] title / snippet" entry per result, separated by a blank line;
     # a fixed fallback sentence is used when there are no results.
     # NOTE(review): html.escape turns &, < and > into entities inside the
     # prompt text -- confirm this is intended for model input.
     context_str = "\n\n".join(
+        [f"[{res['id']}] {html.escape(res['title'])}\n{html.escape(res['snippet'])}" for res in context]
     ) if context else "No relevant web context found."
+    # Using a clear, structured prompt
+    return f"""SYSTEM: You are a helpful AI assistant. Answer the user's query based *only* on the provided web search context. Cite sources using bracket notation like [1], [2]. If the context is insufficient, state that clearly. Use markdown for formatting. Do not add external information. Current Time: {current_time}
+CONTEXT:
 ---
 {context_str}
 ---
+USER: {html.escape(query)}
+ASSISTANT:""" # Using ASSISTANT: marker might help some models
 def format_sources_html(web_results: List[Dict[str, Any]]) -> str:
     """Formats search results into HTML for display."""
     for res in web_results:
         title_safe = html.escape(res.get("title", "Source"))
         snippet_safe = html.escape(res.get("snippet", "")[:150] + ("..." if len(res.get("snippet", "")) > 150 else ""))
+        url = html.escape(res.get("url", "#")) # Escape URL too
         items_html += f"""
         <div class='source-item'>
             <div class='source-number'>[{res['id']}]</div>
 async def generate_llm_answer(prompt: str) -> str:
     """Generates answer using the loaded LLM (Async Wrapper)."""
     if not llm_model or not llm_tokenizer:
+        print("[LLM Generate] LLM model or tokenizer not available.")
+        return "Error: Language Model is not available."
     print(f"[LLM Generate] Requesting generation (prompt length {len(prompt)})...")
     start_time = time.time()
             return_tensors="pt",
             padding=True,
             truncation=True,
+            max_length=1024, # Adjust based on model limits
             return_attention_mask=True
+        ).to(llm_model.device)
         with torch.inference_mode(), torch.cuda.amp.autocast(enabled=(llm_model.dtype == torch.float16)):
             outputs = await asyncio.get_event_loop().run_in_executor(
                 executor,
                 llm_model.generate,
                 num_return_sequences=1
             )
+        # Decode only newly generated tokens
         output_ids = outputs[0][inputs.input_ids.shape[1]:]
         answer_part = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
         if not answer_part:
+             answer_part = "*Model generated an empty response.*"
         end_time = time.time()
         print(f"[LLM Generate] Generation complete in {end_time - start_time:.2f}s. Length: {len(answer_part)}")
     except Exception as e:
         print(f"[LLM Generate] Error: {e}")
         print(traceback.format_exc())
+        return f"Error during answer generation: Check logs for details." # User-friendly error
 async def generate_tts_speech(text: str, voice_id: str = 'af') -> Optional[Tuple[int, np.ndarray]]:
     """Generates speech using the loaded TTS model (Async Wrapper)."""
     if not TTS_ENABLED or not tts_model or 'generate_tts_internal' not in globals():
         print("[TTS Generate] Skipping: TTS not ready.")
         return None
+    if not text or not text.strip() or text.startswith("Error:") or text.startswith("*Model generated"):
+        print("[TTS Generate] Skipping: Invalid or empty text.")
         return None
     print(f"[TTS Generate] Requesting speech (length {len(text)}, voice '{voice_id}')...")
     start_time = time.time()
     try:
         actual_voice_id = voice_id
         if voice_id not in voicepacks:
             print(f"[TTS Generate] Warning: Voice '{voice_id}' not loaded. Trying default 'af'.")
                 print("[TTS Generate] Error: Default voice 'af' also not available.")
                 return None
+        # Clean text more thoroughly for TTS
+        clean_text = re.sub(r'\[\d+\](\[\d+\])*', '', text)    # Remove citations [1], [2][3]
+        clean_text = re.sub(r'```.*?```', '', clean_text, flags=re.DOTALL) # Remove code blocks
+        clean_text = re.sub(r'`[^`]*`', '', clean_text)       # Remove inline code
+        clean_text = re.sub(r'^\s*[\*->]\s*', '', clean_text, flags=re.MULTILINE) # Remove list markers/blockquotes at line start
+        clean_text = re.sub(r'[\*#_]', '', clean_text)        # Remove remaining markdown emphasis/headers
+        clean_text = html.unescape(clean_text)                # Decode HTML entities
         clean_text = ' '.join(clean_text.split())             # Normalize whitespace
+        if not clean_text:
+            print("[TTS Generate] Skipping: Text empty after cleaning.")
+            return None
         if len(clean_text) > MAX_TTS_CHARS:
+            print(f"[TTS Generate] Truncating cleaned text from {len(clean_text)} to {MAX_TTS_CHARS} chars.")
             clean_text = clean_text[:MAX_TTS_CHARS]
+            last_punct = max(clean_text.rfind(p) for p in '.?!; ') # Find reasonable cut-off
             if last_punct != -1: clean_text = clean_text[:last_punct+1]
             clean_text += "..."
         gen_func = globals()['generate_tts_internal']
         voice_pack_data = voicepacks[actual_voice_id]
+        # Execute in thread pool
+        # Verify the expected language code ('afr', 'eng', etc.) for Kokoro
         audio_data, _ = await asyncio.get_event_loop().run_in_executor(
+            executor, gen_func, tts_model, clean_text, voice_pack_data, 'afr'
         )
+        # Process output
         if isinstance(audio_data, torch.Tensor):
             audio_np = audio_data.detach().cpu().numpy()
         elif isinstance(audio_data, np.ndarray):
             print("[TTS Generate] Warning: Unexpected audio data type.")
             return None
+        audio_np = audio_np.flatten().astype(np.float32) # Ensure 1D float32
         end_time = time.time()
         print(f"[TTS Generate] Audio generated in {end_time - start_time:.2f}s. Shape: {audio_np.shape}")
 # --- Gradio Interaction Logic ---
+ChatHistoryType = List[Dict[str, Optional[str]]] # Allow None for content during streaming
 async def handle_interaction(
     query: str,
 ):
     """Main async generator function to handle user queries and update Gradio UI."""
     print(f"\n--- Handling Query ---")
+    query = query.strip() # Clean input query
     print(f"Query: '{query}', Voice: '{selected_voice_display_name}'")
+    if not query:
         print("Empty query received.")
+        yield history, "*Please enter a non-empty query.*", "<div class='no-sources'>Enter a query to search.</div>", None, gr.Button(value="Search", interactive=True)
         return
+    # Use 'messages' format: List of {'role': 'user'/'assistant', 'content': '...'}
+    current_history: ChatHistoryType = history + [{"role": "user", "content": query}]
     # Add placeholder for assistant response
+    current_history.append({"role": "assistant", "content": None}) # Content starts as None
+    # Define states to yield
+    chatbot_state = current_history
+    status_state = "*Searching...*"
+    sources_state = "<div class='searching'><span>Searching the web...</span></div>"
+    audio_state = None
+    button_state = gr.Button(value="Searching...", interactive=False)
     # 1. Initial State: Searching
+    current_history[-1]["content"] = status_state # Update placeholder
+    yield chatbot_state, status_state, sources_state, audio_state, button_state
     # 2. Perform Web Search (in executor)
     web_results = await asyncio.get_event_loop().run_in_executor(
         executor, get_web_results_sync, query
     )
+    sources_state = format_sources_html(web_results)
     # Update state: Generating Answer
+    status_state = "*Generating answer...*"
+    button_state = gr.Button(value="Generating...", interactive=False)
+    current_history[-1]["content"] = status_state # Update placeholder
+    yield chatbot_state, status_state, sources_state, audio_state, button_state
     # 3. Generate LLM Answer (async)
     llm_prompt = format_llm_prompt(query, web_results)
     final_answer = await generate_llm_answer(llm_prompt)
+    status_state = final_answer # Now status holds the actual answer
+    # Update assistant message in history fully
     current_history[-1]["content"] = final_answer
     # Update state: Generating Audio (if applicable)
+    button_state = gr.Button(value="Audio...", interactive=False) if TTS_ENABLED else gr.Button(value="Search", interactive=True)
+    yield chatbot_state, status_state, sources_state, audio_state, button_state
     # 4. Generate TTS Speech (async)
     tts_status_message = ""
     if not TTS_ENABLED:
         if tts_setup_thread.is_alive():
              tts_status_message = "\n\n*(TTS initializing...)*"
         else:
+             # Check if setup failed vs just disabled
+             # This info isn't easily available here, assume failed/disabled
+             tts_status_message = "\n\n*(TTS unavailable)*"
+    else:
         voice_id = get_voice_id_from_display(selected_voice_display_name)
+        audio_state = await generate_tts_speech(final_answer, voice_id) # Returns (rate, data) or None
+        if audio_state is None and not final_answer.startswith("Error"): # Don't show TTS fail if LLM failed
             tts_status_message = "\n\n*(Audio generation failed)*"
     # 5. Final State: Show all results
     final_answer_with_status = final_answer + tts_status_message
+    status_state = final_answer_with_status # Update status display
+    current_history[-1]["content"] = final_answer_with_status # Update history *again* with status msg
+    button_state = gr.Button(value="Search", interactive=True) # Re-enable button
     print("--- Query Handling Complete ---")
+    yield chatbot_state, status_state, sources_state, audio_state, button_state
 # --- Gradio UI Definition ---
+# Custom stylesheet passed to gr.Blocks (light theme rules first, dark-mode overrides last).
 css = """
+/* ... [Your existing refined CSS] ... */
 .gradio-container { max-width: 1200px !important; background-color: #f7f7f8 !important; }
 #header { text-align: center; margin-bottom: 2rem; padding: 2rem 0; background: linear-gradient(135deg, #1a1b1e, #2d2e32); border-radius: 12px; color: white; box-shadow: 0 8px 32px rgba(0,0,0,0.2); }
 #header h1 { color: white; font-size: 2.5rem; margin-bottom: 0.5rem; text-shadow: 0 2px 4px rgba(0,0,0,0.3); }
 .search-box button:hover { background: #1d4ed8 !important; }
 .search-box button:disabled { background: #9ca3af !important; cursor: not-allowed; }
 .results-container { background: transparent; padding: 0; margin-top: 1.5rem; }
+.answer-box { /* Now used for status/interim text */ background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 1rem; color: #1f2937; margin-bottom: 0.5rem; box-shadow: 0 2px 8px rgba(0,0,0,0.05); min-height: 50px;}
+.answer-box p { color: #374151; line-height: 1.7; margin:0;}
 .answer-box code { background: #f3f4f6; border-radius: 4px; padding: 2px 4px; color: #4b5563; font-size: 0.9em; }
 .sources-box { background: white; border: 1px solid #e0e0e0; border-radius: 10px; padding: 1.5rem; }
 .sources-box h3 { margin-top: 0; margin-bottom: 1rem; color: #111827; font-size: 1.2rem; }
 .source-title { color: #2563eb; font-weight: 500; text-decoration: none; display: block; margin-bottom: 4px; transition: all 0.2s; font-size: 0.95em; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;}
 .source-title:hover { color: #1d4ed8; text-decoration: underline; }
 .source-snippet { color: #4b5563; font-size: 0.9em; line-height: 1.5; }
+.chat-history { /* Style the chatbot container */ max-height: 500px; overflow-y: auto; background: #f9fafb; border: 1px solid #e5e7eb; border-radius: 8px; /* margin-top: 1rem; */ scrollbar-width: thin; scrollbar-color: #d1d5db #f9fafb; }
 .chat-history > div { padding: 1rem; } /* Add padding inside the chatbot display area */
 .chat-history::-webkit-scrollbar { width: 6px; }
 .chat-history::-webkit-scrollbar-track { background: #f9fafb; }
 .chat-history::-webkit-scrollbar-thumb { background-color: #d1d5db; border-radius: 20px; }
 .examples-container { background: #f9fafb; border-radius: 8px; padding: 1rem; margin-top: 1rem; border: 1px solid #e5e7eb; }
 .examples-container button { background: white !important; border: 1px solid #d1d5db !important; color: #374151 !important; transition: all 0.2s; margin: 4px !important; font-size: 0.9em !important; padding: 6px 12px !important; border-radius: 4px !important; }
 .examples-container button:hover { background: #f3f4f6 !important; border-color: #adb5bd !important; }
 .markdown-content { color: #374151 !important; font-size: 1rem; line-height: 1.7; }
 .markdown-content table { border-collapse: collapse !important; width: 100% !important; margin: 1em 0; }
 .markdown-content th, .markdown-content td { padding: 8px 12px !important; border: 1px solid #d1d5db !important; text-align: left;}
 .markdown-content th { background: #f9fafb !important; font-weight: 600; }
+/* .accordion { background: #f9fafb !important; border: 1px solid #e5e7eb !important; border-radius: 8px !important; margin-top: 1rem !important; box-shadow: none !important; } */
+/* .accordion > .label-wrap { padding: 10px 15px !important; } */
 .voice-selector { margin: 0; padding: 0; height: 100%; }
 .voice-selector div[data-testid="dropdown"] { height: 100% !important; border-radius: 0 !important;}
 .voice-selector select { background: white !important; color: #374151 !important; border: 1px solid #d1d5db !important; border-left: none !important; border-right: none !important; border-radius: 0 !important; height: 100% !important; padding: 0 10px !important; transition: all 0.2s; appearance: none !important; -webkit-appearance: none !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%236b7280' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important; background-position: right 0.5rem center !important; background-repeat: no-repeat !important; background-size: 1.5em 1.5em !important; padding-right: 2.5rem !important; }
 .no-sources { padding: 1rem; text-align: center; color: #6b7280; background: #f9fafb; border-radius: 8px; border: 1px solid #e5e7eb;}
 @keyframes pulse { 0% { opacity: 0.7; } 50% { opacity: 1; } 100% { opacity: 0.7; } }
 .searching span { animation: pulse 1.5s infinite ease-in-out; display: inline-block; }
+/* Dark Mode Styles */
 .dark .gradio-container { background-color: #111827 !important; }
 .dark #header { background: linear-gradient(135deg, #1f2937, #374151); }
 .dark #header h3 { color: #9ca3af; }
 .dark .markdown-content blockquote { border-left-color: #4b5563 !important; color: #9ca3af !important; }
 .dark .markdown-content th, .dark .markdown-content td { border-color: #4b5563 !important; }
 .dark .markdown-content th { background: #374151 !important; }
+/* .dark .accordion { background: #374151 !important; border-color: #4b5563 !important; } */
+/* .dark .accordion > .label-wrap { color: #d1d5db !important; } */
 .dark .voice-selector select { background: #1f2937 !important; color: #d1d5db !important; border-color: #4b5563 !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%239ca3af' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important;}
 .dark .voice-selector select:focus { border-color: #3b82f6 !important; }
 .dark .audio-player { background: #374151 !important; border-color: #4b5563;}
 .dark .no-sources { background: #374151; color: #9ca3af; border-color: #4b5563;}
 """
 with gr.Blocks(title="AI Search Assistant", css=css, theme=gr.themes.Default(primary_hue="blue")) as demo:
+    # Use gr.State for chat history in 'messages' format
     chat_history_state = gr.State([])
+    with gr.Column():
         # Header
         with gr.Column(elem_id="header"):
             gr.Markdown("# 🔍 AI Search Assistant")
         # Search Area
         with gr.Column(elem_classes="search-container"):
+            with gr.Row(elem_classes="search-box"):
                 search_input = gr.Textbox(label="", placeholder="Ask anything...", scale=5, container=False)
                 voice_select = gr.Dropdown(choices=list(VOICE_CHOICES.keys()), value=list(VOICE_CHOICES.keys())[0], label="", scale=1, min_width=180, container=False, elem_classes="voice-selector")
                 search_btn = gr.Button("Search", variant="primary", scale=0, min_width=100)
             # Results Area
+            with gr.Row(elem_classes="results-container"):
+                # Left Column: Chatbot, Status, Audio
                 with gr.Column(scale=3):
                     chatbot_display = gr.Chatbot(
                         label="Conversation",
                         bubble_full_width=True,
+                        height=500, # Adjusted height
                         elem_classes="chat-history",
+                        type="messages", # IMPORTANT: Use 'messages' format
+                        show_label=False,
+                         avatar_images=(None, os.path.join(KOKORO_PATH, "icon.png") if os.path.exists(os.path.join(KOKORO_PATH, "icon.png")) else "https://huggingface.co/spaces/gradio/chatbot-streaming/resolve/main/avatar.png") # User/Assistant avatars
                     )
                     answer_status_output = gr.Markdown(value="*Enter a query to start.*", elem_classes="answer-box markdown-content")
                     audio_player = gr.Audio(label="Voice Response", type="numpy", autoplay=False, show_label=False, elem_classes="audio-player")
                 # Right Column: Sources
             # Examples Area
             with gr.Row(elem_classes="examples-container"):
                  gr.Examples(
                     examples=[
                         "Latest news about renewable energy",
                     ],
                     inputs=search_input,
                     label="Try these examples:",
+                    # elem_classes removed
                 )
     # --- Event Handling Setup ---
     event_inputs = [search_input, chat_history_state, voice_select]
     event_outputs = [
+        chatbot_display,        # Output 1: Updated chat history
+        answer_status_output,   # Output 2: Status/final text
+        sources_output_html,    # Output 3: Sources HTML
+        audio_player,           # Output 4: Audio data
+        search_btn              # Output 5: Button state
     ]
     async def stream_interaction_updates(query, history, voice_display_name):
         """Wraps the async generator to handle streaming updates and errors.

         Relays every state tuple yielded by ``handle_interaction`` unchanged.
         If the generator raises, the error is logged with its traceback and
         one final tuple is yielded that shows the failure and re-enables the
         Search button.

         Args:
             query: Text from the search box.
             history: Chat history in 'messages' format (list of role/content dicts).
             voice_display_name: Display name chosen in the voice dropdown.

         Yields:
             5-tuples matching ``event_outputs``: chat history, status text,
             sources HTML, audio data, button state.
         """
         print("[Gradio Stream] Starting interaction...")
         try:
             async for state_update_tuple in handle_interaction(query, history, voice_display_name):
                 yield state_update_tuple  # Yield the tuple for Gradio to update outputs
             print("[Gradio Stream] Interaction completed successfully.")
         except Exception as e:
             print(f"[Gradio Stream] Error during interaction: {e}")
             print(traceback.format_exc())
             # Tolerate a missing history so the error path itself cannot fail
             # and leave the button disabled.
             error_history = list(history or []) + [
                 {"role": "user", "content": query},
                 {"role": "assistant", "content": "*An error occurred. Please check logs.*"},
             ]
             yield (
                 error_history,
                 f"An error occurred: {e}",
                 "<div class='error'>Request failed.</div>",
                 None,
                 gr.Button(value="Search", interactive=True),  # Ensure button is re-enabled
             )
         # NOTE: clearing the search box afterwards would require adding
         # search_input to event_outputs and yielding a sixth value everywhere.
+    # Connect the streaming function
     # A click on the search button invokes stream_interaction_updates with the
     # components in event_inputs; the tuples it yields are applied to the
     # components in event_outputs.
     search_btn.click(
         fn=stream_interaction_updates,
         inputs=event_inputs,
         outputs=event_outputs
     )
+# --- Main Execution ---
 # Script entry point: enable the request queue (max_size=20) and launch the UI.
 if __name__ == "__main__":
     print("Starting Gradio application...")
+    # Optional: Wait a moment for TTS setup thread to start and potentially print messages
+    # time.sleep(1)
     demo.queue(max_size=20).launch(
         debug=True,
         # NOTE(review): Gradio's docs describe share links as not supported when
         # running on Hugging Face Spaces; confirm and consider share=False there.
+        share=True, # Requests a public share link; set to False if one is not needed
+        # server_name="0.0.0.0", # Uncomment to bind to all network interfaces
+        # server_port=7860 # Optional: Specify port
+    )
     # Presumably reached only after launch() returns, i.e. once the server stops — verify.
+    print("Gradio application stopped.")