sagar007 committed
Commit 8652f53 · verified · 1 Parent(s): 3d63694

Update app.py

Files changed (1)
  1. app.py +376 -250
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
- import spaces
4
  from duckduckgo_search import DDGS
5
  import time
6
  import torch
@@ -14,21 +14,28 @@ import asyncio
14
  import threading
15
  from concurrent.futures import ThreadPoolExecutor
16
  import warnings
 
17
 
18
  # Suppress specific warnings if needed (optional)
19
  warnings.filterwarnings("ignore", category=UserWarning, message="TypedStorage is deprecated")
 
 
20
 
21
  # --- Configuration ---
22
  MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
23
  MAX_SEARCH_RESULTS = 5
24
  TTS_SAMPLE_RATE = 24000
25
- MAX_TTS_CHARS = 1000 # Reduced for faster testing, adjust as needed
26
- GPU_DURATION = 60 # Increased duration for longer tasks like TTS
27
- MAX_NEW_TOKENS = 256
28
  TEMPERATURE = 0.7
29
  TOP_P = 0.95
 
30
 
31
  # --- Initialization ---
 
 
 
32
  # Initialize model and tokenizer with better error handling
33
  try:
34
  print("Loading tokenizer...")
@@ -39,21 +46,24 @@ try:
39
  # Determine device map based on CUDA availability
40
  device_map = "auto" if torch.cuda.is_available() else {"": "cpu"}
41
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32 # Use float32 on CPU
 
42
 
43
  model = AutoModelForCausalLM.from_pretrained(
44
  MODEL_NAME,
45
  device_map=device_map,
46
- # offload_folder="offload", # Only use offload if really needed and configured
47
- low_cpu_mem_usage=True,
48
- torch_dtype=torch_dtype
 
49
  )
50
- print(f"Model loaded on device map: {model.hf_device_map}")
51
- print("Model and tokenizer loaded successfully")
 
 
52
  except Exception as e:
53
- print(f"Error initializing model: {str(e)}")
54
- # If running in Spaces, maybe try loading to CPU as fallback?
55
- # For now, just raise the error.
56
- raise
57
 
58
  # --- TTS Setup ---
59
  VOICE_CHOICES = {
@@ -65,47 +75,66 @@ VOICE_CHOICES = {
65
  TTS_ENABLED = False
66
  TTS_MODEL = None
67
  VOICEPACKS = {} # Cache voice packs
68
- KOKORO_PATH = 'Kokoro-82M'
69
 
70
  # Initialize Kokoro TTS in a separate thread to avoid blocking startup
71
  def setup_tts():
72
  global TTS_ENABLED, TTS_MODEL, VOICEPACKS
73
 
 
 
 
74
  try:
75
  # Check if Kokoro already exists
76
  if not os.path.exists(KOKORO_PATH):
77
  print("Cloning Kokoro-82M repository...")
78
  # Install git-lfs if not present (might need sudo/apt)
79
  try:
80
- subprocess.run(['git', 'lfs', 'install'], check=True, capture_output=True)
 
81
  except (FileNotFoundError, subprocess.CalledProcessError) as lfs_err:
82
- print(f"Warning: git-lfs might not be installed or failed: {lfs_err}. Cloning might be slow or incomplete.")
83
 
84
- clone_cmd = ['git', 'clone', 'https://huggingface.co/hexgrad/Kokoro-82M']
85
  result = subprocess.run(clone_cmd, check=True, capture_output=True, text=True)
86
  print("Kokoro cloned successfully.")
87
- print(result.stdout)
88
- # Optionally pull LFS files if needed (sometimes clone doesn't get them all)
89
- # subprocess.run(['git', 'lfs', 'pull'], cwd=KOKORO_PATH, check=True)
90
 
91
  else:
92
- print("Kokoro-82M directory already exists.")
93
 
94
  # Install espeak (essential for phonemization)
95
  print("Attempting to install espeak-ng or espeak...")
 
 
 
 
 
 
 
 
 
96
  try:
97
- # Try installing espeak-ng first (often preferred)
98
- subprocess.run(['sudo', 'apt-get', 'update'], check=True, capture_output=True)
99
- subprocess.run(['sudo', 'apt-get', 'install', '-y', 'espeak-ng'], check=True, capture_output=True)
 
100
  print("espeak-ng installed successfully.")
101
- except (FileNotFoundError, subprocess.CalledProcessError):
102
- print("espeak-ng installation failed, trying espeak...")
103
  try:
104
- # Fallback to espeak
105
- subprocess.run(['sudo', 'apt-get', 'install', '-y', 'espeak'], check=True, capture_output=True)
106
  print("espeak installed successfully.")
107
- except (FileNotFoundError, subprocess.CalledProcessError) as espeak_err:
108
- print(f"Warning: Could not install espeak-ng or espeak: {espeak_err}. TTS functionality will be disabled.")
109
  return # Cannot proceed without espeak
110
 
111
  # Set up Kokoro TTS
@@ -117,293 +146,321 @@ def setup_tts():
117
  from models import build_model
118
  from kokoro import generate as generate_tts_internal # Avoid name clash
119
 
120
- # Make these functions accessible globally if needed, but better to keep scoped
121
  globals()['build_model'] = build_model
122
  globals()['generate_tts_internal'] = generate_tts_internal
123
 
124
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
125
  print(f"Loading TTS model onto device: {device}")
126
- # Ensure model path is correct
127
  model_file = os.path.join(KOKORO_PATH, 'kokoro-v0_19.pth')
 
128
  if not os.path.exists(model_file):
129
- print(f"Error: TTS model file not found at {model_file}")
130
- # Attempt to pull LFS files again
131
  try:
132
- print("Attempting git lfs pull...")
133
- subprocess.run(['git', 'lfs', 'pull'], cwd=KOKORO_PATH, check=True, capture_output=True)
134
  if not os.path.exists(model_file):
135
- print(f"Error: TTS model file STILL not found at {model_file} after lfs pull.")
136
  return
137
  except Exception as lfs_pull_err:
138
- print(f"Error during git lfs pull: {lfs_pull_err}")
139
  return
140
 
141
  TTS_MODEL = build_model(model_file, device)
 
142
 
143
- # Preload default voice
144
- default_voice_id = 'af'
145
- voice_file_path = os.path.join(KOKORO_PATH, 'voices', f'{default_voice_id}.pt')
146
- if os.path.exists(voice_file_path):
147
- print(f"Loading default voice: {default_voice_id}")
148
- VOICEPACKS[default_voice_id] = torch.load(voice_file_path,
149
- map_location=device) # Removed weights_only=True
150
- else:
151
- print(f"Warning: Default voice file {voice_file_path} not found.")
152
-
153
-
154
- # Preload other common voices to reduce latency
155
  for voice_name, voice_id in VOICE_CHOICES.items():
156
- if voice_id != default_voice_id: # Avoid reloading default
157
- voice_file_path = os.path.join(KOKORO_PATH, 'voices', f'{voice_id}.pt')
158
- if os.path.exists(voice_file_path):
159
- try:
160
- print(f"Preloading voice: {voice_id}")
161
- VOICEPACKS[voice_id] = torch.load(voice_file_path,
162
- map_location=device) # Removed weights_only=True
163
- except Exception as e:
164
- print(f"Warning: Could not preload voice {voice_id}: {str(e)}")
165
- else:
166
- print(f"Info: Voice file {voice_file_path} for '{voice_name}' not found, will skip preloading.")
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
  TTS_ENABLED = True
169
- print("TTS setup completed successfully")
 
170
  except ImportError as ie:
171
- print(f"Error importing Kokoro modules: {ie}. Check if Kokoro-82M is correctly cloned and in sys.path.")
172
  except Exception as model_load_err:
173
- print(f"Error loading TTS model or voices: {model_load_err}")
 
174
 
175
  else:
176
- print(f"Warning: {KOKORO_PATH} directory not found after clone attempt. TTS disabled.")
177
  except subprocess.CalledProcessError as spe:
178
- print(f"Warning: A subprocess command failed during TTS setup: {spe}")
179
  print(f"Command: {' '.join(spe.cmd)}")
180
- print(f"Stderr: {spe.stderr}")
181
- print("TTS may be disabled.")
182
  except Exception as e:
183
- print(f"Warning: An unexpected error occurred during TTS setup: {str(e)}")
 
184
  TTS_ENABLED = False
185
 
186
  # Start TTS setup in a separate thread
 
187
  print("Starting TTS setup in background thread...")
188
  tts_thread = threading.Thread(target=setup_tts, daemon=True)
189
  tts_thread.start()
190
 
191
  # --- Search and Generation Functions ---
 
192
  @lru_cache(maxsize=128)
193
  def get_web_results(query: str, max_results: int = MAX_SEARCH_RESULTS) -> List[Dict[str, str]]:
194
- """Get web search results using DuckDuckGo with caching for improved performance"""
195
- print(f"Performing web search for: '{query}'")
196
  try:
 
197
  with DDGS() as ddgs:
198
- # Using safe='off' potentially gives more results but use cautiously
199
- results = list(ddgs.text(query, max_results=max_results, safesearch='moderate'))
200
- print(f"Found {len(results)} results.")
201
  formatted_results = []
202
- for result in results:
203
  formatted_results.append({
204
- "title": result.get("title", "No Title"),
 
205
  "snippet": result.get("body", "No Snippet Available"),
206
  "url": result.get("href", "#"),
207
- # Attempt to extract date - DDGS doesn't reliably provide it
208
- # "date": result.get("published", "") # Placeholder
209
  })
210
  return formatted_results
211
  except Exception as e:
212
- print(f"Error in web search: {e}")
 
213
  return []
214
 
215
  def format_prompt(query: str, context: List[Dict[str, str]]) -> str:
216
- """Format the prompt with web context"""
217
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
218
- context_lines = '\n'.join([f'- [{res["title"]}]: {res["snippet"]}' for i, res in enumerate(context)]) # No need for index here
219
  prompt = f"""You are a helpful AI assistant. Your task is to answer the user's query based *only* on the provided web search context.
220
- Do not add information not present in the context.
221
- Cite the sources used in your answer using bracket notation, e.g., [Source Title]. Use the titles from the context.
222
- If the context does not contain relevant information to answer the query, state that clearly.
223
  Current Time: {current_time}
224
 
225
  Web Context:
226
- {context_lines if context else "No web context available."}
 
 
227
 
228
  User Query: {query}
229
 
230
  Answer:"""
231
- # print(f"Formatted Prompt:\n{prompt}") # Debugging
232
  return prompt
233
 
234
  def format_sources(web_results: List[Dict[str, str]]) -> str:
235
- """Format sources with more details"""
236
  if not web_results:
237
- return "<div class='no-sources'>No sources found for the query.</div>"
238
 
239
  sources_html = "<div class='sources-container'>"
240
- for i, res in enumerate(web_results, 1):
241
  title = res.get("title", "Source")
242
  url = res.get("url", "#")
243
- # date = f"<span class='source-date'>{res['date']}</span>" if res.get('date') else "" # DDG date is unreliable
244
- snippet = res.get("snippet", "")[:150] + ("..." if len(res.get("snippet", "")) > 150 else "")
 
 
 
245
  sources_html += f"""
246
  <div class='source-item'>
247
- <div class='source-number'>[{i}]</div>
248
  <div class='source-content'>
249
- <a href="{url}" target="_blank" class='source-title' title="{url}">{title}</a>
250
- <div class='source-snippet'>{snippet}</div>
251
  </div>
252
  </div>
253
  """
254
  sources_html += "</div>"
255
  return sources_html
256
 
257
- # Use a ThreadPoolExecutor for potentially blocking I/O or CPU-bound tasks
258
- # Keep GPU tasks separate if possible, or ensure thread safety if sharing GPU resources
259
- executor = ThreadPoolExecutor(max_workers=4)
260
 
261
- @spaces.GPU(duration=GPU_DURATION, cancellable=True)
262
  async def generate_answer(prompt: str) -> str:
263
- """Generate answer using the DeepSeek model with optimized settings (Async Wrapper)"""
264
- print("Generating answer...")
 
265
  try:
 
266
  inputs = tokenizer(
267
  prompt,
268
  return_tensors="pt",
269
  padding=True,
270
  truncation=True,
271
- max_length=1024, # Increased context length
272
  return_attention_mask=True
273
  ).to(model.device)
274
 
275
- # Ensure generation runs on the correct device
276
- with torch.no_grad(), torch.cuda.amp.autocast(enabled=torch.cuda.is_available() and torch_dtype == torch.float16):
277
- outputs = await asyncio.to_thread( # Use asyncio.to_thread for potentially blocking calls
 
278
  model.generate,
279
- inputs.input_ids,
280
  attention_mask=inputs.attention_mask,
281
  max_new_tokens=MAX_NEW_TOKENS,
282
  temperature=TEMPERATURE,
283
  top_p=TOP_P,
284
  pad_token_id=tokenizer.eos_token_id,
 
285
  do_sample=True,
286
- early_stopping=True,
287
  num_return_sequences=1
288
  )
289
 
290
- # Decode output
 
 
 
 
291
  full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
292
- # Extract only the generated part after "Answer:"
293
- answer_part = full_output.split("Answer:")[-1].strip()
294
- print(f"Generated Answer Raw Length: {len(outputs[0])}, Decoded Answer Part Length: {len(answer_part)}")
295
- if not answer_part: # Handle cases where split might fail or answer is empty
296
- print("Warning: Could not extract answer after 'Answer:'. Returning full output.")
297
- return full_output # Fallback
298
- return answer_part
299
  except Exception as e:
300
- print(f"Error during answer generation: {e}")
301
- # You might want to return a specific error message here
302
  return f"Error generating answer: {str(e)}"
303
 
304
- # Ensure this function runs potentially long tasks in a thread using the executor
305
- # @spaces.GPU(duration=GPU_DURATION, cancellable=True) # Keep GPU decorator if TTS uses GPU heavily
306
  async def generate_speech(text: str, voice_id: str = 'af') -> Tuple[int, np.ndarray] | None:
307
  """Generate speech from text using Kokoro TTS model (Async Wrapper)."""
308
  global TTS_MODEL, TTS_ENABLED, VOICEPACKS
309
- print(f"Attempting to generate speech for text (length {len(text)}) with voice '{voice_id}'")
310
 
311
  if not TTS_ENABLED or TTS_MODEL is None:
312
- print("TTS is not enabled or model not loaded.")
313
  return None
314
  if 'generate_tts_internal' not in globals():
315
- print("TTS generation function 'generate_tts_internal' not found.")
 
 
 
316
  return None
317
 
318
- try:
319
- device = TTS_MODEL.device # Get device from the loaded TTS model
320
 
321
- # Load voicepack if needed (handle potential errors)
322
- if voice_id not in VOICEPACKS:
323
- voice_file_path = os.path.join(KOKORO_PATH, 'voices', f'{voice_id}.pt')
324
- if os.path.exists(voice_file_path):
325
- print(f"Loading voice '{voice_id}' on demand...")
326
- try:
327
- VOICEPACKS[voice_id] = await asyncio.to_thread(
328
- torch.load, voice_file_path, map_location=device # Removed weights_only=True
329
- )
330
- except Exception as load_err:
331
- print(f"Error loading voicepack {voice_id}: {load_err}. Falling back to default 'af'.")
332
- voice_id = 'af' # Fallback to default
333
- # Ensure default is loaded if fallback occurs
334
- if 'af' not in VOICEPACKS:
335
- default_voice_file = os.path.join(KOKORO_PATH, 'voices', 'af.pt')
336
- if os.path.exists(default_voice_file):
337
- VOICEPACKS['af'] = await asyncio.to_thread(
338
- torch.load, default_voice_file, map_location=device
339
- )
340
- else:
341
- print("Default voice 'af' also not found. Cannot generate audio.")
342
- return None
343
- else:
344
- print(f"Voicepack {voice_id}.pt not found. Falling back to default 'af'.")
345
- voice_id = 'af' # Fallback to default
346
- if 'af' not in VOICEPACKS: # Check again if default is needed now
347
- default_voice_file = os.path.join(KOKORO_PATH, 'voices', 'af.pt')
348
- if os.path.exists(default_voice_file):
349
- VOICEPACKS['af'] = await asyncio.to_thread(
350
- torch.load, default_voice_file, map_location=device
351
- )
352
- else:
353
- print("Default voice 'af' also not found. Cannot generate audio.")
354
- return None
355
 
 
356
  if voice_id not in VOICEPACKS:
357
- print(f"Error: Voice '{voice_id}' could not be loaded.")
358
- return None
 
 
 
 
 
359
 
360
  # Clean the text (simple cleaning)
361
- clean_text = ' '.join(text.split()) # Remove extra whitespace
362
- clean_text = clean_text.replace('*', '').replace('[', '').replace(']', '') # Remove markdown chars
 
 
 
 
363
 
364
- # Ensure text isn't empty
365
  if not clean_text.strip():
366
- print("Warning: Empty text provided for TTS.")
367
  return None
368
 
369
- # Limit text length
370
  if len(clean_text) > MAX_TTS_CHARS:
371
- print(f"Warning: Text too long ({len(clean_text)} chars), truncating to {MAX_TTS_CHARS}.")
372
- # Simple truncation, could be smarter (split by sentence)
373
  clean_text = clean_text[:MAX_TTS_CHARS]
374
- last_space = clean_text.rfind(' ')
375
- if last_space != -1:
376
- clean_text = clean_text[:last_space] + "..." # Truncate at last space
 
 
377
 
378
- # Run the potentially blocking TTS generation in a thread
379
- print(f"Generating audio for: '{clean_text[:100]}...'")
380
  gen_func = globals()['generate_tts_internal']
381
- loop = asyncio.get_event_loop()
382
- audio_data, _ = await loop.run_in_executor(
383
- executor, # Use the thread pool executor
 
384
  gen_func,
385
  TTS_MODEL,
386
  clean_text,
387
  VOICEPACKS[voice_id],
388
- 'a' # Language code (assuming 'a' is appropriate)
389
  )
390
 
391
  if isinstance(audio_data, torch.Tensor):
392
  # Move tensor to CPU before converting to numpy if it's not already
393
- audio_np = audio_data.cpu().numpy()
394
  elif isinstance(audio_data, np.ndarray):
395
  audio_np = audio_data
396
  else:
397
- print("Warning: Unexpected audio data type from TTS.")
398
  return None
399
 
400
- print(f"Audio generated successfully, shape: {audio_np.shape}")
 
 
 
 
401
  return (TTS_SAMPLE_RATE, audio_np)
402
 
403
  except Exception as e:
404
- import traceback
405
- print(f"Error generating speech: {str(e)}")
406
- print(traceback.format_exc()) # Print full traceback for debugging
407
  return None
408
 
409
  # Helper to get voice ID from display name
@@ -411,22 +468,29 @@ def get_voice_id(voice_display_name: str) -> str:
411
  """Maps the user-friendly voice name to the internal voice ID."""
412
  return VOICE_CHOICES.get(voice_display_name, 'af') # Default to 'af' if not found
413
 
414
- # --- Main Processing Logic (Async) ---
 
 
415
  async def process_query_async(query: str, history: List[List[str]], selected_voice_display_name: str):
416
  """Asynchronously process user query: search -> generate answer -> generate speech"""
417
- if not query:
418
  yield (
419
- "Please enter a query.", "", "Search", history, None
420
  )
421
  return
422
 
423
  if history is None: history = []
424
- current_history = history + [[query, "*Searching...*"]]
 
425
 
426
  # 1. Initial state: Searching
427
  yield (
428
- "*Searching & Thinking...*",
429
- "<div class='searching'>Searching the web...</div>",
430
  gr.Button(value="Searching...", interactive=False), # Disable button
431
  current_history,
432
  None
@@ -438,26 +502,27 @@ async def process_query_async(query: str, history: List[List[str]], selected_voi
438
  sources_html = format_sources(web_results)
439
 
440
  # Update state: Analyzing results
441
- current_history[-1][1] = "*Analyzing search results...*"
442
  yield (
443
- "*Analyzing search results...*",
444
  sources_html,
445
  gr.Button(value="Generating...", interactive=False),
446
- current_history,
447
  None
448
  )
449
 
450
  # 3. Generate Answer (non-blocking, potentially on GPU)
451
  prompt = format_prompt(query, web_results)
452
- final_answer = await generate_answer(prompt) # Already async
453
 
454
- # Update state: Answer generated
455
  current_history[-1][1] = final_answer
 
 
456
  yield (
457
  final_answer,
458
  sources_html,
459
  gr.Button(value="Audio...", interactive=False),
460
- current_history,
461
  None
462
  )
463
 
@@ -465,41 +530,54 @@ async def process_query_async(query: str, history: List[List[str]], selected_voi
465
  audio = None
466
  tts_message = ""
467
  if not tts_thread.is_alive() and not TTS_ENABLED:
468
- tts_message = "\n\n*(TTS setup failed or is disabled)*"
 
469
  elif tts_thread.is_alive():
470
- tts_message = "\n\n*(TTS is still initializing, audio may be delayed)*"
 
471
  elif TTS_ENABLED:
472
  voice_id = get_voice_id(selected_voice_display_name)
473
- audio = await generate_speech(final_answer, voice_id) # Already async
474
- if audio is None:
475
- tts_message = f"\n\n*(Audio generation failed for voice '{voice_id}')*"
 
 
 
 
 
 
 
 
 
476
 
477
  # 5. Final state: Show everything
 
478
  yield (
479
  final_answer + tts_message,
480
  sources_html,
481
  gr.Button(value="Search", interactive=True), # Re-enable button
482
- current_history,
483
  audio
484
  )
485
 
486
 
487
  # --- Gradio Interface ---
 
488
  css = """
489
- /* ... [Your existing CSS remains unchanged] ... */
490
  .gradio-container { max-width: 1200px !important; background-color: #f7f7f8 !important; }
491
  #header { text-align: center; margin-bottom: 2rem; padding: 2rem 0; background: linear-gradient(135deg, #1a1b1e, #2d2e32); border-radius: 12px; color: white; box-shadow: 0 8px 32px rgba(0,0,0,0.2); }
492
  #header h1 { color: white; font-size: 2.5rem; margin-bottom: 0.5rem; text-shadow: 0 2px 4px rgba(0,0,0,0.3); }
493
  #header h3 { color: #a8a9ab; }
494
  .search-container { background: #ffffff; border: 1px solid #e0e0e0; border-radius: 12px; box-shadow: 0 4px 16px rgba(0,0,0,0.05); padding: 1.5rem; margin-bottom: 1.5rem; }
495
- .search-box { padding: 0; margin-bottom: 1rem; }
496
- .search-box .gradio-textbox { border-radius: 8px 0 0 8px !important; } /* Style textbox specifically */
497
- .search-box .gradio-dropdown { border-radius: 0 !important; margin-left: -1px; margin-right: -1px;} /* Style dropdown */
498
- .search-box .gradio-button { border-radius: 0 8px 8px 0 !important; } /* Style button */
499
- .search-box input[type="text"] { background: #f7f7f8 !important; border: 1px solid #d1d5db !important; color: #1f2937 !important; transition: all 0.3s ease; height: 42px !important; }
500
- .search-box input[type="text"]:focus { border-color: #2563eb !important; box-shadow: 0 0 0 2px rgba(37, 99, 235, 0.2) !important; background: white !important; }
501
  .search-box input[type="text"]::placeholder { color: #9ca3af !important; }
502
- .search-box button { background: #2563eb !important; border: none !important; color: white !important; box-shadow: 0 1px 2px rgba(0,0,0,0.05) !important; transition: all 0.3s ease !important; height: 44px !important; }
503
  .search-box button:hover { background: #1d4ed8 !important; }
504
  .search-box button:disabled { background: #9ca3af !important; cursor: not-allowed; }
505
  .results-container { background: transparent; padding: 0; margin-top: 1.5rem; }
@@ -513,8 +591,8 @@ css = """
513
  .source-item:last-child { border-bottom: none; }
514
  /* .source-item:hover { background-color: #f9fafb; } */
515
  .source-number { font-weight: bold; margin-right: 12px; color: #6b7280; width: 20px; text-align: right; flex-shrink: 0;}
516
- .source-content { flex: 1; }
517
- .source-title { color: #2563eb; font-weight: 500; text-decoration: none; display: block; margin-bottom: 4px; transition: all 0.2s; font-size: 0.95em; }
518
  .source-title:hover { color: #1d4ed8; text-decoration: underline; }
519
  .source-date { color: #6b7280; font-size: 0.8em; margin-left: 8px; }
520
  .source-snippet { color: #4b5563; font-size: 0.9em; line-height: 1.5; }
@@ -542,10 +620,10 @@ css = """
542
  .markdown-content th { background: #f9fafb !important; font-weight: 600; }
543
  .accordion { background: #f9fafb !important; border: 1px solid #e5e7eb !important; border-radius: 8px !important; margin-top: 1rem !important; box-shadow: none !important; }
544
  .accordion > .label-wrap { padding: 10px 15px !important; } /* Style accordion header */
545
- .voice-selector { margin: 0; padding: 0; }
546
- .voice-selector div[data-testid="dropdown"] { /* Target the specific dropdown container */ height: 44px !important; }
547
  .voice-selector select { background: white !important; color: #374151 !important; border: 1px solid #d1d5db !important; border-left: none !important; border-right: none !important; border-radius: 0 !important; height: 100% !important; padding: 0 10px !important; transition: all 0.2s; appearance: none !important; -webkit-appearance: none !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%236b7280' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important; background-position: right 0.5rem center !important; background-repeat: no-repeat !important; background-size: 1.5em 1.5em !important; padding-right: 2.5rem !important; }
548
- .voice-selector select:focus { border-color: #2563eb !important; box-shadow: none !important; }
549
  .audio-player { margin-top: 1rem; background: #f9fafb !important; border-radius: 8px !important; padding: 0.5rem !important; border: 1px solid #e5e7eb;}
550
  .audio-player audio { width: 100% !important; }
551
  .searching, .error { padding: 1rem; border-radius: 8px; text-align: center; margin: 1rem 0; border: 1px dashed; }
@@ -553,7 +631,8 @@ css = """
553
  .error { background: #fef2f2; color: #ef4444; border-color: #fecaca; }
554
  .no-sources { padding: 1rem; text-align: center; color: #6b7280; background: #f9fafb; border-radius: 8px; border: 1px solid #e5e7eb;}
555
  @keyframes pulse { 0% { opacity: 0.7; } 50% { opacity: 1; } 100% { opacity: 0.7; } }
556
- .searching span { animation: pulse 1.5s infinite ease-in-out; display: inline-block; } /* Add span for animation */
 
557
  .dark .gradio-container { background-color: #111827 !important; }
558
  .dark #header { background: linear-gradient(135deg, #1f2937, #374151); }
559
  .dark #header h3 { color: #9ca3af; }
@@ -575,7 +654,7 @@ css = """
575
  .dark .source-title { color: #60a5fa; }
576
  .dark .source-title:hover { color: #93c5fd; }
577
  .dark .source-snippet { color: #d1d5db; }
578
- .dark .chat-history { background: #374151; border-color: #4b5563; scrollbar-color: #4b5563 #374151; }
579
  .dark .chat-history::-webkit-scrollbar-track { background: #374151; }
580
  .dark .chat-history::-webkit-scrollbar-thumb { background-color: #4b5563; }
581
  .dark .examples-container { background: #374151; border-color: #4b5563; }
@@ -592,112 +671,159 @@ css = """
592
  .dark .markdown-content th, .dark .markdown-content td { border-color: #4b5563 !important; }
593
  .dark .markdown-content th { background: #374151 !important; }
594
  .dark .accordion { background: #374151 !important; border-color: #4b5563 !important; }
 
595
  .dark .voice-selector select { background: #1f2937 !important; color: #d1d5db !important; border-color: #4b5563 !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%239ca3af' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important;}
596
  .dark .voice-selector select:focus { border-color: #3b82f6 !important; }
597
  .dark .audio-player { background: #374151 !important; border-color: #4b5563;}
598
  .dark .searching { background: #1e3a8a; color: #93c5fd; border-color: #3b82f6; }
599
  .dark .error { background: #7f1d1d; color: #fca5a5; border-color: #ef4444; }
600
  .dark .no-sources { background: #374151; color: #9ca3af; border-color: #4b5563;}
601
-
602
  """
603
 
604
  with gr.Blocks(title="AI Search Assistant", css=css, theme=gr.themes.Default(primary_hue="blue")) as demo:
 
605
  chat_history = gr.State([])
606
 
607
- with gr.Column(): # Main container
 
608
  with gr.Column(elem_id="header"):
609
  gr.Markdown("# 🔍 AI Search Assistant")
610
  gr.Markdown("### Powered by DeepSeek & Real-time Web Results with Voice")
611
 
 
612
  with gr.Column(elem_classes="search-container"):
613
- with gr.Row(elem_classes="search-box", equal_height=True):
614
  search_input = gr.Textbox(
615
  label="",
616
  placeholder="Ask anything...",
617
- scale=5,
618
- container=False, # Important for direct styling
619
  elem_classes="gradio-textbox"
620
  )
621
  voice_select = gr.Dropdown(
622
  choices=list(VOICE_CHOICES.keys()),
623
- value=list(VOICE_CHOICES.keys())[0],
624
- label="", # No label needed here
625
- scale=2,
 
626
  container=False, # Important
627
  elem_classes="voice-selector gradio-dropdown"
628
  )
629
  search_btn = gr.Button(
630
  "Search",
631
  variant="primary",
632
- scale=1,
 
633
  elem_classes="gradio-button"
634
  )
635
 
 
636
  with gr.Row(elem_classes="results-container", equal_height=False):
637
- with gr.Column(scale=3): # Wider column for answer + history
 
638
  with gr.Column(elem_classes="answer-box"):
639
- answer_output = gr.Markdown(elem_classes="markdown-content", value="*Your answer will appear here...*")
640
- # Audio player below the answer
641
- audio_output = gr.Audio(label="Voice Response", elem_classes="audio-player", type="numpy") # Expect numpy array
 
 
 
 
 
 
642
 
643
  with gr.Accordion("Chat History", open=False, elem_classes="accordion"):
644
- chat_history_display = gr.Chatbot(elem_classes="chat-history", label="History", height=300)
645
-
646
- with gr.Column(scale=2): # Narrower column for sources
 
 
 
 
 
 
647
  with gr.Column(elem_classes="sources-box"):
648
  gr.Markdown("### Sources")
649
  sources_output = gr.HTML(value="<div class='no-sources'>Sources will appear here after searching.</div>")
650
 
 
651
  with gr.Row(elem_classes="examples-container"):
652
  gr.Examples(
653
  examples=[
654
  "Latest news about renewable energy",
655
  "Explain the concept of Large Language Models (LLMs)",
656
  "What are the symptoms and prevention tips for the flu?",
657
- "Compare Python and JavaScript for web development"
 
658
  ],
659
- inputs=search_input,
660
  label="Try these examples:",
661
  elem_classes="gradio-examples" # Add class for potential styling
662
  )
663
 
664
  # --- Event Handling ---
665
- # Use the async function for processing
666
  async def handle_interaction(query, history, voice_display_name):
667
- """Wrapper to handle the async generator from process_query_async"""
 
 
 
 
 
 
 
 
668
  try:
669
- async for update in process_query_async(query, history, voice_display_name):
670
- # Ensure the button state is updated correctly
671
- ans_out, src_out, btn_state, hist_display, aud_out = update
672
- yield ans_out, src_out, btn_state, hist_display, aud_out
673
  except Exception as e:
674
- print(f"Error in handle_interaction: {e}")
675
- import traceback
676
- traceback.print_exc()
677
  error_message = f"An unexpected error occurred: {e}"
678
  # Provide a final error state update
 
679
  yield (
680
  error_message,
681
- "<div class='error'>Error processing request.</div>",
682
  gr.Button(value="Search", interactive=True), # Re-enable button on error
683
- history + [[query, f"*Error: {error_message}*"]],
684
  None
685
  )
686
 
 
 
 
687
 
688
- # Corrected event listeners: Pass the voice_select component directly
689
  search_btn.click(
690
  fn=handle_interaction,
691
- inputs=[search_input, chat_history, voice_select], # Pass voice_select component
692
- outputs=[answer_output, sources_output, search_btn, chat_history_display, audio_output]
693
  )
694
 
695
  search_input.submit(
696
  fn=handle_interaction,
697
- inputs=[search_input, chat_history, voice_select], # Pass voice_select component
698
- outputs=[answer_output, sources_output, search_btn, chat_history_display, audio_output]
699
  )
700
 
701
  if __name__ == "__main__":
702
- # Launch the app
703
- demo.queue(max_size=20).launch(debug=True, share=True) # Enable debug for more logs

1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import spaces # Keep for potential future use or other decorators
4
  from duckduckgo_search import DDGS
5
  import time
6
  import torch
 
14
  import threading
15
  from concurrent.futures import ThreadPoolExecutor
16
  import warnings
17
+ import traceback # For detailed error logging
+ import html # Standard-library HTML escaping for source titles/snippets
18
 
19
  # Suppress specific warnings if needed (optional)
20
  warnings.filterwarnings("ignore", category=UserWarning, message="TypedStorage is deprecated")
21
+ # Suppress another common warning with torch.compile backend
22
+ # warnings.filterwarnings("ignore", message="Backend 'inductor' is not available.")
23
 
24
  # --- Configuration ---
25
  MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
26
  MAX_SEARCH_RESULTS = 5
27
  TTS_SAMPLE_RATE = 24000
28
+ MAX_TTS_CHARS = 1000 # Max characters for a single TTS chunk
29
+ # GPU_DURATION = 60 # Informational only now, decorator is removed
30
+ MAX_NEW_TOKENS = 300 # Increased slightly
31
  TEMPERATURE = 0.7
32
  TOP_P = 0.95
33
+ KOKORO_PATH = 'Kokoro-82M' # Path to TTS model directory
34
 
35
  # --- Initialization ---
36
+ # Use a ThreadPoolExecutor for potentially blocking I/O or CPU-bound tasks
37
+ executor = ThreadPoolExecutor(max_workers=4)
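+ # This pool is consumed later by generate_speech() via run_in_executor; max_workers bounds how
+ # many blocking TTS generations can run at once.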
38
+
39
  # Initialize model and tokenizer with better error handling
40
  try:
41
  print("Loading tokenizer...")
 
46
  # Determine device map based on CUDA availability
47
  device_map = "auto" if torch.cuda.is_available() else {"": "cpu"}
48
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32 # Use float32 on CPU
49
+ print(f"Attempting to load model with device_map='{device_map}' and dtype={torch_dtype}")
50
 
51
  model = AutoModelForCausalLM.from_pretrained(
52
  MODEL_NAME,
53
  device_map=device_map,
54
+ # offload_folder="offload", # Enable if needed for large models and disk space is available
55
+ low_cpu_mem_usage=True, # Important for faster loading
56
+ torch_dtype=torch_dtype,
57
+ # attn_implementation="flash_attention_2" # Optional: requires flash-attn installed, use if available for speedup on compatible GPUs
58
  )
59
+ print(f"Model loaded successfully. Device map: {model.hf_device_map}")
60
+ # Ensure model is in evaluation mode
61
+ model.eval()
62
+
63
  except Exception as e:
64
+ print(f"FATAL: Error initializing LLM model: {str(e)}")
65
+ print(traceback.format_exc())
66
+ raise # Stop execution if model loading fails
 
67
 
68
  # --- TTS Setup ---
69
  VOICE_CHOICES = {
 
75
  TTS_ENABLED = False
76
  TTS_MODEL = None
77
  VOICEPACKS = {} # Cache voice packs
 
78
 
79
  # Initialize Kokoro TTS in a separate thread to avoid blocking startup
80
  def setup_tts():
81
  global TTS_ENABLED, TTS_MODEL, VOICEPACKS
82
 
83
+ # Check privileges for apt-get
84
+ can_sudo = shutil.which('sudo') is not None
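+ # Note: on a typical Hugging Face Space the container user has no sudo, so the apt-get calls
+ # below may fail; in that setup espeak-ng is usually provided via packages.txt instead.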
85
+
86
  try:
87
  # Check if Kokoro already exists
88
  if not os.path.exists(KOKORO_PATH):
89
  print("Cloning Kokoro-82M repository...")
90
  # Install git-lfs if not present (might need sudo/apt)
91
  try:
92
+ lfs_install_cmd = ['git', 'lfs', 'install']
93
+ subprocess.run(lfs_install_cmd, check=True, capture_output=True, text=True)
94
  except (FileNotFoundError, subprocess.CalledProcessError) as lfs_err:
95
+ print(f"Warning: git-lfs command failed: {lfs_err}. Cloning might be slow or incomplete.")
96
 
97
+ clone_cmd = ['git', 'clone', 'https://huggingface.co/hexgrad/Kokoro-82M', KOKORO_PATH]
98
  result = subprocess.run(clone_cmd, check=True, capture_output=True, text=True)
99
  print("Kokoro cloned successfully.")
100
+ # print(result.stdout) # Can be verbose
101
+ # Optionally pull LFS files again (sometimes clone doesn't get them all)
102
+ try:
103
+ print("Running git lfs pull...")
104
+ lfs_pull_cmd = ['git', 'lfs', 'pull']
105
+ subprocess.run(lfs_pull_cmd, cwd=KOKORO_PATH, check=True, capture_output=True, text=True)
106
+ print("git lfs pull completed.")
107
+ except (FileNotFoundError, subprocess.CalledProcessError) as lfs_pull_err:
108
+ print(f"Warning: git lfs pull failed: {lfs_pull_err}")
109
 
110
  else:
111
+ print(f"{KOKORO_PATH} directory already exists.")
112
 
113
  # Install espeak (essential for phonemization)
114
  print("Attempting to install espeak-ng or espeak...")
115
+ apt_update_cmd = ['apt-get', 'update', '-qq']
116
+ install_cmd_ng = ['apt-get', 'install', '-y', '-qq', 'espeak-ng']
117
+ install_cmd_legacy = ['apt-get', 'install', '-y', '-qq', 'espeak']
118
+
119
+ if can_sudo:
120
+ apt_update_cmd.insert(0, 'sudo')
121
+ install_cmd_ng.insert(0, 'sudo')
122
+ install_cmd_legacy.insert(0, 'sudo')
123
+
124
  try:
125
+ print(f"Running: {' '.join(apt_update_cmd)}")
126
+ subprocess.run(apt_update_cmd, check=True, capture_output=True)
127
+ print(f"Running: {' '.join(install_cmd_ng)}")
128
+ subprocess.run(install_cmd_ng, check=True, capture_output=True)
129
  print("espeak-ng installed successfully.")
130
+ except (FileNotFoundError, subprocess.CalledProcessError) as ng_err:
131
+ print(f"espeak-ng installation failed ({ng_err}), trying espeak...")
132
  try:
133
+ print(f"Running: {' '.join(install_cmd_legacy)}")
134
+ subprocess.run(install_cmd_legacy, check=True, capture_output=True)
135
  print("espeak installed successfully.")
136
+ except (FileNotFoundError, subprocess.CalledProcessError) as legacy_err:
137
+ print(f"ERROR: Could not install espeak-ng or espeak: {legacy_err}. TTS functionality will be disabled.")
138
  return # Cannot proceed without espeak
139
 
140
  # Set up Kokoro TTS
 
146
  from models import build_model
147
  from kokoro import generate as generate_tts_internal # Avoid name clash
148
 
149
+ # Make these functions accessible globally if needed
150
  globals()['build_model'] = build_model
151
  globals()['generate_tts_internal'] = generate_tts_internal
152
 
153
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
154
  print(f"Loading TTS model onto device: {device}")
 
155
  model_file = os.path.join(KOKORO_PATH, 'kokoro-v0_19.pth')
156
+
157
  if not os.path.exists(model_file):
158
+ print(f"Error: TTS model file not found at {model_file}. Attempting git lfs pull again...")
 
159
  try:
160
+ lfs_pull_cmd = ['git', 'lfs', 'pull']
161
+ subprocess.run(lfs_pull_cmd, cwd=KOKORO_PATH, check=True, capture_output=True, text=True)
162
  if not os.path.exists(model_file):
163
+ print(f"ERROR: TTS model file STILL not found at {model_file} after lfs pull. TTS disabled.")
164
  return
165
  except Exception as lfs_pull_err:
166
+ print(f"Error during git lfs pull: {lfs_pull_err}. TTS disabled.")
167
  return
168
 
169
  TTS_MODEL = build_model(model_file, device)
170
+ print("TTS model loaded.")
171
 
172
+ # Preload voices
 
 
 
 
 
 
 
 
 
 
 
173
  for voice_name, voice_id in VOICE_CHOICES.items():
174
+ voice_file_path = os.path.join(KOKORO_PATH, 'voices', f'{voice_id}.pt')
175
+ if os.path.exists(voice_file_path):
176
+ try:
177
+ print(f"Loading voice: {voice_id} ({voice_name})")
178
+ # Load using torch.load, map_location handles device placement
179
+ VOICEPACKS[voice_id] = torch.load(voice_file_path, map_location=device)
180
+ except Exception as e:
181
+ print(f"Warning: Could not load voice {voice_id}: {str(e)}")
182
+ else:
183
+ print(f"Info: Voice file {voice_file_path} for '{voice_name}' not found, skipping.")
184
+
185
+ if not VOICEPACKS:
186
+ print("ERROR: No voicepacks could be loaded. TTS disabled.")
187
+ return
188
+
189
+ # Ensure the default voice 'af' is loaded even if it is not listed in VOICE_CHOICES
190
+ if 'af' not in VOICEPACKS:
191
+ voice_file_path = os.path.join(KOKORO_PATH, 'voices', 'af.pt')
192
+ if os.path.exists(voice_file_path):
193
+ try:
194
+ print(f"Loading fallback default voice: af")
195
+ VOICEPACKS['af'] = torch.load(voice_file_path, map_location=device)
196
+ except Exception as e:
197
+ print(f"Warning: Could not load fallback default voice 'af': {str(e)}")
198
 
199
  TTS_ENABLED = True
200
+ print("TTS setup completed successfully.")
201
+
202
  except ImportError as ie:
203
+ print(f"ERROR: Importing Kokoro modules failed: {ie}. Check if {KOKORO_PATH} exists and dependencies are met.")
204
  except Exception as model_load_err:
205
+ print(f"ERROR: Loading TTS model or voices failed: {model_load_err}")
206
+ print(traceback.format_exc())
207
 
208
  else:
209
+ print(f"ERROR: {KOKORO_PATH} directory not found. TTS disabled.")
210
  except subprocess.CalledProcessError as spe:
211
+ print(f"ERROR: A subprocess command failed during TTS setup: {spe}")
212
  print(f"Command: {' '.join(spe.cmd)}")
213
+ if spe.stderr: print(f"Stderr: {spe.stderr.strip()}")
214
+ print("TTS setup failed.")
215
  except Exception as e:
216
+ print(f"ERROR: An unexpected error occurred during TTS setup: {str(e)}")
217
+ print(traceback.format_exc())
218
  TTS_ENABLED = False
219
 
220
  # Start TTS setup in a separate thread
221
+ import shutil # Used by setup_tts via shutil.which; imported before the TTS thread starts
222
  print("Starting TTS setup in background thread...")
223
  tts_thread = threading.Thread(target=setup_tts, daemon=True)
224
  tts_thread.start()
225
 
226
  # --- Search and Generation Functions ---
227
+
228
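+ # Note: lru_cache keys on the exact (query, max_results) argument pair, so identical queries are
+ # answered from memory for the lifetime of the process and never re-hit DuckDuckGo; cached
+ # results can therefore go stale for time-sensitive topics.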
  @lru_cache(maxsize=128)
229
  def get_web_results(query: str, max_results: int = MAX_SEARCH_RESULTS) -> List[Dict[str, str]]:
230
+ """Get web search results using DuckDuckGo with caching."""
231
+ print(f"[Web Search] Searching for: '{query}' (max_results={max_results})")
232
  try:
233
+ # Use DDGS context manager for cleanup
234
  with DDGS() as ddgs:
235
+ # Fetch results using ddgs.text()
236
+ results = list(ddgs.text(query, max_results=max_results, safesearch='moderate', timelimit='y')) # Limit to past year
237
+ print(f"[Web Search] Found {len(results)} results.")
238
  formatted_results = []
239
+ for i, result in enumerate(results):
240
  formatted_results.append({
241
+ "id": i + 1, # Add simple ID for citation
242
+ "title": result.get("title", "No Title Available"),
243
  "snippet": result.get("body", "No Snippet Available"),
244
  "url": result.get("href", "#"),
 
 
245
  })
246
  return formatted_results
247
  except Exception as e:
248
+ print(f"[Web Search] Error: {e}")
249
+ print(traceback.format_exc())
250
  return []
251
 
252
  def format_prompt(query: str, context: List[Dict[str, str]]) -> str:
253
+ """Format the prompt with web context for the LLM."""
254
  current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
255
+
256
+ # Format context with IDs for citation
257
+ context_lines = []
258
+ if context:
259
+ for res in context:
260
+ context_lines.append(f"[{res['id']}] {res['title']}\n{res['snippet']}")
261
+ context_str = "\n\n".join(context_lines)
262
+ else:
263
+ context_str = "No web context available."
264
+
265
+ # Clear instructions for the model
266
  prompt = f"""You are a helpful AI assistant. Your task is to answer the user's query based *only* on the provided web search context.
267
+ Follow these instructions carefully:
268
+ 1. Synthesize the information from the context to provide a comprehensive answer.
269
+ 2. Cite the sources used in your answer using bracket notation with the source ID, like [1], [2], etc.
270
+ 3. If multiple sources support a point, you can cite them together, e.g., [1][3].
271
+ 4. Do *not* add information that is not present in the context.
272
+ 5. If the context does not contain relevant information to answer the query, clearly state that you cannot answer based on the provided context.
273
+ 6. Format the answer clearly using markdown.
274
+
275
  Current Time: {current_time}
276
 
277
  Web Context:
278
+ ---
279
+ {context_str}
280
+ ---
281
 
282
  User Query: {query}
283
 
284
  Answer:"""
285
+ # print(f"--- Formatted Prompt ---\n{prompt[:1000]}...\n--- End Prompt ---") # Debugging: Print start of prompt
286
  return prompt
287
 
288
  def format_sources(web_results: List[Dict[str, str]]) -> str:
289
+ """Format sources into HTML for display."""
290
  if not web_results:
291
+ return "<div class='no-sources'>No sources found for this query.</div>"
292
 
293
  sources_html = "<div class='sources-container'>"
294
+ for res in web_results:
295
  title = res.get("title", "Source")
296
  url = res.get("url", "#")
297
+ snippet = res.get("snippet", "")
298
+ # Basic HTML escaping for snippet and title
299
+ title_safe = html.escape(title)
300
+ snippet_safe = html.escape(snippet[:150] + ("..." if len(snippet) > 150 else ""))
301
+
302
  sources_html += f"""
303
  <div class='source-item'>
304
+ <div class='source-number'>[{res['id']}]</div>
305
  <div class='source-content'>
306
+ <a href="{url}" target="_blank" class='source-title' title="{url}">{title_safe}</a>
307
+ <div class='source-snippet'>{snippet_safe}</div>
308
  </div>
309
  </div>
310
  """
311
  sources_html += "</div>"
312
  return sources_html
313
 
314
+ # --- Core Async Logic ---
 
 
315
 
316
+ # NOTE: @spaces.GPU decorator is REMOVED because it's incompatible with async def
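+ # (A possible alternative, sketched here rather than used in this file: keep the decorator on a
+ # small synchronous helper and await it from async code, e.g.
+ #     @spaces.GPU(duration=60)              # illustrative duration
+ #     def _generate_sync(**gen_kwargs):
+ #         with torch.inference_mode():
+ #             return model.generate(**gen_kwargs)
+ #     outputs = await asyncio.to_thread(_generate_sync, **gen_kwargs)
+ # This keeps GPU allocation on a plain function while the event loop stays responsive.)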
317
  async def generate_answer(prompt: str) -> str:
318
+ """Generate answer using the DeepSeek model (Async Wrapper)."""
319
+ print(f"[LLM Generate] Generating answer for prompt (length {len(prompt)})...")
320
+ start_time = time.time()
321
  try:
322
+ # Tokenize input - ensure it runs on the correct device implicitly via model.device
323
  inputs = tokenizer(
324
  prompt,
325
  return_tensors="pt",
326
  padding=True,
327
  truncation=True,
328
+ max_length=1024, # Model's context window might be larger, adjust if known
329
  return_attention_mask=True
330
  ).to(model.device)
331
 
332
+ # Use torch.inference_mode() for efficiency
333
+ with torch.inference_mode(), torch.cuda.amp.autocast(enabled=(model.dtype == torch.float16)):
334
+ # Run model.generate in a separate thread to avoid blocking asyncio event loop
335
+ outputs = await asyncio.to_thread(
336
  model.generate,
337
+ input_ids=inputs.input_ids,
338
  attention_mask=inputs.attention_mask,
339
  max_new_tokens=MAX_NEW_TOKENS,
340
  temperature=TEMPERATURE,
341
  top_p=TOP_P,
342
  pad_token_id=tokenizer.eos_token_id,
343
+ eos_token_id=tokenizer.eos_token_id, # Explicitly set EOS token
344
  do_sample=True,
 
345
  num_return_sequences=1
346
  )
347
 
348
+ # Decode only the newly generated tokens
349
+ # output_ids = outputs[0][inputs.input_ids.shape[1]:] # Slice generated part
350
+ # answer_part = tokenizer.decode(output_ids, skip_special_tokens=True).strip()
351
+
352
+ # Alternative: Decode full output and split (can be less reliable if prompt has "Answer:")
353
  full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
354
+ answer_marker = "Answer:"
355
+ marker_index = full_output.rfind(answer_marker) # Use rfind to find the last occurrence
356
+ if marker_index != -1:
357
+ answer_part = full_output[marker_index + len(answer_marker):].strip()
358
+ else:
359
+ # Fallback: try to remove the prompt text (less reliable)
360
+ prompt_decoded = tokenizer.decode(inputs.input_ids[0], skip_special_tokens=True)
361
+ if full_output.startswith(prompt_decoded):
362
+ answer_part = full_output[len(prompt_decoded):].strip()
363
+ # Check if the marker is now at the beginning
364
+ if answer_part.startswith(answer_marker):
365
+ answer_part = answer_part[len(answer_marker):].strip()
366
+ else:
367
+ print("[LLM Generate] Warning: 'Answer:' marker not found and prompt prefix mismatch. Using full output.")
368
+ answer_part = full_output # Use full output as last resort
369
+
370
+ end_time = time.time()
371
+ print(f"[LLM Generate] Answer generated successfully in {end_time - start_time:.2f}s. Length: {len(answer_part)}")
372
+ return answer_part if answer_part else "*Model did not generate a response.*"
373
+
374
  except Exception as e:
375
+ print(f"[LLM Generate] Error: {e}")
376
+ print(traceback.format_exc())
377
  return f"Error generating answer: {str(e)}"
378
 
379
+ # NOTE: @spaces.GPU decorator is REMOVED because it's incompatible with async def
 
380
  async def generate_speech(text: str, voice_id: str = 'af') -> Tuple[int, np.ndarray] | None:
381
  """Generate speech from text using Kokoro TTS model (Async Wrapper)."""
382
  global TTS_MODEL, TTS_ENABLED, VOICEPACKS
 
383
 
384
  if not TTS_ENABLED or TTS_MODEL is None:
385
+ print("[TTS Generate] Skipping: TTS not enabled or model not loaded.")
386
  return None
387
  if 'generate_tts_internal' not in globals():
388
+ print("[TTS Generate] Skipping: TTS generation function not found.")
389
+ return None
390
+ if not text or not text.strip():
391
+ print("[TTS Generate] Skipping: Empty text provided.")
392
  return None
393
 
394
+ print(f"[TTS Generate] Requesting speech for text (length {len(text)}) with voice '{voice_id}'")
395
+ start_time = time.time()
396
 
397
+ try:
398
+ device = TTS_MODEL.device
399
 
400
+ # Ensure voicepack is loaded
401
  if voice_id not in VOICEPACKS:
402
+ print(f"[TTS Generate] Warning: Voice '{voice_id}' not preloaded. Attempting fallback.")
403
+ # Attempt fallback to default 'af' if available
404
+ voice_id = 'af'
405
+ if 'af' not in VOICEPACKS:
406
+ print("[TTS Generate] Error: Default voice 'af' also not available. Cannot generate audio.")
407
+ return None
408
+ print("[TTS Generate] Using default voice 'af'.")
409
 
410
  # Clean the text (simple cleaning)
411
+ # Remove markdown citations like [1], [2][3] etc.
412
+ clean_text = re.sub(r'\[\d+\](\[\d+\])*', '', text)
413
+ # Remove other common markdown artifacts
414
+ clean_text = clean_text.replace('*', '').replace('#', '').replace('`', '')
415
+ # Remove excessive whitespace
416
+ clean_text = ' '.join(clean_text.split())
417
 
 
418
  if not clean_text.strip():
419
+ print("[TTS Generate] Skipping: Text is empty after cleaning.")
420
  return None
421
 
422
+ # Truncate if too long
423
  if len(clean_text) > MAX_TTS_CHARS:
424
+ print(f"[TTS Generate] Warning: Text too long ({len(clean_text)} chars), truncating to {MAX_TTS_CHARS}.")
 
425
  clean_text = clean_text[:MAX_TTS_CHARS]
426
+ # Find last punctuation or space for cleaner cut
427
+ cut_off = max(clean_text.rfind('.'), clean_text.rfind('?'), clean_text.rfind('!'), clean_text.rfind(' '))
428
+ if cut_off != -1:
429
+ clean_text = clean_text[:cut_off+1]
430
+ clean_text += "..." # Indicate truncation
431
 
432
+ print(f"[TTS Generate] Generating audio for: '{clean_text[:100]}...'")
 
433
  gen_func = globals()['generate_tts_internal']
434
+
435
+ # Run the blocking TTS generation in the thread pool executor
436
+ audio_data, _ = await asyncio.get_event_loop().run_in_executor(
437
+ executor,
438
  gen_func,
439
  TTS_MODEL,
440
  clean_text,
441
  VOICEPACKS[voice_id],
442
+ voice_id[0] # Kokoro derives its language code from the first letter of the voice id (e.g. 'a' for 'af')
443
  )
444
 
445
  if isinstance(audio_data, torch.Tensor):
446
  # Move tensor to CPU before converting to numpy if it's not already
447
+ audio_np = audio_data.detach().cpu().numpy()
448
  elif isinstance(audio_data, np.ndarray):
449
  audio_np = audio_data
450
  else:
451
+ print("[TTS Generate] Warning: Unexpected audio data type received.")
452
  return None
453
 
454
+ end_time = time.time()
455
+ print(f"[TTS Generate] Audio generated successfully in {end_time - start_time:.2f}s. Shape: {audio_np.shape}")
456
+ # Ensure it's 1D array
457
+ if audio_np.ndim > 1:
458
+ audio_np = audio_np.flatten()
459
  return (TTS_SAMPLE_RATE, audio_np)
460
 
461
  except Exception as e:
462
+ print(f"[TTS Generate] Error: {str(e)}")
463
+ print(traceback.format_exc())
 
464
  return None
465
 
466
  # Helper to get voice ID from display name
 
468
  """Maps the user-friendly voice name to the internal voice ID."""
469
  return VOICE_CHOICES.get(voice_display_name, 'af') # Default to 'af' if not found
470
 
471
+ # --- Main Processing Logic (Async Generator) ---
472
+ import re # Regex used above in generate_speech; the module-level import runs before any call
473
+
474
  async def process_query_async(query: str, history: List[List[str]], selected_voice_display_name: str):
475
  """Asynchronously process user query: search -> generate answer -> generate speech"""
476
+ print(f"\n--- New Query Processing ---")
477
+ print(f"Query: '{query}', Voice: '{selected_voice_display_name}'")
478
+
479
+ if not query or not query.strip():
480
+ print("Empty query received.")
481
  yield (
482
+ "Please enter a query.", "", gr.Button(value="Search", interactive=True), history, None
483
  )
484
  return
485
 
486
  if history is None: history = []
487
+ # Append user query to history immediately for display
488
+ current_history = history + [[query, None]] # Placeholder for assistant response
489
 
490
  # 1. Initial state: Searching
491
  yield (
492
+ "*Searching the web...*",
493
+ "<div class='searching'><span>Searching the web...</span></div>", # Added span for CSS animation
494
  gr.Button(value="Searching...", interactive=False), # Disable button
495
  current_history,
496
  None
 
502
  sources_html = format_sources(web_results)
503
 
504
  # Update state: Analyzing results
 
505
  yield (
506
+ "*Analyzing search results and generating answer...*",
507
  sources_html,
508
  gr.Button(value="Generating...", interactive=False),
509
+ current_history, # History still shows user query, assistant response is pending
510
  None
511
  )
512
 
513
  # 3. Generate Answer (non-blocking, potentially on GPU)
514
  prompt = format_prompt(query, web_results)
515
+ final_answer = await generate_answer(prompt) # This is already async
516
 
517
+ # Update history with the final answer BEFORE generating audio
518
  current_history[-1][1] = final_answer
519
+
520
+ # Update state: Answer generated, preparing audio
521
  yield (
522
  final_answer,
523
  sources_html,
524
  gr.Button(value="Audio...", interactive=False),
525
+ current_history, # Now history includes the answer
526
  None
527
  )
528
 
 
530
  audio = None
531
  tts_message = ""
532
  if not tts_thread.is_alive() and not TTS_ENABLED:
533
+ print("[TTS Status] TTS setup failed or is disabled.")
534
+ tts_message = "\n\n*(TTS is disabled or failed to initialize)*"
535
  elif tts_thread.is_alive():
536
+ print("[TTS Status] TTS is still initializing in the background.")
537
+ tts_message = "\n\n*(TTS is still initializing, audio may be delayed or unavailable)*"
538
  elif TTS_ENABLED:
539
  voice_id = get_voice_id(selected_voice_display_name)
540
+ # Only generate audio if the answer generation was successful
541
+ if not final_answer.startswith("Error"):
542
+ audio = await generate_speech(final_answer, voice_id) # This is already async
543
+ if audio is None:
544
+ print(f"[TTS Status] Audio generation failed for voice '{voice_id}'.")
545
+ tts_message = f"\n\n*(Audio generation failed)*"
546
+ else:
547
+ print("[TTS Status] Audio generated successfully.")
548
+ else:
549
+ print("[TTS Status] Skipping audio generation due to answer error.")
550
+ tts_message = "\n\n*(Audio skipped due to answer generation error)*"
551
+
552
 
553
  # 5. Final state: Show everything
554
+ print("--- Query Processing Complete ---")
555
  yield (
556
  final_answer + tts_message,
557
  sources_html,
558
  gr.Button(value="Search", interactive=True), # Re-enable button
559
+ current_history, # Final history state
560
  audio
561
  )
562
 
563
 
564
  # --- Gradio Interface ---
565
+ # (CSS remains the same as your previous version)
566
  css = """
567
+ /* ... [Your existing refined CSS] ... */
568
  .gradio-container { max-width: 1200px !important; background-color: #f7f7f8 !important; }
569
  #header { text-align: center; margin-bottom: 2rem; padding: 2rem 0; background: linear-gradient(135deg, #1a1b1e, #2d2e32); border-radius: 12px; color: white; box-shadow: 0 8px 32px rgba(0,0,0,0.2); }
570
  #header h1 { color: white; font-size: 2.5rem; margin-bottom: 0.5rem; text-shadow: 0 2px 4px rgba(0,0,0,0.3); }
571
  #header h3 { color: #a8a9ab; }
572
  .search-container { background: #ffffff; border: 1px solid #e0e0e0; border-radius: 12px; box-shadow: 0 4px 16px rgba(0,0,0,0.05); padding: 1.5rem; margin-bottom: 1.5rem; }
573
+ .search-box { padding: 0; margin-bottom: 1rem; display: flex; align-items: center; }
574
+ .search-box .gradio-textbox { border-radius: 8px 0 0 8px !important; height: 44px !important; flex-grow: 1; }
575
+ .search-box .gradio-dropdown { border-radius: 0 !important; margin-left: -1px; margin-right: -1px; height: 44px !important; width: 180px; flex-shrink: 0; }
576
+ .search-box .gradio-button { border-radius: 0 8px 8px 0 !important; height: 44px !important; flex-shrink: 0; }
577
+ .search-box input[type="text"] { background: #f7f7f8 !important; border: 1px solid #d1d5db !important; color: #1f2937 !important; transition: all 0.3s ease; height: 100% !important; padding: 0 12px !important;}
578
+ .search-box input[type="text"]:focus { border-color: #2563eb !important; box-shadow: 0 0 0 2px rgba(37, 99, 235, 0.2) !important; background: white !important; z-index: 1; }
579
  .search-box input[type="text"]::placeholder { color: #9ca3af !important; }
580
+ .search-box button { background: #2563eb !important; border: none !important; color: white !important; box-shadow: 0 1px 2px rgba(0,0,0,0.05) !important; transition: all 0.3s ease !important; height: 100% !important; }
581
  .search-box button:hover { background: #1d4ed8 !important; }
582
  .search-box button:disabled { background: #9ca3af !important; cursor: not-allowed; }
583
  .results-container { background: transparent; padding: 0; margin-top: 1.5rem; }
 
  .source-item:last-child { border-bottom: none; }
  /* .source-item:hover { background-color: #f9fafb; } */
  .source-number { font-weight: bold; margin-right: 12px; color: #6b7280; width: 20px; text-align: right; flex-shrink: 0;}
+ .source-content { flex: 1; min-width: 0;} /* Allow content to shrink */
+ .source-title { color: #2563eb; font-weight: 500; text-decoration: none; display: block; margin-bottom: 4px; transition: all 0.2s; font-size: 0.95em; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;}
  .source-title:hover { color: #1d4ed8; text-decoration: underline; }
  .source-date { color: #6b7280; font-size: 0.8em; margin-left: 8px; }
  .source-snippet { color: #4b5563; font-size: 0.9em; line-height: 1.5; }

  .markdown-content th { background: #f9fafb !important; font-weight: 600; }
  .accordion { background: #f9fafb !important; border: 1px solid #e5e7eb !important; border-radius: 8px !important; margin-top: 1rem !important; box-shadow: none !important; }
  .accordion > .label-wrap { padding: 10px 15px !important; } /* Style accordion header */
+ .voice-selector { margin: 0; padding: 0; height: 100%; }
+ .voice-selector div[data-testid="dropdown"] { height: 100% !important; border-radius: 0 !important;}
  .voice-selector select { background: white !important; color: #374151 !important; border: 1px solid #d1d5db !important; border-left: none !important; border-right: none !important; border-radius: 0 !important; height: 100% !important; padding: 0 10px !important; transition: all 0.2s; appearance: none !important; -webkit-appearance: none !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%236b7280' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important; background-position: right 0.5rem center !important; background-repeat: no-repeat !important; background-size: 1.5em 1.5em !important; padding-right: 2.5rem !important; }
+ .voice-selector select:focus { border-color: #2563eb !important; box-shadow: none !important; z-index: 1; position: relative;}
  .audio-player { margin-top: 1rem; background: #f9fafb !important; border-radius: 8px !important; padding: 0.5rem !important; border: 1px solid #e5e7eb;}
  .audio-player audio { width: 100% !important; }
  .searching, .error { padding: 1rem; border-radius: 8px; text-align: center; margin: 1rem 0; border: 1px dashed; }

  .error { background: #fef2f2; color: #ef4444; border-color: #fecaca; }
  .no-sources { padding: 1rem; text-align: center; color: #6b7280; background: #f9fafb; border-radius: 8px; border: 1px solid #e5e7eb;}
  @keyframes pulse { 0% { opacity: 0.7; } 50% { opacity: 1; } 100% { opacity: 0.7; } }
+ .searching span { animation: pulse 1.5s infinite ease-in-out; display: inline-block; }
+ /* Dark Mode Styles */
  .dark .gradio-container { background-color: #111827 !important; }
  .dark #header { background: linear-gradient(135deg, #1f2937, #374151); }
  .dark #header h3 { color: #9ca3af; }

  .dark .source-title { color: #60a5fa; }
  .dark .source-title:hover { color: #93c5fd; }
  .dark .source-snippet { color: #d1d5db; }
+ .dark .chat-history { background: #374151; border-color: #4b5563; scrollbar-color: #4b5563 #374151; color: #d1d5db;} /* Ensure chat text is visible */
  .dark .chat-history::-webkit-scrollbar-track { background: #374151; }
  .dark .chat-history::-webkit-scrollbar-thumb { background-color: #4b5563; }
  .dark .examples-container { background: #374151; border-color: #4b5563; }

  .dark .markdown-content th, .dark .markdown-content td { border-color: #4b5563 !important; }
  .dark .markdown-content th { background: #374151 !important; }
  .dark .accordion { background: #374151 !important; border-color: #4b5563 !important; }
+ .dark .accordion > .label-wrap { color: #d1d5db !important; } /* Accordion label color */
  .dark .voice-selector select { background: #1f2937 !important; color: #d1d5db !important; border-color: #4b5563 !important; background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%239ca3af' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e") !important;}
  .dark .voice-selector select:focus { border-color: #3b82f6 !important; }
  .dark .audio-player { background: #374151 !important; border-color: #4b5563;}
+ .dark .audio-player audio::-webkit-media-controls-panel { background-color: #374151; } /* Style audio player controls */
+ .dark .audio-player audio::-webkit-media-controls-play-button { color: #d1d5db; }
+ .dark .audio-player audio::-webkit-media-controls-current-time-display { color: #9ca3af; }
+ .dark .audio-player audio::-webkit-media-controls-time-remaining-display { color: #9ca3af; }
  .dark .searching { background: #1e3a8a; color: #93c5fd; border-color: #3b82f6; }
  .dark .error { background: #7f1d1d; color: #fca5a5; border-color: #ef4444; }
  .dark .no-sources { background: #374151; color: #9ca3af; border-color: #4b5563;}
  """

  with gr.Blocks(title="AI Search Assistant", css=css, theme=gr.themes.Default(primary_hue="blue")) as demo:
+ # chat_history state persists across interactions for a single user session
  chat_history = gr.State([])

+ with gr.Column(): # Main container for vertical layout
+ # Header Section
  with gr.Column(elem_id="header"):
  gr.Markdown("# 🔍 AI Search Assistant")
  gr.Markdown("### Powered by DeepSeek & Real-time Web Results with Voice")
 
+ # Search Input and Controls Section
  with gr.Column(elem_classes="search-container"):
+ with gr.Row(elem_classes="search-box", equal_height=False): # Use Row for horizontal elements
  search_input = gr.Textbox(
  label="",
  placeholder="Ask anything...",
+ scale=5, # Takes more horizontal space
+ container=False, # Important for direct styling within Row
  elem_classes="gradio-textbox"
  )
  voice_select = gr.Dropdown(
  choices=list(VOICE_CHOICES.keys()),
+ value=list(VOICE_CHOICES.keys())[0], # Default voice display name
+ label="", # Visually hidden label
+ scale=1, # Takes less space
+ min_width=180, # Fixed width for dropdown
  container=False, # Important
  elem_classes="voice-selector gradio-dropdown"
  )
  search_btn = gr.Button(
  "Search",
  variant="primary",
+ scale=0, # Minimal width needed for text
+ min_width=100,
  elem_classes="gradio-button"
  )

+ # Results Display Section (using Columns for side-by-side layout)
  with gr.Row(elem_classes="results-container", equal_height=False):
+ # Left Column: Answer and Chat History
+ with gr.Column(scale=3): # Takes 3 parts of the width
  with gr.Column(elem_classes="answer-box"):
+ answer_output = gr.Markdown(value="*Your answer will appear here...*", elem_classes="markdown-content")
+ # Audio player below the answer text
+ audio_output = gr.Audio(
+ label="Voice Response",
+ type="numpy", # Expects (rate, numpy_array) tuple
+ autoplay=False, # Don't autoplay by default
+ show_label=False, # Hide the "Voice Response" label visually
+ elem_classes="audio-player"
+ )

  with gr.Accordion("Chat History", open=False, elem_classes="accordion"):
+ chat_history_display = gr.Chatbot(
+ label="Conversation",
+ bubble_full_width=True, # Bubbles take full width
+ height=400,
+ elem_classes="chat-history"
+ )
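+ # Note: gr.Chatbot here displays the history as [user, assistant] pairs, the same pair format chat_history accumulates.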
+
+ # Right Column: Sources
+ with gr.Column(scale=2): # Takes 2 parts of the width
  with gr.Column(elem_classes="sources-box"):
  gr.Markdown("### Sources")
  sources_output = gr.HTML(value="<div class='no-sources'>Sources will appear here after searching.</div>")

+ # Example Prompts Section
  with gr.Row(elem_classes="examples-container"):
  gr.Examples(
  examples=[
  "Latest news about renewable energy",
  "Explain the concept of Large Language Models (LLMs)",
  "What are the symptoms and prevention tips for the flu?",
+ "Compare Python and JavaScript for web development",
+ "Summarize the main points of the Paris Agreement on climate change",
  ],
+ inputs=search_input, # Clicking example populates this input
  label="Try these examples:",
  elem_classes="gradio-examples" # Add class for potential styling
  )

  # --- Event Handling ---

  async def handle_interaction(query, history, voice_display_name):
+ """Wrapper to handle the async generator and update outputs."""
+ print(f"[Interaction] Handling query: '{query}'")
+ outputs = { # Dictionary to hold the latest state of outputs
+ "answer": "...",
+ "sources": "...",
+ "button": gr.Button(value="Search", interactive=True),
+ "history": history,
+ "audio": None
+ }
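+ # The keys mirror outputs_list (answer, sources, button, history, audio); each yield below emits them in that order.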
  try:
+ # Iterate through the updates yielded by the async generator
+ async for update_tuple in process_query_async(query, history, voice_display_name):
+ # Unpack the tuple
+ ans_out, src_out, btn_state, hist_display, aud_out = update_tuple
+ # Update the outputs dictionary
+ outputs["answer"] = ans_out
+ outputs["sources"] = src_out
+ outputs["button"] = btn_state # Can be a gr.Button update dict or object
+ outputs["history"] = hist_display
+ outputs["audio"] = aud_out
+ # Yield the current state of all outputs
+ yield outputs["answer"], outputs["sources"], outputs["button"], outputs["history"], outputs["audio"]
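+ # Because this handler is an async generator, Gradio streams each yielded tuple to the UI as a partial update.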
  except Exception as e:
+ print(f"[Interaction] Error: {e}")
+ print(traceback.format_exc())
  error_message = f"An unexpected error occurred: {e}"
  # Provide a final error state update
+ final_error_history = history + [[query, f"*Error: {error_message}*"]] if query else history
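+ # Precedence note: this evaluates as (history + [[query, error]]) if query else history, so an empty query leaves history unchanged.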
  yield (
  error_message,
+ "<div class='error'>Error processing request. Please check logs or try again.</div>",
  gr.Button(value="Search", interactive=True), # Re-enable button on error
+ final_error_history,
  None
  )

+ # Connect the handle_interaction function to the button click and input submit events
+ outputs_list = [answer_output, sources_output, search_btn, chat_history_display, audio_output]
+ inputs_list = [search_input, chat_history, voice_select] # Pass the dropdown component itself

  search_btn.click(
  fn=handle_interaction,
+ inputs=inputs_list,
+ outputs=outputs_list
  )

  search_input.submit(
  fn=handle_interaction,
+ inputs=inputs_list,
+ outputs=outputs_list
  )
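+ # Clicking the Search button and pressing Enter in the textbox run the same handler with the same inputs/outputs.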

  if __name__ == "__main__":
+ print("Starting Gradio application...")
+ # Launch the app with queuing enabled for handling multiple users
+ demo.queue(max_size=20).launch(
+ debug=True, # Enable Gradio debug mode for more logs
+ share=True, # Create a public link (useful for Spaces)
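+ # share=True only matters for local runs; a deployed Space already serves a public URL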
+ # server_name="0.0.0.0" # Bind to all interfaces if running locally and need external access
+ )