Spaces:

awacke1
/

CodeCompetitionClaudeVsGPT

Running

App Files Files Community

awacke1 committed on Dec 19, 2024

Commit

4976812

verified ·

1 Parent(s): ceb3a99

Update app.py

Browse files

Files changed (1) hide show

app.py +327 -257

app.py CHANGED Viewed

@@ -13,21 +13,10 @@ import edge_tts
 import asyncio
 import base64
 import requests
-import plotly.graph_objects as go
-from gradio_client import Client
 from collections import defaultdict
-from bs4 import BeautifulSoup
 from audio_recorder_streamlit import audio_recorder
 import streamlit.components.v1 as components
-# Page configuration
-st.set_page_config(
-    page_title="Video Search & Research Assistant",
-    page_icon="🎥",
-    layout="wide",
-    initial_sidebar_state="auto",
-)
 # Initialize session state
 if 'search_history' not in st.session_state:
     st.session_state['search_history'] = []
@@ -37,18 +26,10 @@ if 'transcript_history' not in st.session_state:
     st.session_state['transcript_history'] = []
 if 'should_rerun' not in st.session_state:
     st.session_state['should_rerun'] = False
-# Custom styling
-st.markdown("""
-<style>
-    .main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
-    .stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
-    .stButton>button { margin-right: 0.5rem; }
-</style>
-""", unsafe_allow_html=True)
-# Initialize components
-speech_component = components.declare_component("speech_recognition", path="mycomponent")
 class VideoSearch:
     def __init__(self):
@@ -56,229 +37,245 @@ class VideoSearch:
         self.load_dataset()
     def fetch_dataset_rows(self):
-        """Fetch dataset from Hugging Face API with debug and caching"""
         try:
-            st.info("Fetching from Hugging Face API...")
             url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
             response = requests.get(url, timeout=30)
-            st.write(f"Response status: {response.status_code}")
             if response.status_code == 200:
                 data = response.json()
                 if 'rows' in data:
-                    # Extract actual row data from the nested structure
                     processed_rows = []
                     for row_data in data['rows']:
-                        if 'row' in row_data:  # Access the nested 'row' data
-                            processed_rows.append(row_data['row'])
                     df = pd.DataFrame(processed_rows)
-                    # Debug output
-                    st.write("DataFrame columns after processing:", list(df.columns))
-                    st.write("Number of rows:", len(df))
                     return df
-                else:
-                    st.error("No 'rows' found in API response")
-                    st.write("Raw API Response:", data)
-                    return self.load_example_data()
-            else:
-                st.error(f"API request failed with status code: {response.status_code}")
-                return self.load_example_data()
         except Exception as e:
-            st.error(f"Error fetching dataset: {str(e)}")
             return self.load_example_data()
     def load_example_data(self):
         """Load example data as fallback"""
         example_data = [
             {
                 "video_id": "cd21da96-fcca-4c94-a60f-0b1e4e1e29fc",
                 "youtube_id": "IO-vwtyicn4",
-                "description": "This video shows a close-up of an ancient text carved into a surface, with the text appearing to be in a cursive script.",
                 "views": 45489,
                 "start_time": 1452,
                 "end_time": 1458,
                 "video_embed": [0.014160037972033024, -0.003111184574663639, -0.016604168340563774],
                 "description_embed": [-0.05835828185081482, 0.02589797042310238, 0.11952091753482819]
-            },
-            {
-                "video_id": "a8ebde7d-d717-4c1e-8be4-bdb4bc0c544f",
-                "youtube_id": "mo4rEyF7gTE",
-                "description": "This video shows a close-up view of a classical architectural structure, featuring stone statues with ornate details.",
-                "views": 4468,
-                "start_time": 318,
-                "end_time": 324,
-                "video_embed": [0.015160037972033024, -0.004111184574663639, -0.017604168340563774],
-                "description_embed": [-0.06835828185081482, 0.03589797042310238, 0.12952091753482819]
-            },
-            {
-                "video_id": "d1be64a6-22e2-4fbd-a176-20749e7c3d8a",
-                "youtube_id": "IO-vwtyicn4",
-                "description": "This video shows a weathered ancient painting depicting figures in classical style with vibrant colors preserved.",
-                "views": 45489,
-                "start_time": 1698,
-                "end_time": 1704,
-                "video_embed": [0.016160037972033024, -0.005111184574663639, -0.018604168340563774],
-                "description_embed": [-0.07835828185081482, 0.04589797042310238, 0.13952091753482819]
             }
         ]
         return pd.DataFrame(example_data)
-    def prepare_features(self):
-        """Prepare and cache embeddings"""
-        try:
-            if 'video_embed' not in self.dataset.columns:
-                st.warning("Using example data embeddings")
-                self.dataset = self.load_example_data()
-            # Debug: Show raw data types and first row
-            st.write("Data Types:", self.dataset.dtypes)
-            st.write("\nFirst row of embeddings:")
-            st.write("video_embed type:", type(self.dataset['video_embed'].iloc[0]))
-            st.write("video_embed content:", self.dataset['video_embed'].iloc[0])
-            st.write("\ndescription_embed type:", type(self.dataset['description_embed'].iloc[0]))
-            st.write("description_embed content:", self.dataset['description_embed'].iloc[0])
-            # Convert string representations of embeddings back to numpy arrays
-            def safe_eval_list(s):
-                try:
-                    # Clean the string representation
-                    if isinstance(s, str):
-                        s = s.replace('[', '').replace(']', '').strip()
-                        # Split by whitespace and/or commas
-                        numbers = [float(x.strip()) for x in s.split() if x.strip()]
-                        return numbers
-                    elif isinstance(s, list):
-                        return [float(x) for x in s]
-                    else:
-                        st.error(f"Unexpected type for embedding: {type(s)}")
-                        return None
-                except Exception as e:
-                    st.error(f"Error parsing embedding: {str(e)}")
-                    st.write("Problematic string:", s)
-                    return None
-            # Process embeddings with detailed error reporting
-            video_embeds = []
-            text_embeds = []
-            for idx in range(len(self.dataset)):
-                try:
-                    video_embed = safe_eval_list(self.dataset['video_embed'].iloc[idx])
-                    desc_embed = safe_eval_list(self.dataset['description_embed'].iloc[idx])
-                    if video_embed is not None and desc_embed is not None:
-                        video_embeds.append(video_embed)
-                        text_embeds.append(desc_embed)
-                    else:
-                        st.warning(f"Skipping row {idx} due to parsing failure")
-                except Exception as e:
-                    st.error(f"Error processing row {idx}: {str(e)}")
-                    st.write("Row data:", self.dataset.iloc[idx])
-            if video_embeds and text_embeds:
-                try:
-                    self.video_embeds = np.array(video_embeds)
-                    self.text_embeds = np.array(text_embeds)
-                    st.success(f"Successfully processed {len(video_embeds)} embeddings")
-                    st.write("Video embeddings shape:", self.video_embeds.shape)
-                    st.write("Text embeddings shape:", self.text_embeds.shape)
-                except Exception as e:
-                    st.error(f"Error converting to numpy arrays: {str(e)}")
-            else:
-                st.warning("No valid embeddings found, using random embeddings")
-                num_rows = len(self.dataset)
-                self.video_embeds = np.random.randn(num_rows, 384)
-                self.text_embeds = np.random.randn(num_rows, 384)
-        except Exception as e:
-            st.error(f"Error preparing features: {str(e)}")
-            import traceback
-            st.write("Traceback:", traceback.format_exc())
-            # Create random embeddings as fallback
-            num_rows = len(self.dataset)
-            self.video_embeds = np.random.randn(num_rows, 384)
-            self.text_embeds = np.random.randn(num_rows, 384)
     def load_dataset(self):
-        try:
-            self.dataset = self.fetch_dataset_rows()
-            if self.dataset is not None:
-                self.prepare_features()
-            else:
-                self.create_dummy_data()
-        except Exception as e:
-            st.error(f"Error loading dataset: {e}")
-            self.create_dummy_data()
-    def prepare_features(self):
-        try:
-            self.video_embeds = np.array([json.loads(e) if isinstance(e, str) else e
-                                        for e in self.dataset.video_embed])
-            self.text_embeds = np.array([json.loads(e) if isinstance(e, str) else e
-                                       for e in self.dataset.description_embed])
-        except Exception as e:
-            st.error(f"Error preparing features: {e}")
-            num_rows = len(self.dataset)
-            self.video_embeds = np.random.randn(num_rows, 384)
-            self.text_embeds = np.random.randn(num_rows, 384)
-    def search(self, query, top_k=5):
         query_embedding = self.text_model.encode([query])[0]
         video_sims = cosine_similarity([query_embedding], self.video_embeds)[0]
         text_sims = cosine_similarity([query_embedding], self.text_embeds)[0]
         combined_sims = 0.5 * video_sims + 0.5 * text_sims
         top_indices = np.argsort(combined_sims)[-top_k:][::-1]
         results = []
         for idx in top_indices:
-            results.append({
-                'video_id': self.dataset.iloc[idx]['video_id'],
-                'youtube_id': self.dataset.iloc[idx]['youtube_id'],
-                'description': self.dataset.iloc[idx]['description'],
-                'start_time': self.dataset.iloc[idx]['start_time'],
-                'end_time': self.dataset.iloc[idx]['end_time'],
-                'relevance_score': float(combined_sims[idx]),
-                'views': self.dataset.iloc[idx]['views']
-            })
         return results
-def perform_arxiv_search(query, vocal_summary=True, extended_refs=False):
-    """Perform Arxiv search with audio summaries"""
-    try:
-        client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
-        refs = client.predict(query, 20, "Semantic Search",
-                            "mistralai/Mixtral-8x7B-Instruct-v0.1",
-                            api_name="/update_with_rag_md")[0]
-        response = client.predict(query, "mistralai/Mixtral-8x7B-Instruct-v0.1",
-                                True, api_name="/ask_llm")
-        result = f"### 🔎 {query}\n\n{response}\n\n{refs}"
-        st.markdown(result)
-        if vocal_summary:
-            audio_file = asyncio.run(generate_speech(response[:500]))
-            if audio_file:
-                st.audio(audio_file)
-                os.remove(audio_file)
-        return result
-    except Exception as e:
-        st.error(f"Error in Arxiv search: {e}")
-        return None
 async def generate_speech(text, voice="en-US-AriaNeural"):
     """Generate speech using Edge TTS"""
     if not text.strip():
         return None
     try:
         communicate = edge_tts.Communicate(text, voice)
         audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
@@ -288,79 +285,130 @@ async def generate_speech(text, voice="en-US-AriaNeural"):
         st.error(f"Error generating speech: {e}")
         return None
-def process_audio_input(audio_bytes):
-    """Process audio input from recorder"""
-    if audio_bytes:
-        # Save temporary file
-        audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
-        with open(audio_path, "wb") as f:
-            f.write(audio_bytes)
-        # Here you would typically use a speech-to-text service
-        # For now, we'll just acknowledge the recording
-        st.success("Audio recorded successfully!")
-        # Cleanup
-        if os.path.exists(audio_path):
-            os.remove(audio_path)
-        return True
-    return False
 def main():
-    st.title("🎥 Video Search & Research Assistant")
-    # Initialize search
     search = VideoSearch()
-    # Create main tabs
-    tab1, tab2, tab3 = st.tabs(["🔍 Video Search", "🎙️ Voice & Audio", "📚 Arxiv Research"])
     with tab1:
-        st.subheader("Search Video Dataset")
-        # Text search
-        query = st.text_input("Enter your search query:")
-        col1, col2 = st.columns(2)
         with col1:
-            search_button = st.button("🔍 Search")
         with col2:
-            num_results = st.slider("Number of results:", 1, 10, 5)
-        if search_button and query:
-            results = search.search(query, num_results)
             st.session_state['search_history'].append({
                 'query': query,
                 'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                'results': results
             })
             for i, result in enumerate(results, 1):
-                with st.expander(f"Result {i}: {result['description'][:100]}...", expanded=i==1):
                     cols = st.columns([2, 1])
                     with cols[0]:
-                        st.markdown(f"**Full Description:**")
                         st.write(result['description'])
                         st.markdown(f"**Time Range:** {result['start_time']}s - {result['end_time']}s")
                         st.markdown(f"**Views:** {result['views']:,}")
                     with cols[1]:
                         st.markdown(f"**Relevance Score:** {result['relevance_score']:.2%}")
-                        if result['youtube_id']:
                             st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
-                        # Generate audio summary
-                        if st.button(f"🔊 Generate Audio Summary", key=f"audio_{i}"):
                             summary = f"Video summary: {result['description'][:200]}"
                             audio_file = asyncio.run(generate_speech(summary))
                             if audio_file:
                                 st.audio(audio_file)
-                                os.remove(audio_file)
     with tab2:
-        st.subheader("Voice Input & Audio Recording")
         col1, col2 = st.columns(2)
         with col1:
@@ -372,49 +420,71 @@ def main():
                 st.markdown("**Transcribed Text:**")
                 st.write(voice_input)
-                if st.button("🔍 Search Videos"):
-                    results = search.search(voice_input, num_results)
                     for i, result in enumerate(results, 1):
                         with st.expander(f"Result {i}", expanded=i==1):
                             st.write(result['description'])
-                            if result['youtube_id']:
-                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
         with col2:
-            st.write("🎵 Audio Recorder")
             audio_bytes = audio_recorder()
             if audio_bytes:
-                process_audio_input(audio_bytes)
     with tab3:
-        st.subheader("Arxiv Research")
-        arxiv_query = st.text_input("🔍 Research Query:")
-        col1, col2 = st.columns(2)
-        with col1:
-            vocal_summary = st.checkbox("Generate Audio Summary", value=True)
-        with col2:
-            extended_refs = st.checkbox("Include Extended References", value=False)
-        if st.button("🔍 Search Arxiv") and arxiv_query:
-            perform_arxiv_search(arxiv_query, vocal_summary, extended_refs)
-    # Sidebar for history and settings
     with st.sidebar:
         st.subheader("⚙️ Settings & History")
         if st.button("🗑️ Clear History"):
             st.session_state['search_history'] = []
-            st.experimental_rerun()
         st.markdown("### Recent Searches")
         for entry in reversed(st.session_state['search_history'][-5:]):
-            st.markdown(f"**{entry['timestamp']}**: {entry['query']}")
         st.markdown("### Voice Settings")
         st.selectbox("TTS Voice:",
                     ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
                     key="tts_voice")
 if __name__ == "__main__":
     main()

 import asyncio
 import base64
 import requests
 from collections import defaultdict
 from audio_recorder_streamlit import audio_recorder
 import streamlit.components.v1 as components
 # Initialize session state
 if 'search_history' not in st.session_state:
     st.session_state['search_history'] = []
     st.session_state['transcript_history'] = []
 if 'should_rerun' not in st.session_state:
     st.session_state['should_rerun'] = False
+if 'search_columns' not in st.session_state:
+    st.session_state['search_columns'] = []
+if 'initial_search_done' not in st.session_state:
+    st.session_state['initial_search_done'] = False
 class VideoSearch:
     def __init__(self):
         self.load_dataset()
     def fetch_dataset_rows(self):
+        """Fetch the first rows of the dataset from the Hugging Face datasets server.
+
+        On an HTTP 200 response containing a 'rows' key, each row is unwrapped
+        (the nested 'row' entry when present, otherwise the item itself) and
+        string values in columns whose name contains 'embed', 'vector' or
+        'encoding' are parsed into lists of floats. The non-embedding column
+        names are stored in ``st.session_state['search_columns']``.
+
+        Returns:
+            A DataFrame of the fetched rows, or the result of
+            ``load_example_data()`` when the response is not usable or any
+            exception is raised.
+        """
         try:
             url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"
             response = requests.get(url, timeout=30)
             if response.status_code == 200:
                 data = response.json()
                 if 'rows' in data:
                     processed_rows = []
                     for row_data in data['rows']:
+                        row = row_data.get('row', row_data)
+                        for key in row:
+                            if any(term in key.lower() for term in ['embed', 'vector', 'encoding']):
+                                if isinstance(row[key], str):
+                                    try:
+                                        row[key] = [float(x.strip()) for x in row[key].strip('[]').split(',') if x.strip()]
+                                    except ValueError:
+                                        # Not a comma-separated float list: leave the raw string in place.
+                                        continue
+                        processed_rows.append(row)
                     df = pd.DataFrame(processed_rows)
+                    # Update search columns
+                    st.session_state['search_columns'] = [col for col in df.columns
+                                                        if col not in ['video_embed', 'description_embed', 'audio_embed']]
                     return df
+            return self.load_example_data()
         except Exception as e:
             return self.load_example_data()
+    def prepare_features(self):
+        """Build the embedding matrices that ``search`` compares queries against.
+
+        Every column of ``self.dataset`` whose name contains 'embed', 'vector'
+        or 'encoding' is converted to a 2-D float array. A column is kept only
+        when it yields exactly one vector per dataset row, because ``search``
+        uses the embedding row index to look up ``self.dataset.iloc[idx]``.
+
+        Sets:
+            self.video_embeds: the 'video_embed' matrix, else the first usable
+                embedding column.
+            self.text_embeds: the 'description_embed' matrix, else the same
+                array as ``self.video_embeds``.
+
+        When no usable embedding column exists, both attributes are set to
+        random arrays of shape (len(self.dataset), 384).
+        """
+        num_rows = len(self.dataset)
+        embed_cols = [col for col in self.dataset.columns
+                      if any(term in str(col).lower() for term in ['embed', 'vector', 'encoding'])]
+        embeddings = {}
+        for col in embed_cols:
+            try:
+                data = []
+                for cell in self.dataset[col]:
+                    if isinstance(cell, str):
+                        data.append([float(x.strip()) for x in cell.strip('[]').split(',') if x.strip()])
+                    elif isinstance(cell, (list, tuple, np.ndarray)):
+                        data.append(cell)
+                    # Any other cell type is dropped here; the row-count check
+                    # below then rejects the whole column.
+                matrix = np.array(data, dtype=float)
+            except (TypeError, ValueError):
+                # Unparseable values or ragged vectors: this column cannot be used.
+                continue
+            # Require one non-empty vector per row so indices stay aligned with the dataset.
+            if matrix.ndim == 2 and matrix.shape[0] == num_rows and matrix.shape[1] > 0:
+                embeddings[col] = matrix
+        if not embeddings:
+            # Fallback to random embeddings
+            self.video_embeds = np.random.randn(num_rows, 384)
+            self.text_embeds = np.random.randn(num_rows, 384)
+            return
+        # Set main embeddings for search
+        if 'video_embed' in embeddings:
+            self.video_embeds = embeddings['video_embed']
+        else:
+            self.video_embeds = next(iter(embeddings.values()))
+        self.text_embeds = embeddings.get('description_embed', self.video_embeds)
     def load_example_data(self):
         """Load example data as fallback"""
+        # One hard-coded row; fetch_dataset_rows returns this frame when the
+        # API response is unusable or raises. Both embedding vectors here have
+        # only three elements.
         example_data = [
             {
                 "video_id": "cd21da96-fcca-4c94-a60f-0b1e4e1e29fc",
                 "youtube_id": "IO-vwtyicn4",
+                "description": "This video shows a close-up of an ancient text carved into a surface.",
                 "views": 45489,
                 "start_time": 1452,
                 "end_time": 1458,
                 "video_embed": [0.014160037972033024, -0.003111184574663639, -0.016604168340563774],
                 "description_embed": [-0.05835828185081482, 0.02589797042310238, 0.11952091753482819]
             }
         ]
         return pd.DataFrame(example_data)
     def load_dataset(self):
+        """Store the fetched rows in ``self.dataset``, then build the embedding arrays from them."""
+        self.dataset = self.fetch_dataset_rows()
+        self.prepare_features()
+    def search(self, query, column=None, top_k=20):
+        """Rank dataset rows against ``query`` and return the best matches.
+
+        The score of a row is the mean of the cosine similarity between the
+        encoded query and that row's video embedding and text embedding.
+
+        Args:
+            query: Search text; encoded with ``self.text_model``.
+            column: Optional dataset column. Rows whose value in this column
+                does not contain ``query`` (literal, case-insensitive) are
+                penalised, not removed.
+            top_k: Number of results wanted; clamped to the range 1..100.
+
+        Returns:
+            A list of dicts, best match first, each holding 'relevance_score'
+            plus every dataset column except the three known embedding columns.
+        """
+        # Semantic search
         query_embedding = self.text_model.encode([query])[0]
         video_sims = cosine_similarity([query_embedding], self.video_embeds)[0]
         text_sims = cosine_similarity([query_embedding], self.text_embeds)[0]
         combined_sims = 0.5 * video_sims + 0.5 * text_sims
+        # Column-specific text search if specified
+        if column and column in self.dataset.columns:
+            # regex=False: the query is user text, so characters such as "(" or
+            # "[" must be matched literally instead of being compiled as a pattern.
+            mask = self.dataset[column].astype(str).str.contains(query, case=False, regex=False).to_numpy()
+            # Subtract half the magnitude instead of multiplying by 0.5, so a
+            # negative similarity is pushed further down rather than raised.
+            combined_sims[~mask] -= 0.5 * np.abs(combined_sims[~mask])
+        # Get top results
+        # Lower bound of 1: a slice of [-0:] would return every row.
+        top_k = max(1, min(int(top_k), 100))
         top_indices = np.argsort(combined_sims)[-top_k:][::-1]
         results = []
         for idx in top_indices:
+            row = self.dataset.iloc[idx]
+            result = {
+                'relevance_score': float(combined_sims[idx])
+            }
+            for col in self.dataset.columns:
+                if col not in ['video_embed', 'description_embed', 'audio_embed']:
+                    result[col] = row[col]
+            results.append(result)
         return results
+def main():
+    """Render the Streamlit page: search tab, voice-input tab, files tab and sidebar.
+
+    A search runs when the search button is pressed, and once automatically
+    while ``st.session_state['initial_search_done']`` is still False. Each
+    search appends the query, a timestamp and its top five results to
+    ``st.session_state['search_history']``, which the sidebar lists.
+    """
+    st.title("🎥 Video Search with Speech Recognition")
+    # Initialize search
+    search = VideoSearch()
+    # Voice picked in the sidebar selectbox on an earlier run; the default
+    # matches generate_speech's own default.
+    tts_voice = st.session_state.get('tts_voice', "en-US-AriaNeural")
+    # Create tabs
+    tab1, tab2, tab3 = st.tabs(["🔍 Search", "🎙️ Voice Input", "📂 Files"])
+    with tab1:
+        st.subheader("Search Videos")
+        # Search interface
+        col1, col2 = st.columns([3, 1])
+        with col1:
+            query = st.text_input("Enter your search query:", value="ancient" if not st.session_state['initial_search_done'] else "")
+        with col2:
+            search_column = st.selectbox("Search in field:",
+                                       ["All Fields"] + st.session_state['search_columns'])
+        col3, col4 = st.columns(2)
+        with col3:
+            num_results = st.slider("Number of results:", 1, 100, 20)
+        with col4:
+            search_button = st.button("🔍 Search")
+        # Process search
+        # NOTE(review): the per-result audio button below lives inside this
+        # branch, so clicking it triggers a rerun in which search_button is
+        # False and the results are no longer drawn - confirm intended.
+        if (search_button or not st.session_state['initial_search_done']) and query:
+            st.session_state['initial_search_done'] = True
+            selected_column = None if search_column == "All Fields" else search_column
+            results = search.search(query, selected_column, num_results)
+            st.session_state['search_history'].append({
+                'query': query,
+                'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                'results': results[:5]  # Store only top 5 for history
+            })
+            for i, result in enumerate(results, 1):
+                with st.expander(f"Result {i}: {result['description'][:100]}...",
+                               expanded=i==1):
+                    cols = st.columns([2, 1])
+                    with cols[0]:
+                        st.markdown("**Description:**")
+                        st.write(result['description'])
+                        st.markdown(f"**Time Range:** {result['start_time']}s - {result['end_time']}s")
+                        st.markdown(f"**Views:** {result['views']:,}")
+                    with cols[1]:
+                        st.markdown(f"**Relevance Score:** {result['relevance_score']:.2%}")
+                        if result.get('youtube_id'):
+                            st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
+                        if st.button(f"🔊 Audio Summary", key=f"audio_{i}"):
+                            summary = f"Video summary: {result['description'][:200]}"
+                            audio_file = asyncio.run(generate_speech(summary, tts_voice))
+                            if audio_file:
+                                st.audio(audio_file)
+                                if os.path.exists(audio_file):
+                                    os.remove(audio_file)
+    with tab2:
+        st.subheader("Voice Input")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.write("🎙️ Speech Recognition")
+            # NOTE(review): speech_component is not defined in the visible part
+            # of the file - confirm it is still declared elsewhere.
+            voice_input = speech_component()
+            if voice_input:
+                # Keep showing the transcript and the search button on every
+                # run: gating them on "input changed" would hide the button
+                # on the very rerun caused by clicking it.
+                st.session_state['last_voice_input'] = voice_input
+                st.markdown("**Transcribed Text:**")
+                st.write(voice_input)
+                # Explicit key: the first tab already has a button with this label.
+                if st.button("🔍 Search", key="voice_search"):
+                    results = search.search(voice_input, None, num_results)
+                    for i, result in enumerate(results, 1):
+                        with st.expander(f"Result {i}", expanded=i==1):
+                            st.write(result['description'])
+                            if result.get('youtube_id'):
+                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result.get('start_time', 0)}")
+        with col2:
+            st.write("🎵 Audio Recording")
+            audio_bytes = audio_recorder()
+            if audio_bytes:
+                # The recording is written to disk and removed again; nothing reads it in between.
+                audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
+                with open(audio_path, "wb") as f:
+                    f.write(audio_bytes)
+                st.success("Audio recorded successfully!")
+                if os.path.exists(audio_path):
+                    os.remove(audio_path)
+    with tab3:
+        show_file_manager()
+    # Sidebar
+    with st.sidebar:
+        st.subheader("⚙️ Settings & History")
+        if st.button("🗑️ Clear History"):
+            st.session_state['search_history'] = []
+            # st.rerun() is what the rest of this file uses; experimental_rerun is deprecated.
+            st.rerun()
+        st.markdown("### Recent Searches")
+        for entry in reversed(st.session_state['search_history'][-5:]):
+            with st.expander(f"{entry['timestamp']}: {entry['query']}"):
+                for i, result in enumerate(entry['results'], 1):
+                    st.write(f"{i}. {result['description'][:100]}...")
+        st.markdown("### Voice Settings")
+        st.selectbox("TTS Voice:",
+                    ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
+                    key="tts_voice")
 async def generate_speech(text, voice="en-US-AriaNeural"):
     """Generate speech using Edge TTS"""
     if not text.strip():
         return None
     try:
         communicate = edge_tts.Communicate(text, voice)
         audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3"
         st.error(f"Error generating speech: {e}")
         return None
+def show_file_manager():
+    """Display the file manager: upload, bulk clear, preview and per-file delete.
+
+    Works on the .txt, .md and .mp3 files in the current working directory.
+    """
+    st.subheader("📂 File Manager")
+    patterns = ("*.txt", "*.md", "*.mp3")
+    # File operations
+    col1, col2 = st.columns(2)
+    with col1:
+        uploaded_file = st.file_uploader("Upload File", type=['txt', 'md', 'mp3'])
+        if uploaded_file:
+            # The name comes from the client: keep only its final component so
+            # the file cannot be written outside the working directory.
+            safe_name = os.path.basename(uploaded_file.name)
+            upload_id = (safe_name, uploaded_file.size)
+            # The uploader keeps returning the same file on every rerun, so
+            # save it once. Re-saving and rerunning unconditionally would loop
+            # and would also resurrect a file the user has just deleted.
+            if safe_name and st.session_state.get('last_uploaded_file') != upload_id:
+                with open(safe_name, "wb") as f:
+                    f.write(uploaded_file.getvalue())
+                st.session_state['last_uploaded_file'] = upload_id
+                st.success(f"Uploaded: {safe_name}")
+    with col2:
+        # NOTE(review): this deletes every matching file in the working
+        # directory, not only files uploaded through this page - confirm.
+        if st.button("🗑 Clear All Files"):
+            for path in [p for pattern in patterns for p in glob.glob(pattern)]:
+                os.remove(path)
+            st.success("All files cleared!")
+            st.rerun()
+    # Show existing files
+    files = [p for pattern in patterns for p in glob.glob(pattern)]
+    if files:
+        st.write("### Existing Files")
+        for f in files:
+            with st.expander(f"📄 {os.path.basename(f)}"):
+                if f.endswith('.mp3'):
+                    st.audio(f)
+                else:
+                    # Explicit encoding; undecodable bytes are replaced so one
+                    # odd file cannot break the whole page.
+                    with open(f, 'r', encoding='utf-8', errors='replace') as file:
+                        st.text_area("Content", file.read(), height=100)
+                if st.button(f"Delete {os.path.basename(f)}", key=f"del_{f}"):
+                    os.remove(f)
+                    st.rerun()
+@st.cache_data(ttl=3600)
+def load_file_list():
+    """Return the .txt, .md and .mp3 paths in the working directory (result cached with ttl=3600)."""
+    matches = []
+    for pattern in ("*.txt", "*.md", "*.mp3"):
+        matches.extend(glob.glob(pattern))
+    return matches
+@st.cache_resource
+def get_speech_model():
+    """Return the ``edge_tts.Communicate`` class itself; callers instantiate it per request."""
+    communicate_cls = edge_tts.Communicate
+    return communicate_cls
+async def generate_speech(text, voice="en-US-AriaNeural"):
+    """Generate an MP3 of ``text`` with Edge TTS and return its file path.
+
+    Args:
+        text: Text to speak. None, empty or whitespace-only text produces no audio.
+        voice: Edge TTS voice name.
+
+    Returns:
+        The path of the written .mp3 file, or None when there is nothing to
+        speak or synthesis fails (the error is shown with ``st.error``).
+    """
+    if not text or not text.strip():
+        return None
+    try:
+        communicate = get_speech_model()(text, voice)
+        # Microsecond resolution: two requests within the same second would
+        # otherwise share a file name and overwrite each other.
+        audio_file = f"speech_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.mp3"
+        await communicate.save(audio_file)
+        return audio_file
+    except Exception as e:
+        st.error(f"Error generating speech: {e}")
+        return None
 def main():
+    """Render the page: title, three tabs (search, voice input, files) and a sidebar.
+
+    Reads and writes ``st.session_state`` keys 'initial_search_done',
+    'search_columns' and 'search_history'.
+    """
+    st.title("🎥 Video Search with Speech Recognition")
+    # Initialize search with cached model
     search = VideoSearch()
+    # Create tabs
+    tab1, tab2, tab3 = st.tabs(["🔍 Search", "🎙️ Voice Input", "📂 Files"])
     with tab1:
+        st.subheader("Search Videos")
+        # Search interface
+        col1, col2 = st.columns([3, 1])
         with col1:
+            # The box is pre-filled with "ancient" until the first search has run;
+            # afterwards its default is empty.
+            query = st.text_input("Enter your search query:",
+                                value="ancient" if not st.session_state['initial_search_done'] else "")
         with col2:
+            search_column = st.selectbox("Search in field:",
+                                       ["All Fields"] + st.session_state['search_columns'])
+        col3, col4 = st.columns(2)
+        with col3:
+            num_results = st.slider("Number of results:", 1, 100, 20)
+        with col4:
+            search_button = st.button("🔍 Search")
+        # Process search
+        # Runs on a button press, and also automatically while
+        # 'initial_search_done' is still false (provided the query is non-empty).
+        if (search_button or not st.session_state['initial_search_done']) and query:
+            st.session_state['initial_search_done'] = True
+            # "All Fields" is passed to the search as None.
+            selected_column = None if search_column == "All Fields" else search_column
+            with st.spinner("Searching..."):
+                results = search.search(query, selected_column, num_results)
             st.session_state['search_history'].append({
                 'query': query,
                 'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                'results': results[:5]  # Store only top 5 for history
             })
+            # One expander per hit; only the first one starts expanded.
             for i, result in enumerate(results, 1):
+                with st.expander(f"Result {i}: {result['description'][:100]}...",
+                               expanded=i==1):
                     cols = st.columns([2, 1])
                     with cols[0]:
+                        st.markdown("**Description:**")
                         st.write(result['description'])
                         st.markdown(f"**Time Range:** {result['start_time']}s - {result['end_time']}s")
                         st.markdown(f"**Views:** {result['views']:,}")
                     with cols[1]:
                         st.markdown(f"**Relevance Score:** {result['relevance_score']:.2%}")
+                        if result.get('youtube_id'):
                             st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")
+                        if st.button(f"🔊 Audio Summary", key=f"audio_{i}"):
                             summary = f"Video summary: {result['description'][:200]}"
+                            # generate_speech is a coroutine, so it is driven to completion here.
+                            # No voice is passed, so its default voice applies.
+                            # NOTE(review): the sidebar "TTS Voice" choice is not used here — confirm intent.
                             audio_file = asyncio.run(generate_speech(summary))
                             if audio_file:
                                 st.audio(audio_file)
+                                # Delete the clip from disk right after passing it to st.audio.
+                                if os.path.exists(audio_file):
+                                    os.remove(audio_file)
     with tab2:
+        st.subheader("Voice Input")
         col1, col2 = st.columns(2)
         with col1:
+                # NOTE(review): `voice_input` is not assigned in the visible part of this
+                # function, and the indentation jumps here — lines appear to be missing
+                # from this view. Confirm against the full file.
                 st.markdown("**Transcribed Text:**")
                 st.write(voice_input)
+                # NOTE(review): same label as the tab-1 search button and no explicit
+                # key — verify Streamlit does not report a duplicate widget.
+                if st.button("🔍 Search"):
+                    with st.spinner("Searching..."):
+                        # Searches all fields (None) and reuses the tab-1 slider value.
+                        results = search.search(voice_input, None, num_results)
                     for i, result in enumerate(results, 1):
                         with st.expander(f"Result {i}", expanded=i==1):
                             st.write(result['description'])
+                            if result.get('youtube_id'):
+                                st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result.get('start_time', 0)}")
         with col2:
+            st.write("🎵 Audio Recording")
             audio_bytes = audio_recorder()
             if audio_bytes:
+                # The recording is written to a timestamped .wav file and removed again
+                # immediately; nothing in the visible code reads it in between.
+                audio_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
+                with open(audio_path, "wb") as f:
+                    f.write(audio_bytes)
+                st.success("Audio recorded successfully!")
+                if os.path.exists(audio_path):
+                    os.remove(audio_path)
     with tab3:
+        show_file_manager()
+    # Sidebar
     with st.sidebar:
         st.subheader("⚙️ Settings & History")
         if st.button("🗑️ Clear History"):
             st.session_state['search_history'] = []
+            st.rerun()
         st.markdown("### Recent Searches")
+        # Last five history entries, newest first.
         for entry in reversed(st.session_state['search_history'][-5:]):
+            with st.expander(f"{entry['timestamp']}: {entry['query']}"):
+                for i, result in enumerate(entry['results'], 1):
+                    st.write(f"{i}. {result['description'][:100]}...")
         st.markdown("### Voice Settings")
+        # The widget is registered under the key "tts_voice".
         st.selectbox("TTS Voice:",
                     ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
                     key="tts_voice")
+            with open(uploaded_file.name, "wb") as f:
+                f.write(uploaded_file.getvalue())
+            st.success(f"Uploaded: {uploaded_file.name}")
+    with col2:
+        if st.button("🗑 Clear All Files"):
+            for f in glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3"):
+                os.remove(f)
+            st.success("All files cleared!")
+    # Show existing files
+    files = glob.glob("*.txt") + glob.glob("*.md") + glob.glob("*.mp3")
+    if files:
+        st.write("### Existing Files")
+        for f in files:
+            with st.expander(f"📄 {os.path.basename(f)}"):
+                if f.endswith('.mp3'):
+                    st.audio(f)
+                else:
+                    with open(f, 'r') as file:
+                        st.text_area("Content", file.read(), height=100)
+                if st.button(f"Delete {os.path.basename(f)}", key=f"del_{f}"):
+                    os.remove(f)
+                    st.experimental_rerun()
+# Entry point: build the page only when this module is run as the main script.
 if __name__ == "__main__":
     main()