Spaces:

awacke1
/

CodeCompetitionClaudeVsGPT

Sleeping

App Files Files Community

awacke1 committed on Dec 20, 2024

Commit

e9907ed

verified ·

1 Parent(s): 16ef1bd

Update app.py

Browse files

Files changed (1) hide show

app.py +209 -318

app.py CHANGED Viewed

@@ -22,20 +22,24 @@ import base64
 import re
 # -------------------- Configuration & Constants --------------------
-# User name assignment
 USER_NAMES = [
     "Alex", "Jordan", "Taylor", "Morgan", "Rowan", "Avery", "Riley", "Quinn",
     "Casey", "Jesse", "Reese", "Skyler", "Ellis", "Devon", "Aubrey", "Kendall",
     "Parker", "Dakota", "Sage", "Finley"
 ]
 ROWS_PER_PAGE = 100
 MIN_SEARCH_SCORE = 0.3
 EXACT_MATCH_BOOST = 2.0
 SAVED_INPUTS_DIR = "saved_inputs"
 os.makedirs(SAVED_INPUTS_DIR, exist_ok=True)
-# -------------------- Session State Initialization --------------------
 SESSION_VARS = {
     'search_history': [],
     'last_voice_input': "",
@@ -53,21 +57,20 @@ SESSION_VARS = {
     'nps_last_shown': None,
     'old_val': None,
     'voice_text': None,
-    'user_name': None,   # Track user name
-    'max_items': 100      # Default max items
 }
 for var, default in SESSION_VARS.items():
     if var not in st.session_state:
         st.session_state[var] = default
-# Assign user name if not assigned
-if st.session_state['user_name'] is None:
-    st.session_state['user_name'] = random.choice(USER_NAMES)
-# -------------------- Utility Functions --------------------
 def create_voice_component():
-    """Create the voice input component"""
     mycomponent = components.declare_component(
         "mycomponent",
         path="mycomponent"
@@ -83,7 +86,6 @@ def clean_for_speech(text: str) -> str:
     return text
 async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0):
-    """Generate audio using Edge TTS"""
     text = clean_for_speech(text)
     if not text.strip():
         return None
@@ -94,68 +96,39 @@ async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=
     await communicate.save(out_fn)
     return out_fn
-def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0):
-    return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch))
 def play_and_download_audio(file_path):
-    """Play and provide download link for audio"""
     if file_path and os.path.exists(file_path):
         st.audio(file_path)
         dl_link = f'<a href="data:audio/mpeg;base64,{base64.b64encode(open(file_path,"rb").read()).decode()}" download="{os.path.basename(file_path)}">Download {os.path.basename(file_path)}</a>'
         st.markdown(dl_link, unsafe_allow_html=True)
-@st.cache_resource
-def get_model():
-    return SentenceTransformer('all-MiniLM-L6-v2')
-@st.cache_data
-def load_dataset_page(dataset_id, token, page, rows_per_page):
-    try:
-        start_idx = page * rows_per_page
-        end_idx = start_idx + rows_per_page
-        dataset = load_dataset(
-            dataset_id,
-            token=token,
-            streaming=False,
-            split=f'train[{start_idx}:{end_idx}]'
-        )
-        return pd.DataFrame(dataset)
-    except Exception as e:
-        st.error(f"Error loading page {page}: {str(e)}")
-        return pd.DataFrame()
-@st.cache_data
-def get_dataset_info(dataset_id, token):
-    try:
-        dataset = load_dataset(dataset_id, token=token, streaming=True)
-        return dataset['train'].info
-    except Exception as e:
-        st.error(f"Error loading dataset info: {str(e)}")
-        return None
-def fetch_dataset_info(dataset_id):
-    info_url = f"https://huggingface.co/api/datasets/{dataset_id}"
-    try:
-        response = requests.get(info_url, timeout=30)
-        if response.status_code == 200:
-            return response.json()
-    except Exception as e:
-        st.warning(f"Error fetching dataset info: {e}")
-    return None
-def generate_filename(text):
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     safe_text = re.sub(r'[^\w\s-]', '', text[:50]).strip().lower()
     safe_text = re.sub(r'[-\s]+', '-', safe_text)
-    return f"{timestamp}_{safe_text}.md"
-def save_input_as_md(text):
     if not text.strip():
         return
-    fn = generate_filename(text)
     full_path = os.path.join(SAVED_INPUTS_DIR, fn)
     with open(full_path, 'w', encoding='utf-8') as f:
-        f.write(f"# User: {st.session_state['user_name']}\n")
         f.write(f"**Timestamp:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
         f.write(text)
     return full_path
@@ -164,60 +137,61 @@ def list_saved_inputs():
     files = sorted(glob.glob(os.path.join(SAVED_INPUTS_DIR, "*.md")))
     return files
-def render_result(result, index=None):
-    score = result.get('relevance_score', 0)
-    result_filtered = {k: v for k, v in result.items()
-                      if k not in ['relevance_score', 'video_embed', 'description_embed', 'audio_embed']}
-    if 'youtube_id' in result:
-        st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result.get('start_time', 0)}")
-    cols = st.columns([2, 1])
-    with cols[0]:
-        text_content = []
-        for key, value in result_filtered.items():
-            if isinstance(value, (str, int, float)):
-                st.write(f"**{key}:** {value}")
-                if isinstance(value, str) and len(value.strip()) > 0:
-                    text_content.append(f"{key}: {value}")
-    with cols[1]:
-        st.metric("Relevance", f"{score:.2%}")
-        voices = {
-            "Aria (US Female)": "en-US-AriaNeural",
-            "Guy (US Male)": "en-US-GuyNeural",
-            "Sonia (UK Female)": "en-GB-SoniaNeural",
-            "Tony (UK Male)": "en-GB-TonyNeural"
-        }
-        # Ensure unique keys by using the index
-        voice_key = f"voice_{index}" if index is not None else f"voice_{id(result)}"
-        selected_voice = st.selectbox(
-            "Voice:",
-            list(voices.keys()),
-            key=voice_key
         )
-        read_key = f"read_{voice_key}"
-        if st.button("🔊 Read", key=read_key):
-            text_to_read = ". ".join(text_content)
-            audio_file = speak_with_edge_tts(text_to_read, voices[selected_voice])
-            if audio_file:
-                play_and_download_audio(audio_file)
 class FastDatasetSearcher:
     def __init__(self, dataset_id="tomg-group-umd/cinepile"):
         self.dataset_id = dataset_id
         self.text_model = get_model()
         self.token = os.environ.get('DATASET_KEY')
-        if not self.token:
-            st.error("Please set the DATASET_KEY environment variable")
-            st.stop()
-        if st.session_state['dataset_info'] is None:
-            st.session_state['dataset_info'] = get_dataset_info(self.dataset_id, self.token)
     def load_page(self, page=0):
         return load_dataset_page(self.dataset_id, self.token, page, ROWS_PER_PAGE)
@@ -245,7 +219,6 @@ class FastDatasetSearcher:
                 text_parts = []
                 row_matched = False
                 exact_match = False
                 priority_fields = ['description', 'matched_text']
                 other_fields = [col for col in searchable_cols if col not in priority_fields]
@@ -271,7 +244,6 @@ class FastDatasetSearcher:
                         text_parts.append(str(val))
                 text = ' '.join(text_parts)
                 if text.strip():
                     text_tokens = set(text.lower().split())
                     matching_terms = query_terms.intersection(text_tokens)
@@ -303,241 +275,160 @@ class FastDatasetSearcher:
             ]
             return filtered_df.sort_values('score', ascending=False)
-        except Exception as e:
-            st.error(f"Search error: {str(e)}")
             return df
-# -------------------- Main App --------------------
 def main():
-    st.title("🎥 Smart Video & Voice Search")
-    # Load saved inputs (conversation history)
-    saved_files = list_saved_inputs()
-    # Initialize components
-    voice_component = create_voice_component()
-    search = FastDatasetSearcher()
-    # Voice input at top level
-    voice_val = voice_component(my_input_value="Start speaking...")
-    # User can override max items
     with st.sidebar:
-        st.write(f"**Current User:** {st.session_state['user_name']}")
-        st.session_state['max_items'] = st.number_input("Max Items per search iteration:", min_value=1, max_value=1000, value=st.session_state['max_items'])
-        st.subheader("📝 Saved Inputs:")
-        # Show saved md files in order
-        for fpath in saved_files:
-            fname = os.path.basename(fpath)
-            st.write(f"- [{fname}]({fpath})")
-    if voice_val:
-        voice_text = str(voice_val).strip()
-        edited_input = st.text_area("✏️ Edit Voice Input:", value=voice_text, height=100)
-        # Auto-run default True now
-        run_option = st.selectbox("Select Search Type:",
-                                  ["Quick Search", "Deep Search", "Voice Summary"])
-        col1, col2 = st.columns(2)
-        with col1:
-            autorun = st.checkbox("⚡ Auto-Run", value=True)
-        with col2:
-            full_audio = st.checkbox("🔊 Full Audio", value=False)
-        input_changed = (voice_text != st.session_state.get('old_val'))
-        if autorun and input_changed:
-            # Save input as md file immediately
-            saved_path = save_input_as_md(edited_input)
-            st.session_state['old_val'] = voice_text
-            with st.spinner("Processing voice input..."):
-                # Instead of just top 20, show up to max_items in order
-                if run_option == "Quick Search":
-                    df = search.load_page()
-                    results = search.quick_search(edited_input, df)
-                    # Show results in order, stopping at max_items
-                    shown = 0
-                    for i, result in enumerate(results.iterrows(), 1):
-                        if shown >= st.session_state['max_items']:
-                            break
-                        with st.expander(f"Result {i}", expanded=(i==1)):
-                            render_result(result[1], index=i)
-                        shown += 1
-                elif run_option == "Deep Search":
-                    # For deep search, iterate through pages until we hit max_items
-                    results_all = []
-                    page = 0
-                    while len(results_all) < st.session_state['max_items']:
-                        df = search.load_page(page)
-                        if df.empty:
-                            break
-                        these_results = search.quick_search(edited_input, df)
-                        if these_results.empty:
-                            break
-                        results_all.extend(these_results.iterrows())
-                        page += 1
-                    shown = 0
-                    for i, result in enumerate(results_all, 1):
-                        if shown >= st.session_state['max_items']:
-                            break
-                        with st.expander(f"Result {i}", expanded=(i==1)):
-                            render_result(result[1], index=i)
-                        shown += 1
-                elif run_option == "Voice Summary":
-                    audio_file = speak_with_edge_tts(edited_input)
-                    if audio_file:
-                        play_and_download_audio(audio_file)
-        elif st.button("🔍 Search", key="voice_input_search"):
-            # Manual search trigger
-            # Save input as md file
-            saved_path = save_input_as_md(edited_input)
-            st.session_state['old_val'] = voice_text
-            with st.spinner("Processing..."):
-                df = search.load_page()
-                results = search.quick_search(edited_input, df)
-                shown = 0
-                for i, result in enumerate(results.iterrows(), 1):
-                    if shown >= st.session_state['max_items']:
-                        break
-                    with st.expander(f"Result {i}", expanded=(i==1)):
-                        render_result(result[1], index=i)
-                    shown += 1
-    # Tabs
-    tab1, tab2, tab3, tab4 = st.tabs([
-        "🔍 Search", "🎙️ Voice", "💾 History", "⚙️ Settings"
-    ])
     with tab1:
-        st.subheader("🔍 Search")
-        col1, col2 = st.columns([3, 1])
-        with col1:
-            query = st.text_input("Enter search query:",
-                                  value="" if st.session_state['initial_search_done'] == False else "")
-        with col2:
-            # Not strictly filtering by column now; user requested just show in order
-            search_column = st.selectbox("Search in:", ["All Fields"] + st.session_state['search_columns'])
-        col3, col4 = st.columns(2)
-        with col3:
-            num_results = st.slider("Max results:", 1, 100, 20)
-        with col4:
-            search_button = st.button("🔍 Search", key="main_search_button")
-        if (search_button or not st.session_state['initial_search_done']) and query:
-            st.session_state['initial_search_done'] = True
-            selected_column = None if search_column == "All Fields" else search_column
-            with st.spinner("Searching..."):
                 df = search.load_page()
                 results = search.quick_search(query, df)
                 if len(results) > 0:
-                    st.session_state['search_history'].append({
-                        'query': query,
-                        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                        'results': results[:5]
-                    })
                     st.write(f"Found {len(results)} results:")
                     shown = 0
                     for i, (_, result) in enumerate(results.iterrows(), 1):
                         if shown >= num_results:
                             break
                         with st.expander(f"Result {i}", expanded=(i==1)):
-                            render_result(result, index=i)
                         shown += 1
                 else:
                     st.warning("No matching results found.")
-    with tab2:
-        st.subheader("🎙️ Voice Input")
-        st.write("Use the voice input above to start speaking, or record a new message:")
-        col1, col2 = st.columns(2)
-        with col1:
-            if st.button("🎙️ Start New Recording", key="start_recording_button"):
-                st.session_state['recording'] = True
-                st.experimental_rerun()
-        with col2:
-            if st.button("🛑 Stop Recording", key="stop_recording_button"):
-                st.session_state['recording'] = False
-                st.experimental_rerun()
-        if st.session_state.get('recording', False):
-            voice_component = create_voice_component()
-            new_val = voice_component(my_input_value="Recording...")
-            if new_val:
-                st.text_area("Recorded Text:", value=new_val, height=100)
-                if st.button("🔍 Search with Recording", key="recording_search_button"):
-                    # Save this input right away
-                    saved_path = save_input_as_md(new_val)
-                    with st.spinner("Processing recording..."):
-                        df = search.load_page()
-                        results = search.quick_search(new_val, df)
-                        shown = 0
-                        for i, (_, result) in enumerate(results.iterrows(), 1):
-                            if shown >= st.session_state['max_items']:
-                                break
-                            with st.expander(f"Result {i}", expanded=(i==1)):
-                                render_result(result, index=i)
-                            shown += 1
-    with tab3:
-        st.subheader("💾 Search History")
-        if not st.session_state['search_history']:
-            st.info("No search history yet. Try searching for something!")
-        else:
-            for entry in reversed(st.session_state['search_history']):
-                with st.expander(f"🕒 {entry['timestamp']} - {entry['query']}", expanded=False):
-                    for i, result in enumerate(entry['results'], 1):
-                        st.write(f"**Result {i}:**")
-                        if isinstance(result, pd.Series):
-                            render_result(result, index=i)
-                        else:
-                            st.write(result)
     with tab4:
-        st.subheader("⚙️ Settings")
-        st.write("Voice Settings:")
-        default_voice = st.selectbox(
-            "Default Voice:",
-            [
-                "en-US-AriaNeural",
-                "en-US-GuyNeural",
-                "en-GB-SoniaNeural",
-                "en-GB-TonyNeural"
-            ],
-            index=0,
-            key="default_voice_setting"
-        )
-        st.write("Search Settings:")
-        st.slider("Minimum Search Score:", 0.0, 1.0, MIN_SEARCH_SCORE, 0.1, key="min_search_score")
-        st.slider("Exact Match Boost:", 1.0, 3.0, EXACT_MATCH_BOOST, 0.1, key="exact_match_boost")
-        if st.button("🗑️ Clear Search History", key="clear_history_button"):
             st.session_state['search_history'] = []
             st.success("Search history cleared!")
-            st.experimental_rerun()
-    # Sidebar metrics
-    with st.sidebar:
-        st.subheader("📊 Search Metrics")
-        total_searches = len(st.session_state['search_history'])
-        st.metric("Total Searches", total_searches)
-        if total_searches > 0:
-            recent_searches = st.session_state['search_history'][-5:]
-            st.write("Recent Searches:")
-            for entry in reversed(recent_searches):
-                st.write(f"🔍 {entry['query']}")
 if __name__ == "__main__":
     main()

 import re
 # -------------------- Configuration & Constants --------------------
 USER_NAMES = [
     "Alex", "Jordan", "Taylor", "Morgan", "Rowan", "Avery", "Riley", "Quinn",
     "Casey", "Jesse", "Reese", "Skyler", "Ellis", "Devon", "Aubrey", "Kendall",
     "Parker", "Dakota", "Sage", "Finley"
 ]
+ENGLISH_VOICES = [
+    "en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural", "en-GB-TonyNeural",
+    "en-US-JennyNeural", "en-US-DavisNeural", "en-GB-LibbyNeural", "en-CA-ClaraNeural",
+    "en-CA-LiamNeural", "en-AU-NatashaNeural", "en-AU-WilliamNeural"
+]
 ROWS_PER_PAGE = 100
 MIN_SEARCH_SCORE = 0.3
 EXACT_MATCH_BOOST = 2.0
 SAVED_INPUTS_DIR = "saved_inputs"
 os.makedirs(SAVED_INPUTS_DIR, exist_ok=True)
 SESSION_VARS = {
     'search_history': [],
     'last_voice_input': "",
     'nps_last_shown': None,
     'old_val': None,
     'voice_text': None,
+    'user_name': random.choice(USER_NAMES),
+    'max_items': 100,
+    'global_voice': "en-US-AriaNeural"  # Default global voice
 }
 for var, default in SESSION_VARS.items():
     if var not in st.session_state:
         st.session_state[var] = default
@st.cache_resource
def get_model():
    """Build the sentence-embedding model used by ``FastDatasetSearcher``.

    The function is wrapped in ``st.cache_resource`` so the constructed
    model object is reused rather than rebuilt on each call.
    """
    model_name = 'all-MiniLM-L6-v2'
    encoder = SentenceTransformer(model_name)
    return encoder
 def create_voice_component():
     mycomponent = components.declare_component(
         "mycomponent",
         path="mycomponent"
     return text
 async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0):
     text = clean_for_speech(text)
     if not text.strip():
         return None
     await communicate.save(out_fn)
     return out_fn
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0):
    """Synchronously synthesize *text* to an audio file via Edge TTS.

    Thin blocking wrapper around the ``edge_tts_generate_audio`` coroutine.

    Args:
        text: Text to speak.
        voice: Edge TTS voice name.
        rate: Speaking-rate adjustment forwarded to the coroutine (default 0,
            which is what this wrapper previously hard-coded).
        pitch: Pitch adjustment forwarded to the coroutine (default 0).

    Returns:
        Whatever ``edge_tts_generate_audio`` returns: the output file name,
        or ``None`` when the cleaned text is empty.
    """
    # asyncio.run creates and tears down its own event loop per call; it
    # cannot be used from inside an already-running loop.
    return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch))
 def play_and_download_audio(file_path):
     if file_path and os.path.exists(file_path):
         st.audio(file_path)
         dl_link = f'<a href="data:audio/mpeg;base64,{base64.b64encode(open(file_path,"rb").read()).decode()}" download="{os.path.basename(file_path)}">Download {os.path.basename(file_path)}</a>'
         st.markdown(dl_link, unsafe_allow_html=True)
def generate_filename(prefix, text):
    """Build a timestamped Markdown file name from *prefix* and *text*.

    The name has the form ``{prefix}_{YYYYmmdd_HHMMSS}_{slug}.md`` where the
    slug is derived from the first 50 characters of *text*: punctuation is
    removed, the result is lower-cased, and runs of whitespace/hyphens are
    collapsed to a single hyphen.

    Args:
        prefix: Leading tag for the file name (e.g. ``"input"``).
        text: Free text the slug is derived from.

    Returns:
        The file name (no directory component).
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    safe_text = re.sub(r'[^\w\s-]', '', text[:50]).strip().lower()
    # Trim hyphens left dangling at either end after collapsing separators.
    safe_text = re.sub(r'[-\s]+', '-', safe_text).strip('-')
    if not safe_text:
        # Punctuation-only text would otherwise give "prefix_timestamp_.md".
        safe_text = "untitled"
    return f"{prefix}_{timestamp}_{safe_text}.md"
def _save_text_as_md(user_name, text, prefix):
    """Write *text* with a user/timestamp header to a new file in SAVED_INPUTS_DIR.

    Returns the path written, or ``None`` when *text* is empty or blank.
    """
    if not text or not text.strip():
        return None
    full_path = os.path.join(SAVED_INPUTS_DIR, generate_filename(prefix, text))
    written_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    with open(full_path, 'w', encoding='utf-8') as f:
        f.write(f"# User: {user_name}\n")
        f.write(f"**Timestamp:** {written_at}\n\n")
        f.write(text)
    return full_path


def save_input_as_md(user_name, text, prefix="input"):
    """Persist a user input as a Markdown file.

    Args:
        user_name: Name written into the ``# User:`` header line.
        text: Body to store; nothing is written when it is empty or blank.
        prefix: File-name prefix passed to ``generate_filename``.

    Returns:
        Path of the written file, or ``None`` if nothing was written.
    """
    return _save_text_as_md(user_name, text, prefix)


def save_response_as_md(user_name, text, prefix="response"):
    """Persist a generated response as a Markdown file.

    Same file layout as ``save_input_as_md``; only the default *prefix*
    differs.

    Args:
        user_name: Name written into the ``# User:`` header line.
        text: Body to store; nothing is written when it is empty or blank.
        prefix: File-name prefix passed to ``generate_filename``.

    Returns:
        Path of the written file, or ``None`` if nothing was written.
    """
    return _save_text_as_md(user_name, text, prefix)
     files = sorted(glob.glob(os.path.join(SAVED_INPUTS_DIR, "*.md")))
     return files
def parse_md_file(fpath):
    """Split a saved Markdown file into its user, timestamp and body text.

    The expected layout is a ``# User: ...`` line and a ``**Timestamp:** ...``
    line at the top of the file, followed by the body. Header markers are
    only recognised before the first body line, so body text that happens to
    start with ``# User:`` or ``**Timestamp:**`` is kept as content instead
    of overwriting the header values. Leading indentation of body lines is
    preserved; trailing whitespace on each line is dropped.

    Args:
        fpath: Path of the Markdown file to read (UTF-8).

    Returns:
        A ``(user, timestamp, content)`` tuple of strings. ``user`` and
        ``timestamp`` are empty strings when the corresponding header line
        is absent.
    """
    user_prefix = "# User:"
    ts_prefix = "**Timestamp:**"
    user_line = ""
    ts_line = ""
    seen_user = False
    seen_ts = False
    in_header = True
    content_lines = []
    with open(fpath, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.rstrip()
            if in_header:
                if not seen_user and line.startswith(user_prefix):
                    # Slice instead of replace() so a repeated marker inside
                    # the value itself is left intact.
                    user_line = line[len(user_prefix):].strip()
                    seen_user = True
                    continue
                if not seen_ts and line.startswith(ts_prefix):
                    ts_line = line[len(ts_prefix):].strip()
                    seen_ts = True
                    continue
                if not line:
                    # Blank separator lines between header and body.
                    continue
                in_header = False
            content_lines.append(line)
    content = "\n".join(content_lines).strip("\n")
    return user_line, ts_line, content
def fetch_dataset_info(dataset_id, token=None):
    """Fetch dataset metadata from the Hugging Face Hub HTTP API.

    Args:
        dataset_id: Hub dataset identifier, e.g. ``"owner/name"``.
        token: Optional access token. When given it is sent as a bearer
            token so private or gated datasets can be queried; previously
            the argument was accepted but ignored.

    Returns:
        The decoded JSON payload on HTTP 200, otherwise ``None`` (non-200
        status, network failure, or a body that is not valid JSON).
    """
    info_url = f"https://huggingface.co/api/datasets/{dataset_id}"
    headers = {"Authorization": f"Bearer {token}"} if token else None
    try:
        response = requests.get(info_url, headers=headers, timeout=30)
        if response.status_code == 200:
            return response.json()
    except (requests.RequestException, ValueError):
        # Best-effort lookup: transport errors and undecodable bodies are
        # reported to the caller as "no info" rather than raised.
        return None
    return None
@st.cache_data
def get_dataset_info(dataset_id, token):
    """Return the ``info`` object of the dataset's ``train`` split.

    The dataset is opened with ``streaming=True``.

    Args:
        dataset_id: Dataset identifier passed to ``load_dataset``.
        token: Access token passed to ``load_dataset``.

    Returns:
        ``dataset['train'].info``, or ``None`` if loading fails for any
        ordinary error.
    """
    try:
        dataset = load_dataset(dataset_id, token=token, streaming=True)
        return dataset['train'].info
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except:`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        return None
@st.cache_data
def load_dataset_page(dataset_id, token, page, rows_per_page):
    """Load one page of the dataset's ``train`` split as a DataFrame.

    Args:
        dataset_id: Dataset identifier passed to ``load_dataset``.
        token: Access token passed to ``load_dataset``.
        page: Zero-based page index.
        rows_per_page: Number of rows per page.

    Returns:
        A ``pandas.DataFrame`` built from rows
        ``[page * rows_per_page, (page + 1) * rows_per_page)`` of the train
        split, or an empty DataFrame if loading fails for any ordinary error.
    """
    start_idx = page * rows_per_page
    end_idx = start_idx + rows_per_page
    try:
        dataset = load_dataset(
            dataset_id,
            token=token,
            streaming=False,
            split=f'train[{start_idx}:{end_idx}]'
        )
        return pd.DataFrame(dataset)
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except:`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        return pd.DataFrame()
 class FastDatasetSearcher:
     def __init__(self, dataset_id="tomg-group-umd/cinepile"):
         self.dataset_id = dataset_id
         self.text_model = get_model()
         self.token = os.environ.get('DATASET_KEY')
     def load_page(self, page=0):
         return load_dataset_page(self.dataset_id, self.token, page, ROWS_PER_PAGE)
                 text_parts = []
                 row_matched = False
                 exact_match = False
                 priority_fields = ['description', 'matched_text']
                 other_fields = [col for col in searchable_cols if col not in priority_fields]
                         text_parts.append(str(val))
                 text = ' '.join(text_parts)
                 if text.strip():
                     text_tokens = set(text.lower().split())
                     matching_terms = query_terms.intersection(text_tokens)
             ]
             return filtered_df.sort_values('score', ascending=False)
+        except:
             return df
def play_text(text):
    """Speak *text* with the session's global voice and show the audio.

    The voice is read from ``st.session_state['global_voice']`` (falling back
    to ``"en-US-AriaNeural"``); nothing is rendered when synthesis yields no
    file.
    """
    chosen_voice = st.session_state.get('global_voice', "en-US-AriaNeural")
    audio_path = speak_with_edge_tts(text, voice=chosen_voice)
    if not audio_path:
        return
    play_and_download_audio(audio_path)
def arxiv_search(query, max_results=3):
    """Query the arXiv Atom API and return brief (title, summary) pairs.

    Args:
        query: Free-text search expression, sent as ``search_query``.
        max_results: Maximum number of entries to request.

    Returns:
        A list of ``(title, summary)`` tuples, where ``summary`` is cut to
        300 characters with ``"..."`` appended only when it was actually
        truncated. An empty list is returned on a non-200 response, a
        network error, or a response body that is not well-formed XML.
    """
    base_url = "http://export.arxiv.org/api/query"
    params = {
        # Pass the raw query: requests URL-encodes parameter values itself,
        # so pre-replacing spaces with '+' would be sent as a literal '%2B'.
        'search_query': query,
        'start': 0,
        'max_results': max_results,
    }
    try:
        response = requests.get(base_url, params=params, timeout=30)
    except requests.RequestException:
        return []
    if response.status_code != 200:
        return []
    try:
        root = ET.fromstring(response.text)
    except ET.ParseError:
        return []

    ns = {"a": "http://www.w3.org/2005/Atom"}
    results = []
    for entry in root.findall('a:entry', ns):
        # findtext with a default avoids AttributeError on entries that lack
        # a <title> or <summary> element.
        title = entry.findtext('a:title', '', ns).strip()
        summary = entry.findtext('a:summary', '', ns).strip()
        summary_short = summary[:300] + "..." if len(summary) > 300 else summary
        results.append((title, summary_short))
    return results
def summarize_arxiv_results(results):
    """Format ``(title, summary)`` pairs as one numbered text block.

    Each pair becomes ``"Result N: <title>\\n<summary>\\n"`` (numbering starts
    at 1) and the pieces are joined with a blank-line separator. An empty
    input yields an empty string.
    """
    separator = "\n\n"
    blocks = [
        f"Result {position}: {paper_title}\n{abstract}\n"
        for position, (paper_title, abstract) in enumerate(results, start=1)
    ]
    return separator.join(blocks)
 def main():
+    st.title("🎙️ Voice Chat & Search")
+    # Sidebar
     with st.sidebar:
+        # Editable user name
+        st.session_state['user_name'] = st.text_input("Current User:", value=st.session_state['user_name'])
+        # Global voice selection
+        st.session_state['global_voice'] = st.selectbox("Select Global Voice:", ENGLISH_VOICES, index=0)
+        st.session_state['max_items'] = st.number_input("Max Items per search iteration:", min_value=1, max_value=1000, value=st.session_state['max_items'])
+        st.subheader("📝 Saved Inputs & Responses")
+        saved_files = list_saved_inputs()
+        for fpath in saved_files:
+            user, ts, content = parse_md_file(fpath)
+            fname = os.path.basename(fpath)
+            st.write(f"- {fname} (User: {user})")
+    # Create voice component for input
+    voice_component = create_voice_component()
+    voice_val = voice_component(my_input_value="Start speaking...")
+    # Tabs: Voice Chat History, Arxiv Search, Dataset Search, Settings
+    tab1, tab2, tab3, tab4 = st.tabs(["🗣️ Voice Chat History", "📚 ArXiv Search", "📊 Dataset Search", "⚙️ Settings"])
+    # ------------------ Voice Chat History -------------------------
     with tab1:
+        st.subheader("Voice Chat History")
+        # List saved inputs and responses and allow playing them
+        files = list_saved_inputs()
+        for fpath in reversed(files):
+            user, ts, content = parse_md_file(fpath)
+            with st.expander(f"{ts} - {user}", expanded=False):
+                st.write(content)
+                if st.button("🔊 Read Aloud", key=f"read_{fpath}"):
+                    play_text(content)
+    # ------------------ ArXiv Search -------------------------
+    with tab2:
+        st.subheader("ArXiv Search")
+        # If we have a voice_val and autorun with ArXiv chosen:
+        edited_input = st.text_area("Enter or Edit Search Query:", value=(voice_val.strip() if voice_val else ""), height=100)
+        autorun = st.checkbox("⚡ Auto-Run", value=True)
+        run_arxiv = st.button("🔍 ArXiv Search")
+        input_changed = (edited_input != st.session_state.get('old_val'))
+        if autorun and input_changed and edited_input.strip():
+            st.session_state['old_val'] = edited_input
+            # Save user input
+            save_input_as_md(st.session_state['user_name'], edited_input, prefix="input")
+            with st.spinner("Searching ArXiv..."):
+                results = arxiv_search(edited_input)
+                if results:
+                    summary = summarize_arxiv_results(results)
+                    # Save response
+                    save_response_as_md(st.session_state['user_name'], summary, prefix="response")
+                    st.write(summary)
+                    # Autoplay TTS
+                    play_text(summary)
+                else:
+                    st.warning("No results found on ArXiv.")
+        if run_arxiv and edited_input.strip():
+            # Manual trigger
+            save_input_as_md(st.session_state['user_name'], edited_input, prefix="input")
+            with st.spinner("Searching ArXiv..."):
+                results = arxiv_search(edited_input)
+                if results:
+                    summary = summarize_arxiv_results(results)
+                    save_response_as_md(st.session_state['user_name'], summary, prefix="response")
+                    st.write(summary)
+                    play_text(summary)
+                else:
+                    st.warning("No results found on ArXiv.")
+    # ------------------ Dataset Search -------------------------
+    with tab3:
+        st.subheader("Dataset Search")
+        search = FastDatasetSearcher()
+        query = st.text_input("Enter dataset search query:")
+        run_ds_search = st.button("Search Dataset")
+        num_results = st.slider("Max results:", 1, 100, 20)
+        if run_ds_search and query.strip():
+            with st.spinner("Searching dataset..."):
                 df = search.load_page()
                 results = search.quick_search(query, df)
                 if len(results) > 0:
                     st.write(f"Found {len(results)} results:")
                     shown = 0
                     for i, (_, result) in enumerate(results.iterrows(), 1):
                         if shown >= num_results:
                             break
                         with st.expander(f"Result {i}", expanded=(i==1)):
+                            # Just print result keys/values here
+                            for k, v in result.items():
+                                if k not in ['score', 'matched']:
+                                    st.write(f"**{k}:** {v}")
                         shown += 1
                 else:
                     st.warning("No matching results found.")
+    # ------------------ Settings Tab -------------------------
     with tab4:
+        st.subheader("Settings")
+        st.write("Adjust voice and search parameters in the sidebar.")
+        if st.button("🗑️ Clear Search History"):
             st.session_state['search_history'] = []
+            # Optionally delete files:
+            # for fpath in list_saved_inputs():
+            #     os.remove(fpath)
             st.success("Search history cleared!")
 if __name__ == "__main__":
     main()