awacke1 committed on
Commit
3874223
·
verified ·
1 Parent(s): 8bcaf1e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +347 -0
app.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import torch
7
+ import json
8
+ import os
9
+ import glob
10
+ from pathlib import Path
11
+ from datetime import datetime, timedelta
12
+ import edge_tts
13
+ import asyncio
14
+ import requests
15
+ from collections import defaultdict
16
+ from audio_recorder_streamlit import audio_recorder
17
+ import streamlit.components.v1 as components
18
+ from urllib.parse import quote
19
+ from xml.etree import ElementTree as ET
20
+ from datasets import load_dataset
21
+
22
+ # 🧠 Initialize session state variables
23
+ SESSION_VARS = {
24
+ 'search_history': [], # Track search history
25
+ 'last_voice_input': "", # Last voice input
26
+ 'transcript_history': [], # Conversation history
27
+ 'should_rerun': False, # Trigger for UI updates
28
+ 'search_columns': [], # Available search columns
29
+ 'initial_search_done': False, # First search flag
30
+ 'tts_voice': "en-US-AriaNeural", # Default voice
31
+ 'arxiv_last_query': "", # Last ArXiv search
32
+ 'dataset_loaded': False, # Dataset load status
33
+ 'current_page': 0, # Current data page
34
+ 'data_cache': None, # Data cache
35
+ 'dataset_info': None, # Dataset metadata
36
+ 'nps_submitted': False, # Track if user submitted NPS
37
+ 'nps_last_shown': None, # When NPS was last shown
38
+ 'voice_recorder_key': str(datetime.now()) # Unique key for voice recorder
39
+ }
40
+
41
+ # πŸ“Š Constants
42
+ ROWS_PER_PAGE = 100
43
+ MIN_SEARCH_SCORE = 0.3
44
+ EXACT_MATCH_BOOST = 2.0
45
+
46
+ # Initialize session state
47
+ for var, default in SESSION_VARS.items():
48
+ if var not in st.session_state:
49
+ st.session_state[var] = default
50
+
51
class NPSTracker:
    """Net Promoter Score tracker backed by a CSV log file.

    Every response is stored as one CSV row with the columns ``timestamp``
    (ISO-8601 text), ``score`` and ``feedback``.
    """

    # Column order of the CSV log; shared by the header and every appended row.
    _COLUMNS = ['timestamp', 'score', 'feedback']

    def __init__(self, log_file="nps_logs.csv"):
        """Remember the log location and make sure the log file exists.

        Args:
            log_file: Path of the CSV file used to persist responses.
        """
        self.log_file = Path(log_file)
        self.initialize_log()

    def initialize_log(self):
        """Create a header-only log file if none exists yet."""
        if not self.log_file.exists():
            self.log_file.parent.mkdir(parents=True, exist_ok=True)
            pd.DataFrame(columns=self._COLUMNS).to_csv(self.log_file, index=False)

    def log_response(self, score, feedback=""):
        """Append one NPS response to the log.

        Args:
            score: The rating given by the user.
            feedback: Optional free-text comment.
        """
        new_entry = pd.DataFrame(
            [{
                # Pin the precision so every row has the same timestamp
                # format; plain isoformat() drops the fraction when the
                # microsecond part happens to be 0.
                'timestamp': datetime.now().isoformat(timespec='microseconds'),
                'score': score,
                'feedback': feedback,
            }],
            columns=self._COLUMNS,
        )

        # Append a single row instead of re-reading and rewriting the whole
        # log; write the header only when the file is missing or empty.
        needs_header = (
            not self.log_file.exists() or self.log_file.stat().st_size == 0
        )
        new_entry.to_csv(
            self.log_file, mode='a', header=needs_header, index=False
        )

    @staticmethod
    def _empty_stats():
        """Return the stats payload used when there is nothing to report."""
        return {
            'nps_score': 0,
            'promoters': 0,
            'passives': 0,
            'detractors': 0,
            'total_responses': 0,
            'recent_feedback': []
        }

    def get_nps_stats(self, days=30):
        """Compute NPS statistics over the most recent ``days`` days.

        Args:
            days: Size of the look-back window, in days.

        Returns:
            A dict with ``nps_score`` (rounded to one decimal), the
            ``promoters`` / ``passives`` / ``detractors`` counts,
            ``total_responses`` and ``recent_feedback`` (up to five of the
            newest non-empty comments). All values are zero/empty when there
            are no responses in the window.
        """
        if not self.log_file.exists():
            return self._empty_stats()

        try:
            # Read feedback as text so a comment such as "10" is not turned
            # into a number (callers slice and measure it as a string).
            df = pd.read_csv(self.log_file, dtype={'feedback': str})
        except pd.errors.EmptyDataError:
            # A zero-byte file has no header to parse; treat it as no data.
            return self._empty_stats()

        df['timestamp'] = pd.to_datetime(df['timestamp'])

        cutoff = datetime.now() - timedelta(days=days)
        recent_df = df[df['timestamp'] > cutoff]

        if recent_df.empty:
            return self._empty_stats()

        total = len(recent_df)
        scores = recent_df['score']
        promoters = int((scores >= 9).sum())
        passives = int(scores.between(7, 8).sum())
        detractors = int((scores <= 6).sum())

        # NPS = % promoters - % detractors.
        nps = (promoters - detractors) / total * 100

        with_feedback = recent_df[recent_df['feedback'].notna()]
        recent_feedback = (
            with_feedback
            .sort_values('timestamp', ascending=False)['feedback']
            .head(5)
            .tolist()
        )

        return {
            'nps_score': round(nps, 1),
            'promoters': promoters,
            'passives': passives,
            'detractors': detractors,
            'total_responses': total,
            'recent_feedback': recent_feedback
        }
127
+
128
def setup_voice_recorder():
    """Render an in-browser voice recorder (start/stop buttons + playback).

    The markup and script are embedded through ``components.html``. When a
    recording stops, the script shows it in the ``<audio>`` element and posts
    it to the parent window as a data URL in a ``voiceData`` message.

    Returns:
        Whatever ``components.html`` returns for the embedded snippet.
    """
    # NOTE(review): nothing in this block receives the 'voiceData' message on
    # the Python side, and components.html is documented as a static embed.
    # Confirm a listener exists, otherwise the recording never reaches Python.
    return components.html(
        """
        <div style="display: flex; flex-direction: column; align-items: center; gap: 10px;">
            <button id="startButton"
                style="padding: 10px 20px; background: #ff4b4b; color: white; border: none; border-radius: 5px; cursor: pointer">
                Start Recording
            </button>
            <button id="stopButton"
                style="padding: 10px 20px; background: #4b4bff; color: white; border: none; border-radius: 5px; cursor: pointer"
                disabled>
                Stop Recording
            </button>
            <audio id="audioPlayback" controls style="display: none;"></audio>
            <div id="statusText" style="color: #666;">Ready to record...</div>
        </div>
        <script>
            let mediaRecorder = null;
            let audioChunks = [];

            const startButton = document.getElementById('startButton');
            const stopButton = document.getElementById('stopButton');
            const audioPlayback = document.getElementById('audioPlayback');
            const statusText = document.getElementById('statusText');

            startButton.onclick = async () => {
                try {
                    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                    mediaRecorder = new MediaRecorder(stream);
                    // Start each take from an empty buffer so a new recording
                    // does not contain the audio of the previous one.
                    audioChunks = [];

                    mediaRecorder.ondataavailable = (e) => {
                        if (e.data && e.data.size > 0) {
                            audioChunks.push(e.data);
                        }
                    };

                    mediaRecorder.onstop = () => {
                        // Release the microphone once the take is finished.
                        stream.getTracks().forEach((track) => track.stop());

                        // Label the blob with the format the recorder really
                        // produced; MediaRecorder does not emit WAV by default.
                        const audioBlob = new Blob(audioChunks, {
                            type: mediaRecorder.mimeType || 'audio/webm'
                        });
                        audioPlayback.src = URL.createObjectURL(audioBlob);
                        audioPlayback.style.display = 'block';

                        // Send to Python
                        const reader = new FileReader();
                        reader.onloadend = () => {
                            window.parent.postMessage({
                                type: 'voiceData',
                                data: reader.result
                            }, '*');
                        };
                        reader.readAsDataURL(audioBlob);
                    };

                    mediaRecorder.start();
                    startButton.disabled = true;
                    stopButton.disabled = false;
                    statusText.textContent = 'Recording...';

                } catch (err) {
                    console.error('Error:', err);
                    statusText.textContent = 'Error: ' + err.message;
                }
            };

            stopButton.onclick = () => {
                // Guard against a stop click without an active recording.
                if (mediaRecorder && mediaRecorder.state !== 'inactive') {
                    mediaRecorder.stop();
                }
                startButton.disabled = false;
                stopButton.disabled = true;
                statusText.textContent = 'Recording complete!';
            };
        </script>
        """,
        height=200,
    )
196
+
197
def render_nps_sidebar():
    """Show the current NPS metrics and recent feedback in the sidebar.

    Reads the statistics from a fresh ``NPSTracker`` and renders the overall
    score (with a traffic-light marker), the promoter / passive / detractor
    counts, and up to the returned recent comments, each cut to 100
    characters.
    """
    tracker = NPSTracker()
    stats = tracker.get_nps_stats()

    st.sidebar.markdown("### 📊 User Satisfaction Metrics")

    nps_score = stats['nps_score']
    if nps_score >= 50:
        score_color = "🟢"
    elif nps_score >= 0:
        score_color = "🟡"
    else:
        score_color = "🔴"
    st.sidebar.metric(
        "Net Promoter Score",
        f"{score_color} {nps_score}"
    )

    st.sidebar.markdown("#### Response Breakdown")
    breakdown = (
        ("😃", 'promoters'),
        ("😐", 'passives'),
        ("😕", 'detractors'),
    )
    for column, (label, key) in zip(st.sidebar.columns(3), breakdown):
        with column:
            st.metric(label, stats[key])

    if stats['recent_feedback']:
        st.sidebar.markdown("#### Recent Feedback")
        for feedback in stats['recent_feedback']:
            # Feedback comes back from a CSV, so an all-numeric comment may
            # arrive as a number; normalise before measuring and slicing.
            text = str(feedback)
            if len(text) > 100:
                text = text[:100] + "..."
            st.sidebar.info(text)
227
+
228
def render_nps_survey():
    """Show the NPS survey form and record the answer on submit.

    On submit the score and comment are logged through ``NPSTracker``,
    ``st.session_state['nps_submitted']`` is set to True and the script is
    rerun so the rest of the page reflects the new response.
    """
    tracker = NPSTracker()

    st.markdown("### 📝 Your Feedback Matters!")
    score = st.slider(
        "How likely are you to recommend this search tool to others?",
        0, 10,
        help="0 = Not likely at all, 10 = Extremely likely"
    )

    feedback = st.text_area("Additional feedback (optional)")

    if st.button("Submit Feedback", key="nps_submit"):
        # Strip so a whitespace-only comment is stored as "no feedback".
        tracker.log_response(score, (feedback or "").strip())
        st.session_state['nps_submitted'] = True
        st.success("Thank you for your feedback! 🙏")
        # st.experimental_rerun was deprecated and later removed from
        # Streamlit; prefer st.rerun and fall back for older releases.
        rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
        if rerun is not None:
            rerun()
246
+
247
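+ # TODO: [... Rest of your existing code for search functionality ...] -- paste it here; main() below calls VideoSearch, render_result, transcribe_audio, perform_arxiv_lookup and show_file_manager, none of which are defined in this commit.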
248
+
249
def _render_result_list(results):
    """Render each search hit in its own expander, opening only the first."""
    for i, result in enumerate(results, 1):
        with st.expander(f"Result {i}", expanded=(i == 1)):
            render_result(result)


def main():
    """Build the page: NPS sidebar and survey, then the four feature tabs.

    Relies on ``VideoSearch``, ``render_result``, ``transcribe_audio``,
    ``perform_arxiv_lookup`` and ``show_file_manager``, which are expected to
    be defined elsewhere in this file.
    """
    st.title("🎥 Smart Video Search with Voice & Feedback")

    # Initialize search
    search = VideoSearch()

    # Add NPS metrics to sidebar
    with st.sidebar:
        render_nps_sidebar()

    # Show the survey periodically. The 24-hour gate decides when a survey
    # *starts*; once started it stays open until it is submitted. Without the
    # 'nps_survey_open' flag the first widget interaction (which reruns the
    # script) would find a fresh 'nps_last_shown' and hide the form before it
    # could be submitted.
    current_time = datetime.now()
    last_shown = st.session_state.get('nps_last_shown')
    survey_due = (
        last_shown is None
        or current_time - last_shown > timedelta(hours=24)
    )
    survey_open = st.session_state.get('nps_survey_open', False)
    if not st.session_state.get('nps_submitted') and (survey_due or survey_open):
        if survey_due:
            st.session_state['nps_last_shown'] = current_time
        st.session_state['nps_survey_open'] = True
        with st.expander("📝 Quick Feedback", expanded=True):
            render_nps_survey()

    # Create main tabs
    tab1, tab2, tab3, tab4 = st.tabs([
        "🔍 Search", "🎙️ Voice Input", "📚 ArXiv", "📂 Files"
    ])

    # Search Tab
    with tab1:
        st.subheader("Search Videos")
        col1, col2 = st.columns([3, 1])
        with col1:
            # Keep the default constant: changing a widget's default between
            # reruns makes Streamlit treat it as a new widget and throws away
            # what the user typed.
            query = st.text_input("Enter search query:", value="aliens")
        with col2:
            search_column = st.selectbox(
                "Search in:",
                ["All Fields"] + st.session_state['search_columns'])

        col3, col4 = st.columns(2)
        with col3:
            num_results = st.slider("Max results:", 1, 100, 20)
        with col4:
            search_button = st.button("🔍 Search")

        # Run on an explicit click, and once automatically on first load.
        if (search_button or not st.session_state['initial_search_done']) and query:
            st.session_state['initial_search_done'] = True
            selected_column = None if search_column == "All Fields" else search_column

            with st.spinner("Searching..."):
                results = search.search(query, selected_column, num_results)

            if results:
                st.session_state['search_history'].append({
                    'query': query,
                    'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    'results': results[:5]
                })

                st.write(f"Found {len(results)} results:")
                _render_result_list(results)
            else:
                st.warning("No matching results found.")

    # Voice Input Tab
    with tab2:
        st.subheader("Voice Search")
        st.write("🎙️ Record your query:")

        setup_voice_recorder()

        if 'voice_data' in st.session_state:
            with st.spinner("Processing voice..."):
                voice_query = transcribe_audio(st.session_state['voice_data'])
            st.markdown("**Transcribed Text:**")
            st.write(voice_query)

            if st.button("🔍 Search with Voice"):
                results = search.search(voice_query, None, 20)
                # Same empty-result handling as the text search tab.
                if results:
                    _render_result_list(results)
                else:
                    st.warning("No matching results found.")

    # ArXiv Tab
    with tab3:
        st.subheader("ArXiv Search")
        arxiv_query = st.text_input("Search ArXiv:", value=st.session_state['arxiv_last_query'])
        vocal_summary = st.checkbox("🎙 Quick Audio Summary", value=True)
        titles_summary = st.checkbox("🔖 Titles Only", value=True)
        full_audio = st.checkbox("📚 Full Audio Summary", value=False)

        if st.button("🔍 Search ArXiv"):
            st.session_state['arxiv_last_query'] = arxiv_query
            perform_arxiv_lookup(arxiv_query, vocal_summary, titles_summary, full_audio)

    # File Manager Tab
    with tab4:
        show_file_manager()


if __name__ == "__main__":
    main()