Spaces:

awacke1
/

CodeCompetitionClaudeVsGPT

Running

App Files Files Community

CodeCompetitionClaudeVsGPT / backup3.searchvideoworks.app.py

awacke1

Rename app.py to backup3.searchvideoworks.app.py

5503ac5 verified 9 months ago

raw

history blame

17.8 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity
	import torch
	import json
	import os
	import glob
	from pathlib import Path
	from datetime import datetime
	import edge_tts
	import asyncio
	import base64
	import requests
	import plotly.graph_objects as go
	from gradio_client import Client
	from collections import defaultdict
	from bs4 import BeautifulSoup
	from audio_recorder_streamlit import audio_recorder
	import streamlit.components.v1 as components

	# --- Page configuration ----------------------------------------------------
	# Must run before any other Streamlit call renders output.
	st.set_page_config(
	    page_title="Video Search & Research Assistant",
	    page_icon="🎥",
	    layout="wide",
	    initial_sidebar_state="auto",
	)

	# --- Session state ---------------------------------------------------------
	# Seed each slot the app reads; values already present from an earlier run of
	# the same session are left untouched.
	for _state_key, _state_default in (
	    ('search_history', []),
	    ('last_voice_input', ""),
	    ('transcript_history', []),
	    ('should_rerun', False),
	):
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _state_default

	# --- Custom styling --------------------------------------------------------
	# Raw CSS injected into the page: dark gradient background, font, button gap.
	_CUSTOM_CSS = """
	<style>
	.main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
	.stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
	.stButton>button { margin-right: 0.5rem; }
	</style>
	"""
	st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

	# --- Custom components -----------------------------------------------------
	# Speech-recognition front-end served from the local "mycomponent" directory.
	speech_component = components.declare_component("speech_recognition", path="mycomponent")

	class VideoSearch:
	    """Similarity search over a small table of video clips.

	    Construction loads the sentence-transformer text model and then the
	    dataset (remote fetch with a built-in three-row fallback).

	    Attributes populated by ``load_dataset`` / ``prepare_features``:
	        dataset: ``pandas.DataFrame`` with one row per clip.
	        video_embeds: 2-D ``numpy`` array, row ``i`` belongs to dataset row ``i``.
	        text_embeds: 2-D ``numpy`` array, row ``i`` belongs to dataset row ``i``.
	    """

	    # Width of the random embeddings used when nothing parseable is available.
	    # NOTE(review): presumably chosen to match the text model's output width.
	    FALLBACK_DIM = 384

	    def __init__(self):
	        self.text_model = SentenceTransformer('all-MiniLM-L6-v2')
	        self.load_dataset()

	    def fetch_dataset_rows(self):
	        """Fetch dataset rows from the Hugging Face datasets-server API.

	        Progress and debug information is written to the Streamlit page.
	        Nothing is cached here; every call issues a new HTTP request.

	        Returns:
	            A DataFrame built from the nested ``row`` objects of the response.
	            The example data is returned instead when the request raises,
	            the status is not 200, or the payload has no ``rows`` key.
	        """
	        try:
	            st.info("Fetching from Hugging Face API...")
	            url = "https://datasets-server.huggingface.co/first-rows?dataset=omegalabsinc%2Fomega-multimodal&config=default&split=train"

	            response = requests.get(url, timeout=30)
	            st.write(f"Response status: {response.status_code}")

	            if response.status_code != 200:
	                st.error(f"API request failed with status code: {response.status_code}")
	                return self.load_example_data()

	            data = response.json()
	            if 'rows' not in data:
	                st.error("No 'rows' found in API response")
	                st.write("Raw API Response:", data)
	                return self.load_example_data()

	            # Each entry wraps the actual record under a nested 'row' key.
	            processed_rows = [entry['row'] for entry in data['rows'] if 'row' in entry]
	            df = pd.DataFrame(processed_rows)

	            # Debug output
	            st.write("DataFrame columns after processing:", list(df.columns))
	            st.write("Number of rows:", len(df))
	            return df

	        except Exception as e:
	            st.error(f"Error fetching dataset: {str(e)}")
	            return self.load_example_data()

	    def load_example_data(self):
	        """Return the built-in three-row example dataset used as a fallback."""
	        example_data = [
	            {
	                "video_id": "cd21da96-fcca-4c94-a60f-0b1e4e1e29fc",
	                "youtube_id": "IO-vwtyicn4",
	                "description": "This video shows a close-up of an ancient text carved into a surface, with the text appearing to be in a cursive script.",
	                "views": 45489,
	                "start_time": 1452,
	                "end_time": 1458,
	                "video_embed": [0.014160037972033024, -0.003111184574663639, -0.016604168340563774],
	                "description_embed": [-0.05835828185081482, 0.02589797042310238, 0.11952091753482819]
	            },
	            {
	                "video_id": "a8ebde7d-d717-4c1e-8be4-bdb4bc0c544f",
	                "youtube_id": "mo4rEyF7gTE",
	                "description": "This video shows a close-up view of a classical architectural structure, featuring stone statues with ornate details.",
	                "views": 4468,
	                "start_time": 318,
	                "end_time": 324,
	                "video_embed": [0.015160037972033024, -0.004111184574663639, -0.017604168340563774],
	                "description_embed": [-0.06835828185081482, 0.03589797042310238, 0.12952091753482819]
	            },
	            {
	                "video_id": "d1be64a6-22e2-4fbd-a176-20749e7c3d8a",
	                "youtube_id": "IO-vwtyicn4",
	                "description": "This video shows a weathered ancient painting depicting figures in classical style with vibrant colors preserved.",
	                "views": 45489,
	                "start_time": 1698,
	                "end_time": 1704,
	                "video_embed": [0.016160037972033024, -0.005111184574663639, -0.018604168340563774],
	                "description_embed": [-0.07835828185081482, 0.04589797042310238, 0.13952091753482819]
	            }
	        ]
	        return pd.DataFrame(example_data)

	    @staticmethod
	    def _parse_embedding(value):
	        """Convert one stored embedding to a list of floats.

	        Accepts a sequence of numbers, a JSON list string (``"[1, 2]"``) or a
	        bracketed string separated by whitespace and/or commas (``"[1 2]"``).

	        Returns:
	            The list of floats, or ``None`` when the value cannot be parsed.
	        """
	        if isinstance(value, str):
	            try:
	                value = json.loads(value)
	            except ValueError:
	                # Not JSON: strip brackets and treat commas as whitespace.
	                cleaned = value.replace('[', ' ').replace(']', ' ').replace(',', ' ')
	                value = cleaned.split()
	        try:
	            return [float(x) for x in value]
	        except (TypeError, ValueError):
	            return None

	    def prepare_features(self):
	        """Build ``video_embeds`` / ``text_embeds`` from the dataset columns.

	        The class previously defined this method twice, so only the later,
	        simpler definition ever ran; this is the single merged version.

	        * If either embedding column is missing, the example data replaces
	          the dataset.
	        * Rows whose embeddings cannot be parsed are dropped from the dataset
	          as well, so array row ``i`` always matches dataset row ``i``.
	        * On any failure, random embeddings of width ``FALLBACK_DIM`` are used.
	        """
	        try:
	            required = {'video_embed', 'description_embed'}
	            if not required.issubset(self.dataset.columns):
	                st.warning("Using example data embeddings")
	                self.dataset = self.load_example_data()

	            video_rows, text_rows, kept_positions = [], [], []
	            column_pairs = zip(self.dataset['video_embed'], self.dataset['description_embed'])
	            for position, (video_raw, text_raw) in enumerate(column_pairs):
	                video_vec = self._parse_embedding(video_raw)
	                text_vec = self._parse_embedding(text_raw)
	                if video_vec is None or text_vec is None:
	                    st.warning(f"Skipping row {position} due to parsing failure")
	                    continue
	                video_rows.append(video_vec)
	                text_rows.append(text_vec)
	                kept_positions.append(position)

	            if not kept_positions:
	                raise ValueError("No valid embeddings found")

	            # dtype=float makes ragged rows raise instead of yielding an object array.
	            video_matrix = np.array(video_rows, dtype=float)
	            text_matrix = np.array(text_rows, dtype=float)
	            if video_matrix.ndim != 2 or text_matrix.ndim != 2:
	                raise ValueError("Embeddings do not form a 2-D matrix")

	            self.dataset = self.dataset.iloc[kept_positions].reset_index(drop=True)
	            self.video_embeds = video_matrix
	            self.text_embeds = text_matrix
	        except Exception as e:
	            st.error(f"Error preparing features: {e}")
	            num_rows = len(self.dataset)
	            self.video_embeds = np.random.randn(num_rows, self.FALLBACK_DIM)
	            self.text_embeds = np.random.randn(num_rows, self.FALLBACK_DIM)

	    def load_dataset(self):
	        """Populate ``self.dataset`` and the embedding matrices.

	        Falls back to the example data when the fetch raises or yields no
	        rows. The original fallback called a ``create_dummy_data`` method
	        that does not exist, which raised ``AttributeError``.
	        """
	        try:
	            self.dataset = self.fetch_dataset_rows()
	        except Exception as e:
	            st.error(f"Error loading dataset: {e}")
	            self.dataset = None

	        if self.dataset is None or len(self.dataset) == 0:
	            self.dataset = self.load_example_data()
	        self.prepare_features()

	    @staticmethod
	    def _matches_dim(matrix, dim):
	        """Return True when ``matrix`` is a non-empty 2-D array of width ``dim``."""
	        return (
	            getattr(matrix, "ndim", 0) == 2
	            and matrix.shape[0] > 0
	            and matrix.shape[1] == dim
	        )

	    def search(self, query, top_k=5):
	        """Return the ``top_k`` clips most similar to ``query``.

	        The query is embedded with ``self.text_model``. Stored embeddings can
	        only be compared when they have the same width as the query vector:

	        * if ``text_embeds`` has a different width, the descriptions are
	          re-embedded once with the text model and the result is kept;
	        * if ``video_embeds`` has a different width, it is left out and the
	          score is the text similarity alone; otherwise the score is the
	          50/50 mean of the video and text similarities.

	        Args:
	            query: Search text.
	            top_k: Maximum number of results; values <= 0 yield ``[]``.

	        Returns:
	            A list of dicts, best match first, with keys ``video_id``,
	            ``youtube_id``, ``description``, ``start_time``, ``end_time``,
	            ``relevance_score`` and ``views``.
	        """
	        # Guard: a slice of [-0:] would otherwise return every row.
	        if top_k <= 0 or len(self.dataset) == 0:
	            return []

	        query_embedding = np.asarray(self.text_model.encode([query])[0])
	        dim = query_embedding.shape[-1]

	        if not self._matches_dim(getattr(self, 'text_embeds', None), dim):
	            descriptions = self.dataset['description'].astype(str).tolist()
	            self.text_embeds = np.asarray(self.text_model.encode(descriptions))
	        text_sims = cosine_similarity([query_embedding], self.text_embeds)[0]

	        if self._matches_dim(getattr(self, 'video_embeds', None), dim):
	            video_sims = cosine_similarity([query_embedding], self.video_embeds)[0]
	            combined_sims = 0.5 * video_sims + 0.5 * text_sims
	        else:
	            combined_sims = text_sims

	        top_indices = np.argsort(combined_sims)[-top_k:][::-1]

	        results = []
	        for idx in top_indices:
	            row = self.dataset.iloc[idx]
	            results.append({
	                'video_id': row['video_id'],
	                'youtube_id': row['youtube_id'],
	                'description': row['description'],
	                'start_time': row['start_time'],
	                'end_time': row['end_time'],
	                'relevance_score': float(combined_sims[idx]),
	                'views': row['views']
	            })
	        return results

	def perform_arxiv_search(query, vocal_summary=True, extended_refs=False):
	    """Run an Arxiv search through the remote Gradio app and render it.

	    Two remote calls are made: one for the reference list
	    (``/update_with_rag_md``) and one for the LLM answer (``/ask_llm``).
	    The combined markdown is written to the page.

	    Args:
	        query: Research question to send.
	        vocal_summary: When true, speak the first 500 characters of the
	            answer using the voice stored under the ``tts_voice`` session
	            key (default ``en-US-AriaNeural`` when unset).
	        extended_refs: Accepted for interface compatibility; not used here.

	    Returns:
	        The rendered markdown string, or ``None`` if anything failed (the
	        error is shown on the page).
	    """
	    try:
	        client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
	        refs = client.predict(query, 20, "Semantic Search",
	                              "mistralai/Mixtral-8x7B-Instruct-v0.1",
	                              api_name="/update_with_rag_md")[0]
	        response = client.predict(query, "mistralai/Mixtral-8x7B-Instruct-v0.1",
	                                  True, api_name="/ask_llm")

	        result = f"### 🔎 {query}\n\n{response}\n\n{refs}"
	        st.markdown(result)

	        if vocal_summary:
	            # Honour the sidebar voice choice instead of always using the default.
	            voice = st.session_state.get("tts_voice", "en-US-AriaNeural")
	            # str() guards against a non-string payload from the remote app.
	            audio_file = asyncio.run(generate_speech(str(response)[:500], voice))
	            if audio_file:
	                try:
	                    st.audio(audio_file)
	                finally:
	                    # Always remove the temp file, even if rendering failed.
	                    if os.path.exists(audio_file):
	                        os.remove(audio_file)

	        return result
	    except Exception as e:
	        st.error(f"Error in Arxiv search: {e}")
	        return None

	async def generate_speech(text, voice="en-US-AriaNeural"):
	    """Synthesize ``text`` to an MP3 file with Edge TTS.

	    Args:
	        text: Text to speak. ``None``, empty or whitespace-only text yields
	            ``None`` without contacting the service.
	        voice: Edge TTS voice name.

	    Returns:
	        Path of the MP3 written to the current directory, or ``None`` when
	        there was nothing to say or synthesis failed (the error is shown on
	        the page). The caller is responsible for deleting the file.
	    """
	    if not text or not text.strip():
	        return None

	    import uuid  # local import: only this function needs it

	    # A random suffix keeps names unique when several clips are generated
	    # within the same second (or by concurrent sessions).
	    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
	    audio_file = f"speech_{stamp}_{uuid.uuid4().hex[:8]}.mp3"

	    try:
	        communicate = edge_tts.Communicate(text, voice)
	        await communicate.save(audio_file)
	        return audio_file
	    except Exception as e:
	        # Do not leave a partially written file behind.
	        if os.path.exists(audio_file):
	            os.remove(audio_file)
	        st.error(f"Error generating speech: {e}")
	        return None

	def process_audio_input(audio_bytes):
	    """Handle a recording coming from the audio recorder widget.

	    The bytes are written to a uniquely named temporary ``.wav`` file, a
	    success message is shown, and the file is removed again. No
	    transcription happens yet; the file exists only as the hook where a
	    speech-to-text call would go.

	    Args:
	        audio_bytes: Raw recording, or a falsy value when nothing was recorded.

	    Returns:
	        ``True`` if a recording was processed, ``False`` if there was none.
	    """
	    if not audio_bytes:
	        return False

	    import tempfile  # local import: only this function needs it

	    audio_path = None
	    try:
	        # delete=False so the file survives the `with` for a later reader.
	        with tempfile.NamedTemporaryFile(prefix="temp_audio_", suffix=".wav",
	                                         delete=False) as handle:
	            audio_path = handle.name
	            handle.write(audio_bytes)

	        # Here you would typically use a speech-to-text service
	        # For now, we'll just acknowledge the recording
	        st.success("Audio recorded successfully!")
	    finally:
	        # Cleanup runs even if writing or rendering raised.
	        if audio_path and os.path.exists(audio_path):
	            os.remove(audio_path)

	    return True

	def _get_video_search():
	    """Return this session's VideoSearch, constructing it on first use only.

	    Building a VideoSearch loads the text model and fetches the dataset, so
	    it is kept in session state instead of being rebuilt on every rerun.
	    """
	    if 'video_search' not in st.session_state:
	        st.session_state['video_search'] = VideoSearch()
	    return st.session_state['video_search']


	def _rerun():
	    """Restart the script with whichever rerun API this Streamlit offers."""
	    rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun")
	    rerun()


	def _speak(text):
	    """Synthesize ``text`` with the selected TTS voice and play it."""
	    voice = st.session_state.get("tts_voice", "en-US-AriaNeural")
	    audio_file = asyncio.run(generate_speech(text, voice))
	    if not audio_file:
	        return
	    try:
	        st.audio(audio_file)
	    finally:
	        if os.path.exists(audio_file):
	            os.remove(audio_file)


	def _show_youtube(result):
	    """Embed the clip's YouTube video at its start time, if it has an id."""
	    if result['youtube_id']:
	        st.video(f"https://youtube.com/watch?v={result['youtube_id']}&t={result['start_time']}")


	def _render_video_tab(search):
	    """Draw the text-search tab and return the chosen number of results."""
	    st.subheader("Search Video Dataset")

	    # Text search
	    query = st.text_input("Enter your search query:")
	    col1, col2 = st.columns(2)

	    with col1:
	        search_button = st.button("🔍 Search")
	    with col2:
	        num_results = st.slider("Number of results:", 1, 10, 5)

	    if search_button and query:
	        results = search.search(query, num_results)
	        # Keep the results across reruns: pressing any button inside a
	        # result triggers a rerun in which `search_button` is False again.
	        st.session_state['video_results'] = results
	        st.session_state['search_history'].append({
	            'query': query,
	            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
	            'results': results
	        })

	    for i, result in enumerate(st.session_state.get('video_results', []), 1):
	        with st.expander(f"Result {i}: {result['description'][:100]}...", expanded=i==1):
	            cols = st.columns([2, 1])

	            with cols[0]:
	                st.markdown("Full Description:")
	                st.write(result['description'])
	                st.markdown(f"Time Range: {result['start_time']}s - {result['end_time']}s")
	                st.markdown(f"Views: {result['views']:,}")

	            with cols[1]:
	                st.markdown(f"Relevance Score: {result['relevance_score']:.2%}")
	                _show_youtube(result)

	            # Generate audio summary
	            if st.button("🔊 Generate Audio Summary", key=f"audio_{i}"):
	                _speak(f"Video summary: {result['description'][:200]}")

	    return num_results


	def _render_voice_tab(search, num_results):
	    """Draw the speech-recognition and audio-recorder tab."""
	    st.subheader("Voice Input & Audio Recording")

	    col1, col2 = st.columns(2)
	    with col1:
	        st.write("🎙️ Speech Recognition")
	        voice_input = speech_component()

	        if voice_input:
	            if voice_input != st.session_state['last_voice_input']:
	                st.session_state['last_voice_input'] = voice_input
	            # Shown for every run that has a transcript, not only the run in
	            # which it first arrived; otherwise the button below disappears
	            # on the rerun caused by clicking it and never fires.
	            st.markdown("Transcribed Text:")
	            st.write(voice_input)

	            if st.button("🔍 Search Videos"):
	                results = search.search(voice_input, num_results)
	                for i, result in enumerate(results, 1):
	                    with st.expander(f"Result {i}", expanded=i==1):
	                        st.write(result['description'])
	                        _show_youtube(result)

	    with col2:
	        st.write("🎵 Audio Recorder")
	        audio_bytes = audio_recorder()
	        if audio_bytes:
	            process_audio_input(audio_bytes)


	def _render_arxiv_tab():
	    """Draw the Arxiv research tab."""
	    st.subheader("Arxiv Research")
	    arxiv_query = st.text_input("🔍 Research Query:")

	    col1, col2 = st.columns(2)
	    with col1:
	        vocal_summary = st.checkbox("Generate Audio Summary", value=True)
	    with col2:
	        extended_refs = st.checkbox("Include Extended References", value=False)

	    if st.button("🔍 Search Arxiv") and arxiv_query:
	        perform_arxiv_search(arxiv_query, vocal_summary, extended_refs)


	def _render_sidebar():
	    """Draw the sidebar: clear-history button, recent searches, voice choice."""
	    with st.sidebar:
	        st.subheader("⚙️ Settings & History")

	        if st.button("🗑️ Clear History"):
	            st.session_state['search_history'] = []
	            _rerun()

	        st.markdown("### Recent Searches")
	        for entry in reversed(st.session_state['search_history'][-5:]):
	            st.markdown(f"{entry['timestamp']}: {entry['query']}")

	        st.markdown("### Voice Settings")
	        st.selectbox("TTS Voice:",
	                     ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"],
	                     key="tts_voice")


	def main():
	    """Render the whole app: title, three tabs and the sidebar."""
	    st.title("🎥 Video Search & Research Assistant")

	    # Initialize search (once per session)
	    search = _get_video_search()

	    # Create main tabs
	    tab1, tab2, tab3 = st.tabs(["🔍 Video Search", "🎙️ Voice & Audio", "📚 Arxiv Research"])

	    with tab1:
	        num_results = _render_video_tab(search)

	    with tab2:
	        _render_voice_tab(search, num_results)

	    with tab3:
	        _render_arxiv_tab()

	    # Sidebar for history and settings
	    _render_sidebar()

	# Script entry point: build the UI only when this file is executed directly.
	if __name__ == "__main__":
	    main()