# Netflix-data / app.py
# Dependencies (install before running):
#   pip install torch torchvision torchaudio streamlit pandas transformers SpeechRecognition pyaudio
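# To run locally: streamlit run app.py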
import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import speech_recognition as sr
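
# A Streamlit chatbot that searches the Netflix titles dataset by title, cast,
# or director, and falls back to DialoGPT-medium for general conversation when
# no match is found. Voice input is captured with the SpeechRecognition library.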
# Load the Netflix dataset from CSV
@st.cache_data
def load_data():
    url = "https://huggingface.co/spaces/mfraz/Netflix-data/resolve/main/netflix_titles.csv"
    return pd.read_csv(url)
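# The CSV supplies the columns used below (title, type, director, cast, release_year, rating, description).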
# Load DialoGPT model and tokenizer
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
    model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
    return tokenizer, model
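# Note: the first call downloads the DialoGPT-medium weights from the Hugging Face Hub;
# st.cache_resource keeps the loaded tokenizer and model in memory across reruns.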
# Function to search movie details
def search_movie_details(query, data):
    query = query.lower()
    search_columns = ["title", "cast", "director"]
    results = data.dropna(subset=search_columns)  # Drop rows with NaN in the searched columns
    # Literal (non-regex) match of the query against any of the search columns
    mask = results[search_columns].apply(
        lambda row: row.astype(str).str.lower().str.contains(query, regex=False).any(), axis=1
    )
    return results[mask]
# Function to convert voice to text
def voice_to_text():
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        st.write("🎙 Speak now...")
        try:
            audio = recognizer.listen(source, timeout=5)
            text = recognizer.recognize_google(audio)
            return text
        except sr.WaitTimeoutError:
            return "Sorry, I did not hear anything."
        except sr.UnknownValueError:
            return "Sorry, I could not understand the audio."
        except sr.RequestError:
            return "Sorry, the speech service is down."
# Streamlit App
st.title("🎬 Netflix Movie Search Chatbot")
# Load dataset and model
data = load_data()
tokenizer, model = load_model()
# Input options: Text or Voice
input_option = st.radio("Choose input method:", ("Text", "Voice"))
user_input = ""
if input_option == "Text":
    user_input = st.text_input("Enter the movie name, director, or cast:")
elif input_option == "Voice":
    if st.button("🎤 Start Recording"):
        user_input = voice_to_text()
        st.write(f"🗣 You said: **{user_input}**")
# Generate response
if user_input:
    # Search for movie details
    movie_results = search_movie_details(user_input, data)
    if not movie_results.empty:
        st.write("🎥 **Here are the matching results:**")
        for _, row in movie_results.iterrows():
            st.write(f"**📌 Title:** {row.get('title', 'N/A')}")
            st.write(f"**🎭 Type:** {row.get('type', 'N/A')}")
            st.write(f"**🎬 Director:** {row.get('director', 'N/A')}")
            st.write(f"**👥 Cast:** {row.get('cast', 'N/A')}")
            st.write(f"**📅 Release Year:** {row.get('release_year', 'N/A')}")
            st.write(f"**⭐ Rating:** {row.get('rating', 'N/A')}")
            st.write(f"**📝 Description:** {row.get('description', 'N/A')}")
            st.write("---")
    else:
        # No dataset match: fall back to DialoGPT for general conversation.
        # Append the EOS token so the model treats the input as a complete dialogue turn.
        inputs = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors="pt")
        outputs = model.generate(inputs, max_length=100, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
        # Decode only the newly generated tokens (skip the echoed prompt)
        response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
        st.write(f"🤖 **Chatbot:** {response}")