Spaces:
Sleeping
Sleeping
# Setup (run in a shell, not in Python):
#   pip install torch torchvision torchaudio streamlit pandas transformers SpeechRecognition pyaudio
import streamlit as st | |
import pandas as pd | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
import speech_recognition as sr | |
# Load the Netflix dataset from CSV
_DEFAULT_DATA_URL = (
    "https://huggingface.co/spaces/mfraz/Netflix-data/resolve/main/netflix_titles.csv"
)


def load_data(url=_DEFAULT_DATA_URL):
    """Read the Netflix titles CSV into a DataFrame.

    Args:
        url: Location of the CSV. Anything ``pandas.read_csv`` accepts
            works (URL, local path, or file-like object). Defaults to the
            dataset hosted alongside the Space.

    Returns:
        A ``pandas.DataFrame`` with one row per CSV record.
    """
    return pd.read_csv(url)
# Load DialoGPT model and tokenizer
def load_model():
    """Fetch the pretrained DialoGPT-medium tokenizer and causal LM.

    Returns:
        A ``(tokenizer, model)`` tuple, both loaded from the
        "microsoft/DialoGPT-medium" checkpoint.
    """
    checkpoint = "microsoft/DialoGPT-medium"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForCausalLM.from_pretrained(checkpoint),
    )
# Function to search movie details
def search_movie_details(query, data):
    """Return the rows whose title, cast, or director contains *query*.

    Matching is case-insensitive and literal: the query is treated as plain
    text, not as a regular expression, so input such as "c++" or "(" is safe.
    Rows with a missing value in one searched column are still matched on the
    other columns (a missing value simply never matches).

    Args:
        query: Text typed or spoken by the user.
        data: DataFrame containing at least the "title", "cast" and
            "director" columns.

    Returns:
        The subset of ``data`` (original rows, original order) that matched.
    """
    needle = query.lower()
    search_columns = ["title", "cast", "director"]

    matches = pd.Series(False, index=data.index)
    for column in search_columns:
        # Blank out missing cells instead of dropping the whole row, so a
        # title with no director can still be found by its title or cast.
        haystack = data[column].fillna("").astype(str).str.lower()
        matches |= haystack.str.contains(needle, regex=False)
    return data[matches]
# Function to convert voice to text
def voice_to_text():
    """Record one utterance from the default microphone and transcribe it.

    Returns:
        The recognized text. On failure a human-readable apology string is
        returned instead: when no speech starts within the 5-second timeout,
        when the audio cannot be understood, or when the speech service
        request fails.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        st.write("π Speak now...")
        try:
            audio = recognizer.listen(source, timeout=5)
        except sr.WaitTimeoutError:
            # listen() raises when nobody starts speaking before the timeout;
            # report it like the other failures instead of crashing the app.
            return "Sorry, I did not hear anything."

    # The microphone is released before the network round-trip.
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Sorry, I could not understand the audio."
    except sr.RequestError:
        return "Sorry, the speech service is down."
# Streamlit App
st.title("π¬ Netflix Movie Search Chatbot")
# Load dataset and model.
# Streamlit re-executes this script on every widget interaction, so both
# loaders are wrapped in Streamlit's caches: the CSV is downloaded once and
# the DialoGPT weights are loaded once per server process, not on each rerun.
data = st.cache_data(load_data)()
tokenizer, model = st.cache_resource(load_model)()
# Input options: Text or Voice
# `user_input` ends up as the typed text, the transcribed speech, or "" when
# nothing has been entered/recorded during this run of the script.
input_option = st.radio("Choose input method:", ("Text", "Voice"))
if input_option == "Text":
    user_input = st.text_input("Enter the movie name, director, or cast:")
elif input_option == "Voice" and st.button("π€ Start Recording"):
    # The record button is only rendered in Voice mode; a click triggers
    # one capture and echoes the transcription back to the user.
    user_input = voice_to_text()
    st.write(f"π£ You said: **{user_input}**")
else:
    user_input = ""
# Generate response
if user_input:
    # Search for movie details first; fall back to the chatbot only when the
    # dataset has no match.
    movie_results = search_movie_details(user_input, data)
    if not movie_results.empty:
        st.write("π₯ **Here are the matching results:**")
        for _, row in movie_results.iterrows():
            st.write(f"**π Title:** {row.get('title', 'N/A')}")
            st.write(f"**π Type:** {row.get('type', 'N/A')}")
            st.write(f"**π¬ Director:** {row.get('director', 'N/A')}")
            st.write(f"**π₯ Cast:** {row.get('cast', 'N/A')}")
            st.write(f"**π Release Year:** {row.get('release_year', 'N/A')}")
            st.write(f"**β Rating:** {row.get('rating', 'N/A')}")
            st.write(f"**π Description:** {row.get('description', 'N/A')}")
            st.write("---")
    else:
        # Use DialoGPT for general conversation.
        # DialoGPT expects each turn to end with the EOS token; without it the
        # model merely continues the user's sentence instead of replying.
        inputs = tokenizer.encode(
            user_input + tokenizer.eos_token, return_tensors="pt"
        )
        prompt_length = inputs.shape[-1]
        outputs = model.generate(
            inputs,
            # Budget is counted on top of the prompt so a long question
            # still leaves room for an answer.
            max_length=prompt_length + 100,
            num_return_sequences=1,
            # GPT-2 style models define no pad token; reuse EOS.
            pad_token_id=tokenizer.eos_token_id,
        )
        # generate() returns prompt + reply; keep only the newly generated
        # tokens so the user's own text is not echoed back as the answer.
        response = tokenizer.decode(
            outputs[0, prompt_length:], skip_special_tokens=True
        )
        st.write(f"π€ **Chatbot:** {response}")