File size: 3,163 Bytes
2cb0aa0
9c3c0a3
 
 
 
 
 
 
 
89e69ce
 
9c3c0a3
 
 
 
 
 
 
 
89e69ce
9c3c0a3
 
89e69ce
 
 
 
 
9c3c0a3
 
 
 
 
 
89e69ce
9c3c0a3
89e69ce
9c3c0a3
 
 
 
 
 
 
 
89e69ce
9c3c0a3
 
 
 
 
 
 
 
 
 
 
 
89e69ce
9c3c0a3
89e69ce
9c3c0a3
 
 
 
 
 
 
89e69ce
9c3c0a3
89e69ce
 
 
 
 
 
 
9c3c0a3
 
 
 
 
 
89e69ce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Install prerequisites from a shell, not from Python:
#   pip install torch torchvision torchaudio
import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import speech_recognition as sr

# Fetch the Netflix catalogue once and memoize it across reruns.
@st.cache_data
def load_data():
    """Download the Netflix titles CSV and return it as a DataFrame."""
    csv_url = "https://huggingface.co/spaces/mfraz/Netflix-data/resolve/main/netflix_titles.csv"
    return pd.read_csv(csv_url)

# Build the conversational model once and keep it resident for the session.
@st.cache_resource
def load_model():
    """Return the (tokenizer, model) pair for microsoft/DialoGPT-medium."""
    checkpoint = "microsoft/DialoGPT-medium"
    dialog_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    dialog_model = AutoModelForCausalLM.from_pretrained(checkpoint)
    return dialog_tokenizer, dialog_model

# Function to search movie details
def search_movie_details(query, data):
    """Return the rows of *data* whose title, cast, or director contains *query*.

    Matching is case-insensitive and literal: ``regex=False`` prevents user
    input such as "(500) Days of Summer" from being parsed as a regular
    expression (which previously raised ``re.error``). NaN cells in a search
    column simply don't match, instead of the old ``dropna`` behaviour that
    discarded the whole row even when another column matched.

    Parameters
    ----------
    query : str
        Free-text search term from the user.
    data : pandas.DataFrame
        Catalogue with at least "title", "cast", and "director" columns.

    Returns
    -------
    pandas.DataFrame
        The matching subset of *data* (possibly empty), original index kept.
    """
    query = query.lower()
    search_columns = ["title", "cast", "director"]

    # OR together one NaN-safe, literal, case-insensitive mask per column.
    mask = pd.Series(False, index=data.index)
    for column in search_columns:
        mask |= (
            data[column]
            .fillna("")            # missing cells never match, never drop rows
            .astype(str)
            .str.lower()
            .str.contains(query, regex=False)
        )
    return data[mask]

# Function to convert voice to text
def voice_to_text():
    """Capture one utterance from the default microphone and return it as text.

    All failure modes return a human-readable message instead of raising, so
    the Streamlit UI never crashes mid-recording:
    - no speech begins within 5 s  -> WaitTimeoutError (previously uncaught
      and crashed the app)
    - speech is unintelligible     -> UnknownValueError
    - Google speech API unreachable -> RequestError
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        st.write("πŸŽ™ Speak now...")
        try:
            audio = recognizer.listen(source, timeout=5)
            text = recognizer.recognize_google(audio)
            return text
        except sr.WaitTimeoutError:
            # listen() raises this when nothing is said before the timeout.
            return "Sorry, I did not hear anything."
        except sr.UnknownValueError:
            return "Sorry, I could not understand the audio."
        except sr.RequestError:
            return "Sorry, the speech service is down."

# Streamlit App — wires the UI together: pick an input mode, search the
# catalogue, and fall back to DialoGPT small talk when nothing matches.
st.title("🎬 Netflix Movie Search Chatbot")

# Load dataset and model (both cached by their decorators).
data = load_data()
tokenizer, model = load_model()

# Input options: Text or Voice
input_option = st.radio("Choose input method:", ("Text", "Voice"))

user_input = ""
if input_option == "Text":
    user_input = st.text_input("Enter the movie name, director, or cast:")
elif input_option == "Voice":
    if st.button("🎀 Start Recording"):
        user_input = voice_to_text()
        st.write(f"πŸ—£ You said: **{user_input}**")

# Generate response
if user_input:
    # Search for movie details
    movie_results = search_movie_details(user_input, data)

    if not movie_results.empty:
        st.write("πŸŽ₯ **Here are the matching results:**")
        for _, row in movie_results.iterrows():
            st.write(f"**πŸ“Œ Title:** {row.get('title', 'N/A')}")
            st.write(f"**🎭 Type:** {row.get('type', 'N/A')}")
            st.write(f"**🎬 Director:** {row.get('director', 'N/A')}")
            st.write(f"**πŸ‘₯ Cast:** {row.get('cast', 'N/A')}")
            st.write(f"**πŸ“… Release Year:** {row.get('release_year', 'N/A')}")
            st.write(f"**⭐ Rating:** {row.get('rating', 'N/A')}")
            st.write(f"**πŸ“ Description:** {row.get('description', 'N/A')}")
            st.write("---")
    else:
        # Use DialoGPT for general conversation.
        # DialoGPT expects each turn terminated with the EOS token, and
        # generate() returns the prompt ids followed by the reply ids — so
        # decode only the tokens *after* the prompt; the old code decoded
        # outputs[0] whole and echoed the user's input back at them.
        inputs = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors="pt")
        outputs = model.generate(
            inputs,
            max_length=100,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,  # avoid missing-pad warning
        )
        response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
        st.write(f"πŸ€– **Chatbot:** {response}")