File size: 3,163 Bytes
2cb0aa0
9c3c0a3
 
 
 
 
 
 
 
89e69ce
 
9c3c0a3
 
 
 
 
 
 
 
89e69ce
9c3c0a3
 
89e69ce
 
 
 
 
9c3c0a3
 
 
 
 
 
89e69ce
9c3c0a3
89e69ce
9c3c0a3
 
 
 
 
 
 
 
89e69ce
9c3c0a3
 
 
 
 
 
 
 
 
 
 
 
89e69ce
9c3c0a3
89e69ce
9c3c0a3
 
 
 
 
 
 
89e69ce
9c3c0a3
89e69ce
 
 
 
 
 
 
9c3c0a3
 
 
 
 
 
89e69ce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Install prerequisites from a shell, not from Python:
#   pip install torch torchvision torchaudio
import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import speech_recognition as sr

# Fetch the Netflix catalogue once and memoize it across reruns.
@st.cache_data
def load_data():
    """Download the Netflix titles CSV and return it as a DataFrame."""
    csv_url = "https://huggingface.co/spaces/mfraz/Netflix-data/resolve/main/netflix_titles.csv"
    return pd.read_csv(csv_url)

# Build the conversational model once and keep it resident for the session.
@st.cache_resource
def load_model():
    """Return the (tokenizer, model) pair for microsoft/DialoGPT-medium."""
    checkpoint = "microsoft/DialoGPT-medium"
    dialog_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    dialog_model = AutoModelForCausalLM.from_pretrained(checkpoint)
    return dialog_tokenizer, dialog_model

# Function to search movie details
def search_movie_details(query, data):
    """Return the rows of *data* whose title, cast, or director contains *query*.

    Matching is case-insensitive and literal: ``regex=False`` prevents user
    input such as "(500) Days of Summer" from being parsed as a regular
    expression (which previously raised ``re.error``). NaN cells in a search
    column simply don't match, instead of the old ``dropna`` behaviour that
    discarded the whole row even when another column matched.

    Parameters
    ----------
    query : str
        Free-text search term from the user.
    data : pandas.DataFrame
        Catalogue with at least "title", "cast", and "director" columns.

    Returns
    -------
    pandas.DataFrame
        The matching subset of *data* (possibly empty), original index kept.
    """
    query = query.lower()
    search_columns = ["title", "cast", "director"]

    # OR together one NaN-safe, literal, case-insensitive mask per column.
    mask = pd.Series(False, index=data.index)
    for column in search_columns:
        mask |= (
            data[column]
            .fillna("")            # missing cells never match, never drop rows
            .astype(str)
            .str.lower()
            .str.contains(query, regex=False)
        )
    return data[mask]

# Function to convert voice to text
def voice_to_text():
    """Capture one utterance from the default microphone and return it as text.

    All failure modes return a human-readable message instead of raising, so
    the Streamlit UI never crashes mid-recording:
    - no speech begins within 5 s  -> WaitTimeoutError (previously uncaught
      and crashed the app)
    - speech is unintelligible     -> UnknownValueError
    - Google speech API unreachable -> RequestError
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        st.write("πŸŽ™ Speak now...")
        try:
            audio = recognizer.listen(source, timeout=5)
            text = recognizer.recognize_google(audio)
            return text
        except sr.WaitTimeoutError:
            # listen() raises this when nothing is said before the timeout.
            return "Sorry, I did not hear anything."
        except sr.UnknownValueError:
            return "Sorry, I could not understand the audio."
        except sr.RequestError:
            return "Sorry, the speech service is down."

# Streamlit App — wires the UI together: pick an input mode, search the
# catalogue, and fall back to DialoGPT small talk when nothing matches.
st.title("🎬 Netflix Movie Search Chatbot")

# Load dataset and model (both cached by their decorators).
data = load_data()
tokenizer, model = load_model()

# Input options: Text or Voice
input_option = st.radio("Choose input method:", ("Text", "Voice"))

user_input = ""
if input_option == "Text":
    user_input = st.text_input("Enter the movie name, director, or cast:")
elif input_option == "Voice":
    if st.button("🎀 Start Recording"):
        user_input = voice_to_text()
        st.write(f"πŸ—£ You said: **{user_input}**")

# Generate response
if user_input:
    # Search for movie details
    movie_results = search_movie_details(user_input, data)

    if not movie_results.empty:
        st.write("πŸŽ₯ **Here are the matching results:**")
        for _, row in movie_results.iterrows():
            st.write(f"**πŸ“Œ Title:** {row.get('title', 'N/A')}")
            st.write(f"**🎭 Type:** {row.get('type', 'N/A')}")
            st.write(f"**🎬 Director:** {row.get('director', 'N/A')}")
            st.write(f"**πŸ‘₯ Cast:** {row.get('cast', 'N/A')}")
            st.write(f"**πŸ“… Release Year:** {row.get('release_year', 'N/A')}")
            st.write(f"**⭐ Rating:** {row.get('rating', 'N/A')}")
            st.write(f"**πŸ“ Description:** {row.get('description', 'N/A')}")
            st.write("---")
    else:
        # Use DialoGPT for general conversation.
        # DialoGPT expects each turn terminated with the EOS token, and
        # generate() returns the prompt ids followed by the reply ids — so
        # decode only the tokens *after* the prompt; the old code decoded
        # outputs[0] whole and echoed the user's input back at them.
        inputs = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors="pt")
        outputs = model.generate(
            inputs,
            max_length=100,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,  # avoid missing-pad warning
        )
        response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
        st.write(f"πŸ€– **Chatbot:** {response}")