# Spaces: Sleeping  (hosting-page status text captured along with the source; not part of the program)
import streamlit as st | |
import pandas as pd | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
import speech_recognition as sr | |
from pydub import AudioSegment | |
# Load the Netflix dataset from CSV
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the Netflix catalogue from ``netflix_titles.csv``.

    The path is relative to the process working directory. Streamlit
    re-executes the script on every widget interaction, so the parsed
    frame is memoised with ``st.cache_data`` instead of being re-read
    from disk on each rerun.

    Returns:
        The contents of ``netflix_titles.csv`` as a DataFrame.
    """
    return pd.read_csv("netflix_titles.csv")
# Load DialoGPT model and tokenizer
@st.cache_resource
def load_model():
    """Load the ``microsoft/DialoGPT-medium`` tokenizer and model.

    Both objects are expensive to build, and Streamlit re-executes the
    script on every interaction. ``st.cache_resource`` keeps one shared
    instance alive across reruns instead of reloading the weights each time.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = "microsoft/DialoGPT-medium"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    return tokenizer, model
# Function to search the dataset for movie details
def search_movie_details(query: str, data: pd.DataFrame) -> pd.DataFrame:
    """Return the rows whose title, cast or director contains ``query``.

    Matching is a case-insensitive *literal* substring test: the query is
    never interpreted as a regular expression, so input such as ``"C++"``
    or ``"("`` cannot raise ``re.error``. Cells with missing values are
    treated as non-matching rather than propagating NA into the row mask.

    Args:
        query: Free-text search term; surrounding whitespace is ignored.
        data: Frame with string columns ``title``, ``cast`` and ``director``.

    Returns:
        The matching subset of ``data`` (original index preserved). A blank
        query yields an empty frame with the same columns.
    """
    needle = query.strip().lower()
    if not needle:
        # A blank needle is a substring of everything; return no rows instead.
        return data.iloc[0:0]

    mask = None
    for column in ("title", "cast", "director"):
        hits = data[column].str.lower().str.contains(needle, regex=False, na=False)
        mask = hits if mask is None else (mask | hits)
    return data[mask]
# Function to convert voice to text
def voice_to_text() -> str:
    """Record one utterance from the default microphone and transcribe it.

    Returns:
        The recognised text, or a human-readable "Sorry, ..." message when
        no microphone can be opened, the audio is unintelligible, or the
        recognition service cannot be reached.
    """
    recognizer = sr.Recognizer()

    # Capture first, and release the microphone before the network call.
    try:
        with sr.Microphone() as source:
            st.write("Speak now...")
            audio = recognizer.listen(source)
    except OSError:
        # The audio backend reports a missing input device (typical on a
        # hosted server) as OSError; answer in the same style as the other
        # failure paths rather than crashing the app.
        return "Sorry, no microphone is available."

    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Sorry, I could not understand the audio."
    except sr.RequestError:
        return "Sorry, the speech service is down."
# Streamlit App
st.title("Netflix Movie Details Chatbot 🎬")

# Load dataset and model
data = load_data()
tokenizer, model = load_model()

# Input options: Text or Voice
input_option = st.radio("Choose input method:", ("Text", "Voice"))
user_input = ""
if input_option == "Text":
    user_input = st.text_input("Enter the movie name, director, or cast:")
elif input_option == "Voice":
    if st.button("Start Recording"):
        user_input = voice_to_text()
        st.write(f"You said: {user_input}")

# Generate response
if user_input:
    # Search for movie details
    movie_results = search_movie_details(user_input, data)
    if not movie_results.empty:
        st.write("Here are the matching results:")
        # (label shown to the user, column read from the row)
        displayed_fields = (
            ("Title", "title"),
            ("Type", "type"),
            ("Director", "director"),
            ("Cast", "cast"),
            ("Release Year", "release_year"),
            ("Rating", "rating"),
            ("Description", "description"),
        )
        for _, row in movie_results.iterrows():
            for label, column in displayed_fields:
                st.write(f"**{label}:** {row[column]}")
            st.write("---")
    else:
        # Use DialoGPT for general conversation.
        # DialoGPT expects each turn to end with the EOS token; without it
        # the model continues the user's sentence instead of replying.
        inputs = tokenizer.encode(
            user_input + tokenizer.eos_token, return_tensors="pt"
        )
        outputs = model.generate(
            inputs,
            max_length=100,
            num_return_sequences=1,
            # GPT-2 style tokenizers define no pad token; reuse EOS.
            pad_token_id=tokenizer.eos_token_id,
        )
        # The output starts with the prompt tokens; decode only the new
        # tokens so the reply does not echo the user's text.
        reply_ids = outputs[0, inputs.shape[-1]:]
        response = tokenizer.decode(reply_ids, skip_special_tokens=True)
        st.write(f"Chatbot: {response}")