mfraz committed on
Commit
89e69ce
·
verified ·
1 Parent(s): 5bbb0e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -22
app.py CHANGED
@@ -2,12 +2,12 @@ import streamlit as st
2
  import pandas as pd
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import speech_recognition as sr
5
- from pydub import AudioSegment
6
 
7
  # Load the Netflix dataset from CSV
8
  @st.cache_data
9
  def load_data():
10
- return pd.read_csv("https://huggingface.co/spaces/mfraz/Netflix-data/resolve/main/netflix_titles.csv")
 
11
 
12
  # Load DialoGPT model and tokenizer
13
  @st.cache_resource
@@ -16,23 +16,23 @@ def load_model():
16
  model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
17
  return tokenizer, model
18
 
19
- # Function to search the dataset for movie details
20
  def search_movie_details(query, data):
21
  query = query.lower()
22
- results = data[
23
- data["title"].str.lower().str.contains(query) |
24
- data["cast"].str.lower().str.contains(query) |
25
- data["director"].str.lower().str.contains(query)
26
- ]
27
  return results
28
 
29
  # Function to convert voice to text
30
  def voice_to_text():
31
  recognizer = sr.Recognizer()
32
  with sr.Microphone() as source:
33
- st.write("Speak now...")
34
- audio = recognizer.listen(source)
35
  try:
 
36
  text = recognizer.recognize_google(audio)
37
  return text
38
  except sr.UnknownValueError:
@@ -41,7 +41,7 @@ def voice_to_text():
41
  return "Sorry, the speech service is down."
42
 
43
  # Streamlit App
44
- st.title("Netflix Movie Details Chatbot 🎬")
45
 
46
  # Load dataset and model
47
  data = load_data()
@@ -54,9 +54,9 @@ user_input = ""
54
  if input_option == "Text":
55
  user_input = st.text_input("Enter the movie name, director, or cast:")
56
  elif input_option == "Voice":
57
- if st.button("Start Recording"):
58
  user_input = voice_to_text()
59
- st.write(f"You said: {user_input}")
60
 
61
  # Generate response
62
  if user_input:
@@ -64,19 +64,19 @@ if user_input:
64
  movie_results = search_movie_details(user_input, data)
65
 
66
  if not movie_results.empty:
67
- st.write("Here are the matching results:")
68
  for _, row in movie_results.iterrows():
69
- st.write(f"**Title:** {row['title']}")
70
- st.write(f"**Type:** {row['type']}")
71
- st.write(f"**Director:** {row['director']}")
72
- st.write(f"**Cast:** {row['cast']}")
73
- st.write(f"**Release Year:** {row['release_year']}")
74
- st.write(f"**Rating:** {row['rating']}")
75
- st.write(f"**Description:** {row['description']}")
76
  st.write("---")
77
  else:
78
  # Use DialoGPT for general conversation
79
  inputs = tokenizer.encode(user_input, return_tensors="pt")
80
  outputs = model.generate(inputs, max_length=100, num_return_sequences=1)
81
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
82
- st.write(f"Chatbot: {response}")
 
2
  import pandas as pd
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import speech_recognition as sr
 
5
 
6
# Load the Netflix dataset from CSV.
@st.cache_data
def load_data():
    """Fetch the Netflix titles dataset from the Hugging Face hub.

    Returns:
        pd.DataFrame: one row per Netflix title, as parsed by ``pd.read_csv``.
    """
    dataset_url = (
        "https://huggingface.co/spaces/mfraz/Netflix-data"
        "/resolve/main/netflix_titles.csv"
    )
    return pd.read_csv(dataset_url)
11
 
12
  # Load DialoGPT model and tokenizer
13
  @st.cache_resource
 
16
  model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
17
  return tokenizer, model
18
 
19
# Function to search the dataset for movie details by title, cast, or director.
def search_movie_details(query, data):
    """Return the rows of ``data`` whose title, cast, or director contains ``query``.

    Args:
        query (str): user-supplied search text. Matched case-insensitively as a
            LITERAL substring — not as a regular expression.
        data (pd.DataFrame): must contain "title", "cast" and "director" columns.

    Returns:
        pd.DataFrame: the matching subset of ``data`` (possibly empty).
    """
    query = query.lower()
    search_columns = ["title", "cast", "director"]

    # Remove rows missing any searchable field so the string ops below are safe.
    results = data.dropna(subset=search_columns)

    # regex=False: user input such as "(" or "*" must be treated literally;
    # the original call interpreted the query as a regex and raised re.error
    # on special characters.
    mask = results[search_columns].apply(
        lambda row: row.astype(str).str.lower().str.contains(query, regex=False).any(),
        axis=1,
    )
    return results[mask]
28
 
29
  # Function to convert voice to text
30
  def voice_to_text():
31
  recognizer = sr.Recognizer()
32
  with sr.Microphone() as source:
33
+ st.write("πŸŽ™ Speak now...")
 
34
  try:
35
+ audio = recognizer.listen(source, timeout=5)
36
  text = recognizer.recognize_google(audio)
37
  return text
38
  except sr.UnknownValueError:
 
41
  return "Sorry, the speech service is down."
42
 
43
  # Streamlit App
44
+ st.title("🎬 Netflix Movie Search Chatbot")
45
 
46
  # Load dataset and model
47
  data = load_data()
 
54
  if input_option == "Text":
55
  user_input = st.text_input("Enter the movie name, director, or cast:")
56
  elif input_option == "Voice":
57
+ if st.button("🎀 Start Recording"):
58
  user_input = voice_to_text()
59
+ st.write(f"πŸ—£ You said: **{user_input}**")
60
 
61
  # Generate response
62
  if user_input:
 
64
  movie_results = search_movie_details(user_input, data)
65
 
66
  if not movie_results.empty:
67
+ st.write("πŸŽ₯ **Here are the matching results:**")
68
  for _, row in movie_results.iterrows():
69
+ st.write(f"**πŸ“Œ Title:** {row.get('title', 'N/A')}")
70
+ st.write(f"**🎭 Type:** {row.get('type', 'N/A')}")
71
+ st.write(f"**🎬 Director:** {row.get('director', 'N/A')}")
72
+ st.write(f"**πŸ‘₯ Cast:** {row.get('cast', 'N/A')}")
73
+ st.write(f"**πŸ“… Release Year:** {row.get('release_year', 'N/A')}")
74
+ st.write(f"**⭐ Rating:** {row.get('rating', 'N/A')}")
75
+ st.write(f"**πŸ“ Description:** {row.get('description', 'N/A')}")
76
  st.write("---")
77
  else:
78
  # Use DialoGPT for general conversation
79
  inputs = tokenizer.encode(user_input, return_tensors="pt")
80
  outputs = model.generate(inputs, max_length=100, num_return_sequences=1)
81
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
82
+ st.write(f"πŸ€– **Chatbot:** {response}")