# youtube_comments_sentiment / multilingual_sentiment_model.py
# yuvarajareddy001's picture
# Deploying pipeline
# 46ed0e6 verified
import logging
import os
import re
import textwrap

import matplotlib.pyplot as plt
import pandas as pd
from googleapiclient.discovery import build
from transformers import pipeline
# === Logging configuration ===
# Route INFO-and-above records to a log file, each line stamped with the
# time and severity of the record.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filename="app_logs.log",
)
# YouTube Data API key. It is read from the YOUTUBE_API_KEY environment
# variable so the credential is never stored in source control.
# NOTE(security): the key that used to be hard-coded on this line was published
# together with the source; it should be revoked and replaced.
API_KEY = os.environ.get("YOUTUBE_API_KEY", "")
if not API_KEY:
    # Surface the misconfiguration early instead of only at the first request.
    logging.warning(
        "YOUTUBE_API_KEY is not set; YouTube Data API requests are expected to fail."
    )
# Load the Hugging Face multilingual sentiment model once, at import time.
# top_k=None asks the pipeline for the score of every label instead of only
# the best one; analyze_sentiment picks the maximum itself.
try:
    sentiment_classifier = pipeline(
        model="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
        top_k=None,
    )
    logging.info("Sentiment analysis model loaded successfully.")
except Exception as e:
    # logging.exception records the traceback, so the "check logs" hint in the
    # raised error actually leads to the root cause.
    logging.exception(f"Failed to load sentiment model: {e}")
    # Chain explicitly so the original failure stays attached as __cause__.
    raise RuntimeError("Error loading sentiment model. Check logs for details.") from e
# Extract Video ID from URL
# Characters that may appear in a YouTube video ID (letters, digits, "-", "_").
# Matching only these stops the capture at "?", "&", "#", "/" or whitespace.
_VIDEO_ID_CHARS = r"[A-Za-z0-9_-]+"

# The patterns are applied with ``search``, so the scheme and any sub-domain
# ("www.", "m.", "music.") do not need to be spelled out. Each pattern has
# exactly one capture group: the video ID.
_VIDEO_ID_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        # watch?v=ID, also when "v" is not the first query parameter. The
        # lazy optional group prefers the first "v=" if the URL repeats it.
        rf"youtube\.com/watch\?(?:[^#\s]*?&)??v=({_VIDEO_ID_CHARS})",
        # Path-style URLs: /embed/ID, /v/ID, /shorts/ID, /live/ID.
        rf"youtube(?:-nocookie)?\.com/(?:embed|v|shorts|live)/({_VIDEO_ID_CHARS})",
        # Short links: youtu.be/ID
        rf"youtu\.be/({_VIDEO_ID_CHARS})",
    )
)


def extract_video_id(url):
    """
    Extracts YouTube video ID from various YouTube URL formats.

    Supported forms are ``youtube.com/watch?...v=ID``, ``youtube.com/embed/ID``,
    ``youtube.com/v/ID``, ``youtube.com/shorts/ID``, ``youtube.com/live/ID``,
    ``youtube-nocookie.com/embed/ID`` and ``youtu.be/ID``. Trailing query
    parameters, fragments and slashes are not included in the result.

    Args:
        url: The URL (or any text containing one) to inspect.

    Returns:
        The video ID as a string, or ``None`` when ``url`` is not a string or
        no supported URL form is found.
    """
    # Non-string input (e.g. None from an empty form field) yields None rather
    # than raising, matching the function's "None means not found" contract.
    if not isinstance(url, str):
        return None
    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)
    return None  # No supported URL form matched
# Fetch YouTube Comments with Pagination
def get_comments(video_id, max_results=500):
    """
    Fetches top-level comments of a YouTube video, following pagination.

    Args:
        video_id: ID of the video whose comment threads are requested.
        max_results: Upper bound on the number of comments to return.

    Returns:
        A ``(comments, error)`` tuple. On success ``comments`` is a list of
        comment texts (at most ``max_results``) and ``error`` is ``None``.
        On any failure the tuple is ``([], "Error fetching comments: ...")``;
        comments fetched before the failure are discarded.
    """
    comments = []
    next_page_token = None
    try:
        if max_results <= 0:
            # Nothing requested: skip client construction and the API call.
            return [], None

        # Built inside the try so that a client-construction failure is
        # reported through the same (comments, error) return value as a
        # request failure instead of escaping to the caller.
        youtube = build("youtube", "v3", developerKey=API_KEY)

        while len(comments) < max_results:
            remaining = max_results - len(comments)
            request = youtube.commentThreads().list(
                part="snippet",
                videoId=video_id,
                maxResults=min(100, remaining),  # Up to 100 per request
                textFormat="plainText",
                pageToken=next_page_token,
            )
            response = request.execute()

            comments.extend(
                item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
                for item in response.get("items", [])
            )

            # No token means the last page has been reached.
            next_page_token = response.get("nextPageToken")
            if not next_page_token:
                break

        logging.info(f"Fetched {len(comments)} comments for Video ID: {video_id}")
    except Exception as e:
        logging.error(f"Error fetching comments: {e}")
        return [], f"Error fetching comments: {e}"
    return comments[:max_results], None
def get_video_title(video_id):
    """
    Fetches the title of the YouTube video using the YouTube Data API.

    Args:
        video_id: ID of the video to look up.

    Returns:
        The video title; ``"Unknown Video Title"`` when the API response
        contains no items; ``"Error Fetching Title"`` when building the client
        or executing the request raises.
    """
    try:
        # Built inside the try so a client-construction failure falls back to
        # the placeholder title like any other API error.
        youtube = build("youtube", "v3", developerKey=API_KEY)
        request = youtube.videos().list(
            part="snippet",
            id=video_id,
        )
        response = request.execute()

        items = response.get("items")
        if items:
            return items[0]["snippet"]["title"]
        return "Unknown Video Title"
    except Exception as e:
        logging.error(f"Error fetching video title: {e}")
        return "Error Fetching Title"
# Sentiment Analysis
def analyze_sentiment(comments):
    """
    Classifies each comment and tallies how many fall under each sentiment.

    Args:
        comments: Iterable of comment strings.

    Returns:
        On success, ``(results, sentiment_counts)`` where ``results`` is a
        list of ``{"Comment", "Sentiment", "Score"}`` dicts (one per comment)
        and ``sentiment_counts`` maps ``"positive"``/``"neutral"``/
        ``"negative"`` to the number of comments with that label.
        On failure, ``([], "Error analyzing sentiment: ...")`` - note that the
        second element is then a message string, not a dict.
    """
    results = []
    sentiment_counts = {"positive": 0, "neutral": 0, "negative": 0}
    try:
        for comment in comments:
            # truncation=True clips over-long comments to the model's maximum
            # input length; without it a single very long comment makes the
            # classifier raise and the whole batch is reported as failed.
            sentiment_scores = sentiment_classifier(comment, truncation=True)[0]
            # The classifier returns one score per label; keep the best one.
            sentiment = max(sentiment_scores, key=lambda x: x["score"])
            sentiment_label = sentiment["label"]
            sentiment_counts[sentiment_label] += 1
            results.append(
                {
                    "Comment": comment,
                    "Sentiment": sentiment_label,
                    "Score": sentiment["score"],
                }
            )
        logging.info("Sentiment analysis completed successfully.")
    except Exception as e:
        logging.error(f"Error analyzing sentiment: {e}")
        return [], f"Error analyzing sentiment: {e}"
    return results, sentiment_counts
# Generate Pie Chart
def plot_pie_chart(sentiment_counts, video_title):
    """
    Generates a pie chart for sentiment distribution with a wrapped video title.

    Args:
        sentiment_counts: Mapping of sentiment label to number of comments.
        video_title: Title shown above the chart, wrapped at 50 characters.

    Returns:
        The matplotlib figure, or ``None`` if the chart could not be built.
    """
    fig = None
    try:
        # Wrap the title first: a bad title then fails before any figure
        # has been allocated.
        wrapped_title = "\n".join(textwrap.wrap(video_title, width=50))

        fig, ax = plt.subplots(figsize=(8, 6))  # Larger figure for better visibility
        ax.pie(
            list(sentiment_counts.values()),
            labels=list(sentiment_counts.keys()),
            autopct='%1.1f%%',
            startangle=140,
        )
        ax.set_title(f"Sentiment Analysis for:\n{wrapped_title}", fontsize=10)
        logging.info(f"Pie chart generated successfully for {video_title}.")
        return fig
    except Exception as e:
        logging.error(f"Error generating pie chart: {e}")
        # pyplot keeps every figure it creates until it is closed; release a
        # half-built one so repeated failures do not accumulate figures.
        if fig is not None:
            plt.close(fig)
        return None
# Overall Sentiment Summary
def get_overall_sentiment(sentiment_counts):
    """
    Builds a one-line summary naming the sentiment with the highest count.

    Ties are resolved in favour of the label that comes first in the mapping.
    Returns a fixed error message if the summary cannot be computed (for
    example when the mapping is empty or not a dict).
    """
    try:
        count_of = sentiment_counts.get
        dominant_label = max(sentiment_counts, key=count_of)
        summary = f"Overall Video Sentiment: {dominant_label.upper()}"
        logging.info(f"Overall Sentiment: {summary}")
        return summary
    except Exception as err:
        logging.error(f"Error calculating overall sentiment: {err}")
        return "Error calculating overall sentiment."