yuvarajareddy001 committed on
Commit
46ed0e6
·
verified ·
1 Parent(s): 8fb954c

Deploying pipeline

Browse files
Files changed (3) hide show
  1. app.py +81 -0
  2. multilingual_sentiment_model.py +163 -0
  3. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import logging
4
+ from multilingual_sentiment_model import *
5
+
6
# === Setup Logging ===
# The multilingual_sentiment_model import above already calls
# logging.basicConfig(), and basicConfig() does nothing once the root logger
# has a handler. Attach the console handler explicitly so that log records
# are also written to the console stream.
_console_handler = logging.StreamHandler()
_console_handler.setFormatter(
    logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
)
_root_logger = logging.getLogger()
_root_logger.setLevel(logging.INFO)
_root_logger.addHandler(_console_handler)
11
+
12
+ # Gradio Function with Logging
13
def youtube_sentiment_analysis(url, num_of_comments):
    """Run the comment-sentiment pipeline for one YouTube video.

    Args:
        url: YouTube video URL entered in the UI.
        num_of_comments: Maximum number of comments to fetch; converted
            with ``int()`` before use.

    Returns:
        A ``(summary, chart, table)`` tuple for the three Gradio outputs.
        ``summary`` is the overall-sentiment text, ``chart`` is whatever
        ``plot_pie_chart`` returns, and ``table`` is a DataFrame holding the
        first five analysed comments. On any failure ``summary`` carries an
        error message and the other two elements are ``None``.
    """
    try:
        video_id = extract_video_id(url)
        if not video_id:
            logging.warning("Invalid YouTube URL entered in UI.")
            return "Error: Invalid YouTube URL", None, None

        comments, error = get_comments(video_id, int(num_of_comments))
        if error:
            logging.error(f"Error fetching comments: {error}")
            return f"Error fetching comments: {error}", None, None

        if not comments:
            logging.warning("No comments found for the video.")
            return "Error: No comments found.", None, None

        # Fetch the title only once there is something to analyse, so a
        # failed comment fetch does not cost a second API request.
        video_title = get_video_title(video_id)

        sentiment_results, sentiment_counts = analyze_sentiment(comments)
        if not sentiment_results:
            # Without this check a failed analysis would still produce a
            # summary and an empty table, hiding the failure from the user.
            logging.error("Sentiment analysis produced no results.")
            return "Error: Sentiment analysis failed. Check logs for details.", None, None

        chart = plot_pie_chart(sentiment_counts, video_title)  # Pass title to the chart
        summary = get_overall_sentiment(sentiment_counts)

        return summary, chart, pd.DataFrame(sentiment_results).head(5)

    except Exception as e:
        # Top-level UI boundary: log the traceback and surface the message.
        logging.exception(f"Unexpected Error: {str(e)}")
        return f"Unexpected Error: {str(e)}", None, None
40
+
41
# Example YouTube URLs offered as one-click inputs below the results.
example_urls = [
    "https://www.youtube.com/watch?v=0e9WuB0Ua98",
    "https://www.youtube.com/watch?v=3JZ_D3ELwOQ",
    "https://youtu.be/dQw4w9WgXcQ",
    "https://www.youtube.com/watch?v=9bZkp7q19f0",
    "https://www.youtube.com/watch?v=2Vv-BfVoq4g"
]

# Gradio interface: inputs on top, every output stacked below them.
with gr.Blocks() as iface:
    gr.Markdown("## YouTube Comment Sentiment Analysis", elem_classes='centered-title')

    gr.Markdown("Enter a YouTube video URL and specify the number of comments to analyze.")

    # Input row: the video URL next to the comment-count slider.
    with gr.Row():
        youtube_url = gr.Textbox(label="YouTube Video URL")
        num_comments = gr.Slider(
            minimum=10,
            maximum=1000,
            step=1,
            value=100,
            label="Number of Comments to Fetch",
        )

    submit_btn = gr.Button("Submit")

    # Output components, in the order returned by youtube_sentiment_analysis.
    output_summary = gr.Textbox(label="Overall Sentiment Summary")
    output_chart = gr.Plot(label="Sentiment Chart")
    output_table = gr.Dataframe(label="Comment Sentiment Analysis")

    submit_btn.click(
        youtube_sentiment_analysis,
        inputs=[youtube_url, num_comments],
        outputs=[output_summary, output_chart, output_table],
    )

    gr.Markdown("### Example YouTube Video URLs for Testing (Click to Use)")
    with gr.Row():
        for example in example_urls:
            example_button = gr.Button(example)
            # The default argument binds the current URL to this button;
            # a plain closure would see only the last loop value.
            example_button.click(fn=lambda x=example: x, outputs=[youtube_url])

# Launch App
iface.launch(share=True)
multilingual_sentiment_model.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
import re
import textwrap

import matplotlib.pyplot as plt
import pandas as pd
from googleapiclient.discovery import build
from transformers import pipeline
8
+
9
# === Setup Logging ===
# Write INFO-and-above records from the root logger to a log file.
logging.basicConfig(
    filename="app_logs.log",  # Log file name
    level=logging.INFO,  # Log info, warnings, and errors
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# YouTube Data API key, read from the YOUTUBE_API_KEY environment variable so
# that the credential is never stored in the repository. Any key that was
# previously committed in this file should be treated as leaked and revoked.
API_KEY = os.environ.get("YOUTUBE_API_KEY", "")
if not API_KEY:
    logging.warning("YOUTUBE_API_KEY is not set; YouTube Data API requests will fail.")
18
+
19
# Load Hugging Face Sentiment Model
# The model is loaded once at import time and shared by analyze_sentiment().
# A load failure is fatal for the app, so it is logged with its traceback and
# re-raised with the original exception chained as the cause.
try:
    sentiment_classifier = pipeline(
        model="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
        top_k=None,
    )
    logging.info("Sentiment analysis model loaded successfully.")
except Exception as e:
    logging.exception(f"Failed to load sentiment model: {e}")
    raise RuntimeError("Error loading sentiment model. Check logs for details.") from e
29
+
30
+ # Extract Video ID from URL
31
+
32
# Character set accepted for a video ID; stops at '&', '?', '#', '/' and
# whitespace so trailing parameters never leak into the ID.
_VIDEO_ID_GROUP = r"([A-Za-z0-9_-]+)"

# Compiled once at import time and tried in order by extract_video_id().
_VIDEO_ID_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        # watch URLs; the lazy optional group allows other query parameters
        # before "v=" while still preferring the first "v=" found.
        r"youtube\.com/watch\?(?:[^#\s]*?&)??v=" + _VIDEO_ID_GROUP,
        r"youtube(?:-nocookie)?\.com/(?:embed|v|shorts|live)/" + _VIDEO_ID_GROUP,
        r"youtu\.be/" + _VIDEO_ID_GROUP,
    )
)


def extract_video_id(url):
    """Extract the YouTube video ID from the supported URL formats.

    Handles ``watch?v=`` links (with ``v`` anywhere in the query string),
    ``/embed/``, ``/v/``, ``/shorts/`` and ``/live/`` paths, and ``youtu.be``
    short links. The scheme and ``www.`` prefix are optional.

    Args:
        url: The URL text to inspect.

    Returns:
        The video ID string, or ``None`` when ``url`` is not a string or no
        supported pattern matches.
    """
    if not isinstance(url, str):
        return None

    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)

    return None  # If no match found, return None
54
+
55
+ # Fetch YouTube Comments with Pagination
56
def get_comments(video_id, max_results=500):
    """Fetch top-level comments for a video, following pagination.

    Args:
        video_id: ID of the YouTube video whose comment threads are listed.
        max_results: Upper bound on the number of comments returned.

    Returns:
        A ``(comments, error)`` tuple. On success ``comments`` is a list of
        comment texts (at most ``max_results``) and ``error`` is ``None``.
        On any failure, including building the API client, ``comments`` is
        an empty list and ``error`` is a message string.
    """
    comments = []
    next_page_token = None

    try:
        # Built inside the try so that client-construction failures are
        # reported through the error element like every other failure.
        youtube = build("youtube", "v3", developerKey=API_KEY)

        while len(comments) < max_results:
            response = youtube.commentThreads().list(
                part="snippet",
                videoId=video_id,
                maxResults=min(100, max_results - len(comments)),  # Up to 100 per request
                textFormat="plainText",
                pageToken=next_page_token,
            ).execute()

            comments.extend(
                item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
                for item in response.get("items", [])
            )

            next_page_token = response.get("nextPageToken")
            if not next_page_token:
                break

        logging.info(f"Fetched {len(comments)} comments for Video ID: {video_id}")
    except Exception as e:
        logging.error(f"Error fetching comments: {e}")
        return [], f"Error fetching comments: {e}"

    return comments[:max_results], None
86
+
87
+
88
def get_video_title(video_id):
    """Fetch the title of a YouTube video using the YouTube Data API.

    Args:
        video_id: ID of the video to look up.

    Returns:
        The video title; ``"Unknown Video Title"`` when the response holds
        no items; ``"Error Fetching Title"`` when the request, or building
        the API client, raises.
    """
    try:
        # Built inside the try so that a client-construction failure yields
        # the same fallback title as a failed request.
        youtube = build("youtube", "v3", developerKey=API_KEY)
        response = youtube.videos().list(
            part="snippet",
            id=video_id,
        ).execute()

        items = response.get("items") or []
        if items:
            return items[0]["snippet"]["title"]
        return "Unknown Video Title"
    except Exception as e:
        logging.error(f"Error fetching video title: {e}")
        return "Error Fetching Title"
109
+
110
+ # Sentiment Analysis
111
def analyze_sentiment(comments, classifier=None):
    """Classify each comment and tally the winning sentiment labels.

    Args:
        comments: Iterable of comment strings.
        classifier: Optional callable used instead of the module-level
            ``sentiment_classifier``. It is called as
            ``classifier(comment, truncation=True)`` and must return a list
            of ``{"label", "score"}`` dicts, optionally wrapped in an outer
            one-element list.

    Returns:
        A ``(results, sentiment_counts)`` tuple. ``results`` is a list of
        ``{"Comment", "Sentiment", "Score"}`` dicts and ``sentiment_counts``
        maps each label to its count. Comments that fail to classify are
        logged and skipped. If comments were supplied but none could be
        classified, ``([], {})`` is returned so that callers can detect the
        failure.
    """
    if classifier is None:
        classifier = sentiment_classifier

    results = []
    sentiment_counts = {"positive": 0, "neutral": 0, "negative": 0}
    attempted = 0

    for comment in comments:
        attempted += 1
        try:
            # truncation=True asks the tokenizer to cut over-long comments
            # rather than raise on them.
            sentiment_scores = classifier(comment, truncation=True)
            # Accept both a nested [[{...}, ...]] and a flat [{...}, ...] result.
            if sentiment_scores and isinstance(sentiment_scores[0], list):
                sentiment_scores = sentiment_scores[0]
            sentiment = max(sentiment_scores, key=lambda x: x['score'])
            sentiment_label = sentiment['label']
            score = sentiment['score']
        except Exception as e:
            # One bad comment must not discard the results gathered so far.
            logging.error(f"Error analyzing sentiment: {e}")
            continue

        # .get() keeps an unexpected label from raising KeyError.
        sentiment_counts[sentiment_label] = sentiment_counts.get(sentiment_label, 0) + 1
        results.append({"Comment": comment, "Sentiment": sentiment_label, "Score": score})

    if attempted and not results:
        logging.error("Error analyzing sentiment: no comment could be classified.")
        return [], {}

    logging.info("Sentiment analysis completed successfully.")
    return results, sentiment_counts
129
+
130
+ # Generate Pie Chart
131
def plot_pie_chart(sentiment_counts, video_title):
    """Generate a pie chart of the sentiment distribution.

    Args:
        sentiment_counts: Mapping of sentiment label to comment count.
        video_title: Title shown above the chart, wrapped at 50 characters.

    Returns:
        The Matplotlib figure, or ``None`` when every count is zero or when
        drawing fails.
    """
    fig = None
    try:
        # With no classified comments there are no wedges to draw.
        if sum(sentiment_counts.values()) <= 0:
            logging.warning("No sentiment counts available to plot.")
            return None

        fig, ax = plt.subplots(figsize=(8, 6))  # Increase figure size for better visibility

        # Wrap title if it's too long
        wrapped_title = "\n".join(textwrap.wrap(video_title, width=50))

        ax.pie(
            list(sentiment_counts.values()),
            labels=list(sentiment_counts.keys()),
            autopct='%1.1f%%',
            startangle=140,
        )
        ax.set_title(f"Sentiment Analysis for:\n{wrapped_title}", fontsize=10)  # Apply wrapped title

        logging.info(f"Pie chart generated successfully for {video_title}.")
        return fig
    except Exception as e:
        logging.error(f"Error generating pie chart: {e}")
        return None
    finally:
        # pyplot keeps a reference to every figure it creates until closed.
        # Closing here stops figures piling up across requests; the returned
        # Figure object can still be rendered by the caller.
        if fig is not None:
            plt.close(fig)
154
+
155
+ # Overall Sentiment Summary
156
def get_overall_sentiment(sentiment_counts):
    """Summarise which sentiment label has the highest count.

    Args:
        sentiment_counts: Mapping of sentiment label to comment count.

    Returns:
        ``"Overall Video Sentiment: <LABEL>"`` for the label with the highest
        count (the first such label on a tie), or
        ``"Error calculating overall sentiment."`` when the mapping is empty,
        every count is zero, or the input is not a usable mapping.
    """
    try:
        # All-zero counts carry no information; picking max() here would
        # report the first label as the winner with no supporting comments.
        if not sentiment_counts or not any(sentiment_counts.values()):
            logging.warning("No sentiment counts available to summarize.")
            return "Error calculating overall sentiment."

        dominant_label = max(sentiment_counts, key=sentiment_counts.get)
        overall_sentiment = f"Overall Video Sentiment: {dominant_label.upper()}"
        logging.info(f"Overall Sentiment: {overall_sentiment}")
        return overall_sentiment
    except Exception as e:
        logging.error(f"Error calculating overall sentiment: {e}")
        return "Error calculating overall sentiment."
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ google-api-python-client
2
+ transformers
3
+ torch
4
+ pandas
5
+ matplotlib
6
+ gradio