import streamlit as st
import requests
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
# Base URL of the analytics API; endpoint paths are appended to it when fetching.
API_BASE_URL = "http://localhost:8000"

# CSV file loaded for the sidebar dataset preview.
CSV_FILE_PATH = "src/data/merged_yt_data.csv"

# Kaggle page of the dataset, linked from the sidebar.
KAGGLE_LINK = "https://www.kaggle.com/datasets/rsrishav/youtube-trending-video-dataset?select=IN_category_id.json"

# Page-wide Streamlit settings: browser tab title, wide layout, sidebar open.
st.set_page_config(
    page_title="YouTube Trending Insights",
    layout="wide",
    initial_sidebar_state="expanded",
)

# NOTE(review): this call emits only a newline; it looks like a slot for
# custom page markup that is currently empty -- confirm.
st.markdown("\n", unsafe_allow_html=True)
# Sidebar with Logo and Select Box
with st.sidebar:
    # NOTE(review): renders only a newline; presumably the logo slot named in
    # the section comment above -- confirm what markup belongs here.
    st.markdown("\n", unsafe_allow_html=True)

    # Sidebar title. Kept on one line: a single-quoted string literal cannot
    # span physical lines.
    st.markdown("📊 YouTube Analytics", unsafe_allow_html=True)

    # Kaggle Dataset Link
    # Rendered as a Markdown link so KAGGLE_LINK is actually used as the target.
    st.sidebar.markdown(
        f"[📂 Kaggle Dataset]({KAGGLE_LINK})",
        unsafe_allow_html=True,
    )

    # Analysis Options
    # Maps each menu label to the API endpoint path that serves its data.
    # The chart-selection code matches on fragments of these labels, so keep
    # the two in sync when renaming an entry.
    options = {
        "📈 Trending Videos Over Time": "/trending_videos_count",
        "🥧 Most Popular Categories": "/most_popular_categories",
        "📊 Like Ratio Distribution": "/engagement/like_ratio_distribution",
        "👍 Top Liked Videos": "/engagement/top_liked_videos",
        "🏆 Top Trending Channels": "/channel-performance/top-trending",
        "📅 Channel Growth Over Time": "/channel-performance/growth-over-time",
        "❤️ Category Like-View Ratio": "/category-like-view-ratio",
        "💬 Category Comment Engagement": "/category-comment-engagement",
    }

    # The chosen label drives both the page heading and the endpoint lookup.
    selected_option = st.selectbox(
        "Choose an analysis:",
        list(options.keys()),
        help="Select a visualization to explore YouTube trends",
    )
def fetch_data(endpoint, timeout=10):
    """Fetch a JSON payload from the analytics API.

    Args:
        endpoint: Path appended to ``API_BASE_URL``, e.g.
            ``"/trending_videos_count"``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on an unresponsive API,
            which would freeze the whole Streamlit script run.

    Returns:
        The decoded JSON body, or ``None`` when the request fails, the server
        answers with an HTTP error status, or the body is not valid JSON. In
        every failure case an error message is shown with ``st.error``.
    """
    try:
        response = requests.get(f"{API_BASE_URL}{endpoint}", timeout=timeout)
        response.raise_for_status()
        return response.json()
    # ValueError covers JSON decoding failures on requests versions whose
    # decode error does not derive from RequestException.
    except (requests.RequestException, ValueError) as e:
        st.error(f"Failed to fetch data: {e}")
        return None
# Page heading: the label of the analysis chosen in the select box.
# Written as a single-line call; a single-quoted (f-)string literal cannot
# continue onto the next physical line.
st.markdown(selected_option, unsafe_allow_html=True)

# Look up the endpoint mapped to the chosen label and fetch its payload.
# fetch_data returns None (after showing an error) when the request fails.
data = fetch_data(options[selected_option])
def _trending_over_time(payload):
    """Line chart of the "trending_video_counts" mapping (date -> count)."""
    frame = pd.DataFrame(payload["trending_video_counts"].items(), columns=["Date", "Count"])
    frame["Date"] = pd.to_datetime(frame["Date"])
    return px.line(frame, x="Date", y="Count", title="Trending Videos Over Time")


def _popular_categories(payload):
    """Donut chart of the "most_popular_categories" mapping (name -> count)."""
    frame = pd.DataFrame.from_dict(
        payload["most_popular_categories"], orient='index', columns=["Count"]
    )
    return px.pie(
        frame,
        names=frame.index,
        values="Count",
        title="Popular Categories",
        hole=0.4,
        color_discrete_sequence=px.colors.sequential.RdBu,
    )


def _like_ratio_distribution(payload):
    """Histogram of the "like_ratio" column of "like_ratio_distribution"."""
    frame = pd.DataFrame(payload["like_ratio_distribution"])
    return px.histogram(
        frame,
        x="like_ratio",
        nbins=50,
        title="Like Ratio Distribution",
        color_discrete_sequence=['#ff0000'],
    )


def _top_liked_videos(payload):
    """Bar chart of likes per title from "top_liked_videos"."""
    frame = pd.DataFrame(payload["top_liked_videos"])
    return px.bar(
        frame,
        x="title",
        y="likes",
        title="🔝 Top Liked Videos",
        color="likes",
        color_continuous_scale="Reds",
    )


def _top_trending_channels(payload):
    """Bar chart of the ten channels with the highest trending count."""
    frame = pd.DataFrame(
        payload["top_trending_channels"].items(), columns=["Channel", "Trending Count"]
    )
    top_ten = frame.sort_values(by="Trending Count", ascending=False).head(10)
    return px.bar(
        top_ten,
        x="Channel",
        y="Trending Count",
        title="Top Trending Channels",
        color="Trending Count",
        color_continuous_scale="Reds",
    )


def _channel_growth(payload):
    """Spline line chart of video_count by published_month, one line per channelTitle."""
    frame = pd.DataFrame(payload)
    return px.line(
        frame,
        x="published_month",
        y="video_count",
        color="channelTitle",
        title="Channel Growth Over Time",
        line_shape="spline",
    )


def _category_like_view_ratio(payload):
    """Sunburst of like_view_ratio per category_name, read from "data"."""
    frame = pd.DataFrame(payload["data"])
    return px.sunburst(
        frame,
        path=["category_name"],
        values="like_view_ratio",
        title="Category Like-View Ratio",
        color="like_view_ratio",
        color_continuous_scale="RdYlBu",
    )


def _category_comment_engagement(payload):
    """Treemap of comment_count per category_name, read from "data"."""
    frame = pd.DataFrame(payload["data"])
    return px.treemap(
        frame,
        path=["category_name"],
        values="comment_count",
        title="Category Comment Engagement",
        color="comment_count",
        color_continuous_scale="Blues",
    )


# (label fragment, figure builder) pairs. Order matters: the first fragment
# found inside the selected label decides which chart is drawn.
_CHART_BUILDERS = (
    ("Trending Videos", _trending_over_time),
    ("Popular Categories", _popular_categories),
    ("Like Ratio", _like_ratio_distribution),
    ("Top Liked Videos", _top_liked_videos),
    ("Top Trending Channels", _top_trending_channels),
    ("Channel Growth", _channel_growth),
    ("Like-View Ratio", _category_like_view_ratio),
    ("Comment Engagement", _category_comment_engagement),
)

# Nothing is drawn for a missing or empty payload, or for an unmatched label.
if data:
    build_figure = next(
        (builder for fragment, builder in _CHART_BUILDERS if fragment in selected_option),
        None,
    )
    if build_figure is not None:
        st.plotly_chart(build_figure(data), use_container_width=True)
# Dataset Preview
# Section label, kept on one line: a single-quoted string literal cannot span
# physical lines.
st.sidebar.markdown("📋 Dataset Preview", unsafe_allow_html=True)
with st.sidebar.expander("View Raw Dataset", expanded=False):
    # Load the file only on demand; the button is True for the single script
    # run triggered by the click.
    if st.button("Show Dataset Preview"):
        try:
            # Parse only the rows that are displayed instead of reading the
            # whole CSV and discarding everything after the first 1000 rows.
            df_csv = pd.read_csv(CSV_FILE_PATH, nrows=1000)
            st.dataframe(df_csv, use_container_width=True)
        # Broad on purpose: this is a UI boundary and any load or render
        # failure should surface as a message rather than abort the page.
        except Exception as e:
            st.error(f"Error loading dataset: {e}")