Spaces:
Running
Running
import os

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import requests
import streamlit as st
# Base URL of the analytics API that every chart endpoint is appended to.
# Defaults to the local development server; set the API_BASE_URL environment
# variable to point the dashboard at a different host without editing code.
# A trailing slash is stripped because the endpoint paths start with "/".
API_BASE_URL = os.environ.get("API_BASE_URL", "http://localhost:8000").rstrip("/")
# CSV file loaded by the sidebar's raw-dataset preview.
CSV_FILE_PATH = "src/data/merged_yt_data.csv"
# Target of the "Open Kaggle Dataset" link shown in the sidebar.
KAGGLE_LINK = "https://www.kaggle.com/datasets/rsrishav/youtube-trending-video-dataset?select=IN_category_id.json"
# Page-level Streamlit settings: browser tab title, wide layout, and a
# sidebar that starts expanded.
_PAGE_SETTINGS = {
    "page_title": "YouTube Trending Insights",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# Stylesheet injected into the page: light background for the main area,
# red centred titles, dark subtitles, and red buttons that darken on hover.
_CUSTOM_CSS = """
<style>
.main {
background-color: #f5f5f5;
padding: 20px;
border-radius: 10px;
}
.title {
color: #ff0000;
font-family: 'Arial', sans-serif;
text-align: center;
padding: 20px 0;
}
.subtitle {
color: #333333;
font-family: 'Arial', sans-serif;
padding: 10px 0;
}
.stButton>button {
background-color: #ff0000;
color: white;
border-radius: 5px;
padding: 10px 20px;
}
.stButton>button:hover {
background-color: #cc0000;
}
</style>
"""
# unsafe_allow_html is required so the <style> tag is emitted as raw HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Sidebar: logo, heading, Kaggle dataset link, and the analysis selector.
with st.sidebar:
    logo_html = """
<div style='text-align:center;'>
<img src='https://upload.wikimedia.org/wikipedia/commons/b/b8/YouTube_Logo_2017.svg' width='80%'>
</div>
"""
    st.markdown(logo_html, unsafe_allow_html=True)
    st.markdown("<h2 class='subtitle'>π YouTube Analytics</h2>", unsafe_allow_html=True)

    # Link button that opens the Kaggle dataset in a new browser tab.
    kaggle_html = f"""
<h3 style='text-align:center;'>π Kaggle Dataset</h3>
<p style='text-align:center;'>
<a href='{KAGGLE_LINK}' target='_blank' style='text-decoration:none;'>
<button style='background-color:#ff0000; color:white; padding:10px 20px; border:none; border-radius:5px; cursor:pointer;'>
π Open Kaggle Dataset
</button>
</a>
</p>
"""
    st.sidebar.markdown(kaggle_html, unsafe_allow_html=True)

    # Maps each label shown in the selector to the API endpoint that
    # supplies the data for that analysis.
    options = {
        "π Trending Videos Over Time": "/trending_videos_count",
        "π₯§ Most Popular Categories": "/most_popular_categories",
        "π Like Ratio Distribution": "/engagement/like_ratio_distribution",
        "π Top Liked Videos": "/engagement/top_liked_videos",
        "π Top Trending Channels": "/channel-performance/top-trending",
        "π Channel Growth Over Time": "/channel-performance/growth-over-time",
        "β€οΈ Category Like-View Ratio": "/category-like-view-ratio",
        "π¬ Category Comment Engagement": "/category-comment-engagement"
    }
    analysis_labels = list(options)
    selected_option = st.selectbox(
        "Choose an analysis:",
        analysis_labels,
        help="Select a visualization to explore YouTube trends",
    )
def fetch_data(endpoint, timeout=10):
    """Fetch and decode the JSON payload for one API endpoint.

    Args:
        endpoint: Path appended to ``API_BASE_URL`` (e.g. "/trending_videos_count").
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would freeze the
            page if the API is unresponsive.

    Returns:
        The decoded JSON body, or ``None`` if the request or decoding failed.
        Failures are reported to the user through ``st.error``.
    """
    try:
        response = requests.get(f"{API_BASE_URL}{endpoint}", timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests releases where the
        # decode error is not a RequestException subclass.
        st.error(f"Failed to fetch data: {e}")
        return None
st.markdown(f"<h2 class='subtitle'>{selected_option}</h2>", unsafe_allow_html=True)


def _trending_videos_figure(payload):
    """Line chart of trending-video counts over time."""
    df = pd.DataFrame(payload["trending_video_counts"].items(), columns=["Date", "Count"])
    df["Date"] = pd.to_datetime(df["Date"])
    return px.line(df, x="Date", y="Count", title="Trending Videos Over Time")


def _popular_categories_figure(payload):
    """Donut chart of video counts per category."""
    df = pd.DataFrame.from_dict(payload["most_popular_categories"], orient='index', columns=["Count"])
    return px.pie(df, names=df.index, values="Count", title="Popular Categories",
                  hole=0.4, color_discrete_sequence=px.colors.sequential.RdBu)


def _like_ratio_figure(payload):
    """Histogram of the like_ratio values."""
    df = pd.DataFrame(payload["like_ratio_distribution"])
    return px.histogram(df, x="like_ratio", nbins=50, title="Like Ratio Distribution",
                        color_discrete_sequence=['#ff0000'])


def _top_liked_videos_figure(payload):
    """Bar chart of likes per video title."""
    df = pd.DataFrame(payload["top_liked_videos"])
    return px.bar(df, x="title", y="likes", title="π Top Liked Videos",
                  color="likes", color_continuous_scale="Reds")


def _top_trending_channels_figure(payload):
    """Bar chart of the ten channels with the highest trending count."""
    df = pd.DataFrame(payload["top_trending_channels"].items(), columns=["Channel", "Trending Count"])
    df = df.sort_values(by="Trending Count", ascending=False).head(10)
    return px.bar(df, x="Channel", y="Trending Count", title="Top Trending Channels",
                  color="Trending Count", color_continuous_scale="Reds")


def _channel_growth_figure(payload):
    """Per-channel line chart of video_count by published_month."""
    # This endpoint's payload is passed to DataFrame directly, not unwrapped
    # from a key like the others.
    df = pd.DataFrame(payload)
    return px.line(df, x="published_month", y="video_count", color="channelTitle",
                   title="Channel Growth Over Time", line_shape="spline")


def _like_view_ratio_figure(payload):
    """Sunburst of like_view_ratio per category."""
    df = pd.DataFrame(payload["data"])
    return px.sunburst(df, path=["category_name"], values="like_view_ratio",
                       title="Category Like-View Ratio", color="like_view_ratio",
                       color_continuous_scale="RdYlBu")


def _comment_engagement_figure(payload):
    """Treemap of comment_count per category."""
    df = pd.DataFrame(payload["data"])
    return px.treemap(df, path=["category_name"], values="comment_count",
                      title="Category Comment Engagement", color="comment_count",
                      color_continuous_scale="Blues")


# Ordered (label fragment, builder) pairs. The first fragment contained in the
# selected label wins, which is the same matching the selector labels rely on.
_FIGURE_BUILDERS = (
    ("Trending Videos", _trending_videos_figure),
    ("Popular Categories", _popular_categories_figure),
    ("Like Ratio", _like_ratio_figure),
    ("Top Liked Videos", _top_liked_videos_figure),
    ("Top Trending Channels", _top_trending_channels_figure),
    ("Channel Growth", _channel_growth_figure),
    ("Like-View Ratio", _like_view_ratio_figure),
    ("Comment Engagement", _comment_engagement_figure),
)

data = fetch_data(options[selected_option])
if data:
    build_figure = next(
        (builder for fragment, builder in _FIGURE_BUILDERS if fragment in selected_option),
        None,
    )
    if build_figure is not None:
        try:
            fig = build_figure(data)
        except (KeyError, TypeError, ValueError, AttributeError) as exc:
            # A payload without the expected keys/columns is reported to the
            # user instead of crashing the page with a traceback.
            st.error(f"Unexpected response format for this analysis: {exc!r}")
        else:
            st.plotly_chart(fig, use_container_width=True)
elif data is not None:
    # The request succeeded but returned an empty payload. A None result means
    # fetch_data already displayed an error, so nothing more is shown for it.
    st.info("No data available for this analysis.")
# Dataset Preview: an on-demand look at the first rows of the raw CSV.
st.sidebar.markdown("<h2 class='subtitle'>π Dataset Preview</h2>", unsafe_allow_html=True)
with st.sidebar.expander("View Raw Dataset", expanded=False):
    if st.button("Show Dataset Preview"):
        try:
            # Only the first 1000 rows are displayed, so only those rows are
            # parsed rather than loading the whole file on every click.
            df_csv = pd.read_csv(CSV_FILE_PATH, nrows=1000)
            st.dataframe(df_csv, use_container_width=True)
        except Exception as e:
            # UI boundary: any load/render failure (missing file, parse
            # error, ...) is shown to the user rather than raised.
            st.error(f"Error loading dataset: {e}")