molehh's picture
modified docker
89457d3
import os

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import requests
import streamlit as st
# Base URL of the analytics API that every chart endpoint is appended to.
# It can be overridden with the API_BASE_URL environment variable, so the app
# is not tied to an API running on the same host; the default is unchanged.
# A trailing slash is dropped because endpoints already start with "/".
API_BASE_URL = os.environ.get("API_BASE_URL", "http://localhost:8000").rstrip("/")
# CSV file loaded for the raw dataset preview.
CSV_FILE_PATH = "src/data/merged_yt_data.csv"
# Target of the "Open Kaggle Dataset" link.
KAGGLE_LINK = "https://www.kaggle.com/datasets/rsrishav/youtube-trending-video-dataset?select=IN_category_id.json"
# Page-level settings: browser tab title, wide layout, sidebar open on load.
_page_settings = {
    "page_title": "YouTube Trending Insights",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)
# Stylesheet injected into the page: main container, title/subtitle classes,
# and the red button theme (including its hover colour).
_CUSTOM_CSS = """
<style>
.main {
background-color: #f5f5f5;
padding: 20px;
border-radius: 10px;
}
.title {
color: #ff0000;
font-family: 'Arial', sans-serif;
text-align: center;
padding: 20px 0;
}
.subtitle {
color: #333333;
font-family: 'Arial', sans-serif;
padding: 10px 0;
}
.stButton>button {
background-color: #ff0000;
color: white;
border-radius: 5px;
padding: 10px 20px;
}
.stButton>button:hover {
background-color: #cc0000;
}
</style>
"""
# unsafe_allow_html is required, otherwise the <style> tag is escaped as text.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Sidebar with Logo and Select Box
# NOTE(review): the original indentation was lost; the nesting below follows
# the comment above (logo AND select box live in the sidebar) - confirm.
with st.sidebar:
    # Centered YouTube logo.
    st.markdown("""
<div style='text-align:center;'>
<img src='https://upload.wikimedia.org/wikipedia/commons/b/b8/YouTube_Logo_2017.svg' width='80%'>
</div>
""", unsafe_allow_html=True)
    st.markdown("<h2 class='subtitle'>📊 YouTube Analytics</h2>", unsafe_allow_html=True)

    # Kaggle Dataset Link
    st.markdown(f"""
<h3 style='text-align:center;'>📂 Kaggle Dataset</h3>
<p style='text-align:center;'>
<a href='{KAGGLE_LINK}' target='_blank' style='text-decoration:none;'>
<button style='background-color:#ff0000; color:white; padding:10px 20px; border:none; border-radius:5px; cursor:pointer;'>
🔗 Open Kaggle Dataset
</button>
</a>
</p>
""", unsafe_allow_html=True)

    # Analysis Options: display label -> API endpoint path.
    # The rendering code picks a chart by matching a text fragment of the
    # label, so the emoji prefixes are purely decorative.
    options = {
        "📈 Trending Videos Over Time": "/trending_videos_count",
        "🥧 Most Popular Categories": "/most_popular_categories",
        "📊 Like Ratio Distribution": "/engagement/like_ratio_distribution",
        "👍 Top Liked Videos": "/engagement/top_liked_videos",
        "🏆 Top Trending Channels": "/channel-performance/top-trending",
        "📅 Channel Growth Over Time": "/channel-performance/growth-over-time",
        "❤️ Category Like-View Ratio": "/category-like-view-ratio",
        "💬 Category Comment Engagement": "/category-comment-engagement",
    }
    selected_option = st.selectbox(
        "Choose an analysis:",
        list(options.keys()),
        help="Select a visualization to explore YouTube trends",
    )
def fetch_data(endpoint, timeout=10):
    """Fetch and decode the JSON payload of one API endpoint.

    Args:
        endpoint: Path appended to ``API_BASE_URL`` (expected to start
            with "/").
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on an unresponsive API.

    Returns:
        The decoded JSON body, or ``None`` when the request fails, the server
        answers with an error status, or the body is not valid JSON. In the
        failure case an error message is shown in the app.
    """
    try:
        response = requests.get(f"{API_BASE_URL}{endpoint}", timeout=timeout)
        response.raise_for_status()
        return response.json()
    # ValueError covers JSON decoding failures on requests versions whose
    # decode error is not a RequestException subclass.
    except (requests.RequestException, ValueError) as e:
        st.error(f"Failed to fetch data: {e}")
        return None
def _build_figure(label, payload):
    """Build the Plotly figure for the analysis identified by *label*.

    The analysis is chosen by looking for a text fragment inside *label*, so
    the decision does not depend on the emoji prefix of the option.

    Args:
        label: The option text selected by the user.
        payload: Decoded JSON returned by the API for that option.

    Returns:
        A Plotly figure, or ``None`` when *label* matches no known analysis.

    Raises:
        KeyError: When *payload* lacks the top-level key the analysis reads.
    """
    if "Trending Videos" in label:
        df = pd.DataFrame(payload["trending_video_counts"].items(), columns=["Date", "Count"])
        df["Date"] = pd.to_datetime(df["Date"])
        return px.line(df, x="Date", y="Count", title="Trending Videos Over Time")
    if "Popular Categories" in label:
        df = pd.DataFrame.from_dict(payload["most_popular_categories"], orient='index', columns=["Count"])
        return px.pie(df, names=df.index, values="Count", title="Popular Categories",
                      hole=0.4, color_discrete_sequence=px.colors.sequential.RdBu)
    if "Like Ratio" in label:
        df = pd.DataFrame(payload["like_ratio_distribution"])
        return px.histogram(df, x="like_ratio", nbins=50, title="Like Ratio Distribution",
                            color_discrete_sequence=['#ff0000'])
    if "Top Liked Videos" in label:
        df = pd.DataFrame(payload["top_liked_videos"])
        return px.bar(df, x="title", y="likes", title="🔝 Top Liked Videos",
                      color="likes", color_continuous_scale="Reds")
    if "Top Trending Channels" in label:
        df = pd.DataFrame(payload["top_trending_channels"].items(), columns=["Channel", "Trending Count"])
        # Keep only the ten channels with the highest trending count.
        df = df.sort_values(by="Trending Count", ascending=False).head(10)
        return px.bar(df, x="Channel", y="Trending Count", title="Top Trending Channels",
                      color="Trending Count", color_continuous_scale="Reds")
    if "Channel Growth" in label:
        # This endpoint's payload is passed to DataFrame as-is (no wrapper key).
        df = pd.DataFrame(payload)
        return px.line(df, x="published_month", y="video_count", color="channelTitle",
                       title="Channel Growth Over Time", line_shape="spline")
    if "Like-View Ratio" in label:
        df = pd.DataFrame(payload["data"])
        return px.sunburst(df, path=["category_name"], values="like_view_ratio",
                           title="Category Like-View Ratio", color="like_view_ratio",
                           color_continuous_scale="RdYlBu")
    if "Comment Engagement" in label:
        df = pd.DataFrame(payload["data"])
        return px.treemap(df, path=["category_name"], values="comment_count",
                          title="Category Comment Engagement", color="comment_count",
                          color_continuous_scale="Blues")
    return None


# Heading for the selected analysis, followed by its chart.
st.markdown(f"<h2 class='subtitle'>{selected_option}</h2>", unsafe_allow_html=True)
data = fetch_data(options[selected_option])
if data:
    try:
        fig = _build_figure(selected_option, data)
    except (KeyError, TypeError, AttributeError, ValueError) as exc:
        # A payload with an unexpected shape is reported as a message rather
        # than an unhandled traceback.
        st.error(f"Unexpected response format for this analysis: {exc}")
    else:
        if fig is not None:
            st.plotly_chart(fig, use_container_width=True)
elif data is not None:
    # fetch_data already reported failures (None); this is the empty-result case.
    st.info("No data available for this analysis.")
# Dataset Preview
st.sidebar.markdown("<h2 class='subtitle'>📋 Dataset Preview</h2>", unsafe_allow_html=True)
with st.sidebar.expander("View Raw Dataset", expanded=False):
    # The CSV is only read when the user asks for it.
    if st.button("Show Dataset Preview"):
        try:
            # Only the first 1000 rows are displayed, so only those are parsed
            # instead of loading the whole file and discarding the rest.
            df_csv = pd.read_csv(CSV_FILE_PATH, nrows=1000)
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file. ValueError: pandas parsing and
            # empty-file errors, and text decoding errors.
            st.error(f"Error loading dataset: {e}")
        else:
            st.dataframe(df_csv, use_container_width=True)