"""Streamlit dashboard that charts YouTube trending-video analytics.

Each analysis is fetched as JSON from the API at ``API_BASE_URL`` and drawn
with Plotly Express; the sidebar also offers a preview of the merged CSV.
"""

import streamlit as st
import requests
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

API_BASE_URL = "http://localhost:8000"
CSV_FILE_PATH = "src/data/merged_yt_data.csv"
KAGGLE_LINK = "https://www.kaggle.com/datasets/rsrishav/youtube-trending-video-dataset?select=IN_category_id.json"

# Upper bound (seconds) on a single API call so a stalled backend cannot hang the app.
REQUEST_TIMEOUT_SECONDS = 10
# Number of CSV rows shown in the sidebar preview.
PREVIEW_ROW_LIMIT = 1000


def fetch_data(endpoint, timeout=REQUEST_TIMEOUT_SECONDS):
    """Fetch and decode the JSON payload served at ``endpoint``.

    Args:
        endpoint: Path appended to ``API_BASE_URL`` (e.g. ``"/most_popular_categories"``).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        server answers with an error status, or the body is not valid JSON.
        Failures are reported to the user through ``st.error``.
    """
    try:
        response = requests.get(f"{API_BASE_URL}{endpoint}", timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers requests releases whose ``json()`` raises a plain
        # JSON decode error rather than a RequestException subclass.
        st.error(f"Failed to fetch data: {e}")
        return None


def _trending_videos_figure(data):
    """Line chart built from the ``trending_video_counts`` mapping (date -> count)."""
    df = pd.DataFrame(
        list(data["trending_video_counts"].items()),
        columns=["Date", "Count"],
    )
    df["Date"] = pd.to_datetime(df["Date"])
    return px.line(df, x="Date", y="Count", title="Trending Videos Over Time")


def _popular_categories_figure(data):
    """Donut chart built from the ``most_popular_categories`` mapping."""
    df = pd.DataFrame.from_dict(
        data["most_popular_categories"], orient="index", columns=["Count"]
    )
    return px.pie(
        df,
        names=df.index,
        values="Count",
        title="Popular Categories",
        hole=0.4,
        color_discrete_sequence=px.colors.sequential.RdBu,
    )


def _like_ratio_figure(data):
    """Histogram of the ``like_ratio`` column of ``like_ratio_distribution``."""
    df = pd.DataFrame(data["like_ratio_distribution"])
    return px.histogram(
        df,
        x="like_ratio",
        nbins=50,
        title="Like Ratio Distribution",
        color_discrete_sequence=["#ff0000"],
    )


def _top_liked_videos_figure(data):
    """Bar chart of ``likes`` per ``title`` from ``top_liked_videos``."""
    df = pd.DataFrame(data["top_liked_videos"])
    return px.bar(
        df,
        x="title",
        y="likes",
        title="🔝 Top Liked Videos",
        color="likes",
        color_continuous_scale="Reds",
    )


def _top_trending_channels_figure(data):
    """Bar chart of the ten channels with the highest trending count."""
    df = pd.DataFrame(
        list(data["top_trending_channels"].items()),
        columns=["Channel", "Trending Count"],
    )
    df = df.sort_values(by="Trending Count", ascending=False).head(10)
    return px.bar(
        df,
        x="Channel",
        y="Trending Count",
        title="Top Trending Channels",
        color="Trending Count",
        color_continuous_scale="Reds",
    )


def _channel_growth_figure(data):
    """Per-channel line chart of ``video_count`` against ``published_month``."""
    # Unlike the other endpoints, the payload itself is turned into the frame.
    df = pd.DataFrame(data)
    return px.line(
        df,
        x="published_month",
        y="video_count",
        color="channelTitle",
        title="Channel Growth Over Time",
        line_shape="spline",
    )


def _like_view_ratio_figure(data):
    """Sunburst of ``like_view_ratio`` per ``category_name``."""
    df = pd.DataFrame(data["data"])
    return px.sunburst(
        df,
        path=["category_name"],
        values="like_view_ratio",
        title="Category Like-View Ratio",
        color="like_view_ratio",
        color_continuous_scale="RdYlBu",
    )


def _comment_engagement_figure(data):
    """Treemap of ``comment_count`` per ``category_name``."""
    df = pd.DataFrame(data["data"])
    return px.treemap(
        df,
        path=["category_name"],
        values="comment_count",
        title="Category Comment Engagement",
        color="comment_count",
        color_continuous_scale="Blues",
    )


# Chart builder for each API endpoint. Keying on the endpoint avoids matching
# fragments of the emoji-decorated menu labels.
_FIGURE_BUILDERS = {
    "/trending_videos_count": _trending_videos_figure,
    "/most_popular_categories": _popular_categories_figure,
    "/engagement/like_ratio_distribution": _like_ratio_figure,
    "/engagement/top_liked_videos": _top_liked_videos_figure,
    "/channel-performance/top-trending": _top_trending_channels_figure,
    "/channel-performance/growth-over-time": _channel_growth_figure,
    "/category-like-view-ratio": _like_view_ratio_figure,
    "/category-comment-engagement": _comment_engagement_figure,
}


st.set_page_config(
    page_title="YouTube Trending Insights",
    layout="wide",
    initial_sidebar_state="expanded"
)

# NOTE(review): this literal is blank in the reviewed text; presumably the
# page's custom CSS/HTML belongs here. Confirm against the original file.
st.markdown(""" """, unsafe_allow_html=True)

# Sidebar with Logo and Select Box
with st.sidebar:
    # NOTE(review): blank literal; presumably the logo markup. Confirm.
    st.markdown("\n\n", unsafe_allow_html=True)
    st.markdown("\n\n📊 YouTube Analytics\n\n", unsafe_allow_html=True)

    # Kaggle Dataset Link
    # Rendered as a Markdown link so KAGGLE_LINK actually reaches the page; the
    # previous template had no replacement field, so .format() dropped the URL.
    st.sidebar.markdown(
        f"\n\n[📂 Kaggle Dataset]({KAGGLE_LINK})\n\n",
        unsafe_allow_html=True,
    )

    # Analysis Options
    options = {
        "📈 Trending Videos Over Time": "/trending_videos_count",
        "🥧 Most Popular Categories": "/most_popular_categories",
        "📊 Like Ratio Distribution": "/engagement/like_ratio_distribution",
        "👍 Top Liked Videos": "/engagement/top_liked_videos",
        "🏆 Top Trending Channels": "/channel-performance/top-trending",
        "📅 Channel Growth Over Time": "/channel-performance/growth-over-time",
        "❤️ Category Like-View Ratio": "/category-like-view-ratio",
        "💬 Category Comment Engagement": "/category-comment-engagement"
    }

    selected_option = st.selectbox(
        "Choose an analysis:",
        list(options.keys()),
        help="Select a visualization to explore YouTube trends",
    )

st.markdown(f"\n\n{selected_option}\n\n", unsafe_allow_html=True)

selected_endpoint = options[selected_option]
data = fetch_data(selected_endpoint)

if data:
    try:
        fig = _FIGURE_BUILDERS[selected_endpoint](data)
    except (KeyError, TypeError, ValueError, AttributeError) as e:
        # The payload did not have the keys/columns the chart needs; report it
        # instead of letting the traceback take over the page.
        st.error(f"Unexpected response format: {e}")
    else:
        st.plotly_chart(fig, use_container_width=True)
elif data is not None:
    # The request succeeded but returned an empty payload.
    st.warning("No data available for this analysis.")

# Dataset Preview
st.sidebar.markdown("\n\n📋 Dataset Preview\n\n", unsafe_allow_html=True)
with st.sidebar.expander("View Raw Dataset", expanded=False):
    if st.button("Show Dataset Preview"):
        # Broad catch is deliberate: this is a UI boundary and any failure to
        # load or display the file should surface as a message in the expander.
        try:
            # Only the previewed rows are parsed rather than the whole file.
            df_csv = pd.read_csv(CSV_FILE_PATH, nrows=PREVIEW_ROW_LIMIT)
            st.dataframe(df_csv, use_container_width=True)
        except Exception as e:
            st.error(f"Error loading dataset: {e}")