molehh commited on
Commit
74bdacd
·
1 Parent(s): 879815b

YouTube trend analyzer project

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ venv/
2
+ data
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi[standard]
2
+ pandas
3
+ sqlalchemy
4
+ streamlit
5
+ requests
6
+ plotly
7
+ matplotlib
src/backend/__pycache__/main.cpython-313.pyc ADDED
Binary file (530 Bytes). View file
 
src/backend/__pycache__/trending_videos_page.cpython-313.pyc ADDED
Binary file (7.28 kB). View file
 
src/backend/main.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""FastAPI entry point for the YouTube trend-analyzer backend.

Creates the application object and mounts the homepage and
data-visualization routers. The tags only group the routes in the
generated OpenAPI docs; they do not change any URL paths.
"""
from fastapi import FastAPI
from routes import homepage_api
from routes import data_visualization_page_api

app = FastAPI()

app.include_router(homepage_api.router, tags=["Home"])
app.include_router(data_visualization_page_api.router, tags=["Data"])
src/backend/routes/__init__.py ADDED
File without changes
src/backend/routes/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (171 Bytes). View file
 
src/backend/routes/__pycache__/data_visualization_page_api.cpython-313.pyc ADDED
Binary file (3.9 kB). View file
 
src/backend/routes/__pycache__/homepage_api.cpython-313.pyc ADDED
Binary file (455 Bytes). View file
 
src/backend/routes/__pycache__/time_trend.cpython-313.pyc ADDED
Binary file (279 Bytes). View file
 
src/backend/routes/data_visualization_page_api.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""HTTP routes exposing the dashboard's data-visualization queries.

Every endpoint delegates to a helper in ``trending_videos_page`` and
converts the resulting pandas object into a JSON-serializable payload.
"""
from fastapi import APIRouter
import trending_videos_page
from sqlalchemy.orm import Session  # NOTE(review): unused here -- confirm before removing

router = APIRouter()


@router.get("/trending_videos_count")
def trending_videos_count():
    """Count of trending videos per trending date."""
    counts = trending_videos_page.get_trending_videos_count()
    return {"trending_video_counts": counts.to_dict()}


@router.get("/most_popular_categories")
def most_popular_categories():
    """Count of trending videos per category."""
    counts = trending_videos_page.get_most_popular_categories()
    return {"most_popular_categories": counts.to_dict()}


@router.get("/engagement/like_ratio_distribution")
def like_ratio_distribution():
    """Per-video likes/views ratio rows."""
    ratios = trending_videos_page.get_like_ratio_distribution()
    return {"like_ratio_distribution": ratios.to_dict(orient="records")}


@router.get("/engagement/top_liked_videos")
def top_liked_videos():
    """Most-liked videos (title + likes)."""
    top = trending_videos_page.get_top_liked_videos()
    return {"top_liked_videos": top.to_dict(orient="records")}


@router.get("/channel-performance/top-trending")
def top_trending_channels():
    """How often each channel appears in the trending data."""
    channels = trending_videos_page.get_trending_channels()
    counts = channels["channelTitle"].value_counts().to_dict()
    return {"top_trending_channels": counts}


@router.get("/channel-performance/growth-over-time")
def channel_growth_over_time():
    """Monthly video counts per channel."""
    growth = trending_videos_page.calculate_channel_growth()
    return growth.to_dict(orient="records")


@router.get("/analysis")
def get_tags_analysis():
    """Frequency of every tag across trending videos."""
    return trending_videos_page.process_tags()


@router.get("/duration_vs_likes")
def get_trending_by_day():
    # NOTE(review): despite the path, this returns upload counts per
    # weekday -- confirm the intended route name against the frontend
    # before renaming; the path is kept as-is to avoid breaking clients.
    return trending_videos_page.analyze_upload_patterns("day")


@router.get("/category-like-view-ratio")
def get_category_like_view_ratio():
    """Mean like/view ratio per category."""
    return {"data": trending_videos_page.category_like_view_ratio()}


@router.get("/category-comment-engagement")
def get_category_comment_engagement():
    """Raw comment/view/like rows per category."""
    return {"data": trending_videos_page.category_comment_engagement()}
src/backend/routes/homepage_api.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
"""Homepage route for the backend API."""
from fastapi import APIRouter

router = APIRouter()


@router.get("/")
def home():
    """Landing / health-check endpoint.

    Returns a small static JSON payload so callers can confirm the
    backend is up.
    """
    # Bug fix: corrected the typo "welome" in the user-facing message.
    return {"Home": "welcome home page"}
src/backend/trending_videos_page.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ src_directory = os.path.abspath(os.path.join(
4
+ os.path.dirname(__file__), "../..", "src"))
5
+ sys.path.append(src_directory)
6
+ from services import data_processing
7
+ import pandas as pd
8
+ from collections import Counter
9
+ from sqlalchemy.orm import Session
10
+
11
+
12
def get_trending_videos_count():
    """Return a Series mapping each trending_date to its video count."""
    frame = data_processing.get_updated_df()
    return frame.groupby(["trending_date"])["trending_date"].count()
16
+
17
def get_most_popular_categories():
    """Return a Series mapping each category_name to its video count."""
    frame = data_processing.get_updated_df()
    return frame.groupby(["category_name"])["category_name"].count()
21
+
22
+
23
def get_views_vs_likes():
    """Return (views, likes) pairs with missing values dropped.

    Intended for scatter-plot style visualizations.
    """
    frame = data_processing.get_updated_df()
    return frame[["views", "likes"]].dropna()
27
+
28
+
29
def get_like_ratio_distribution():
    """Return video_id/views/likes rows plus a ``like_ratio`` column.

    Rows with zero views are excluded first so the ratio is always defined.
    """
    frame = data_processing.get_updated_df()
    subset = frame[["video_id", "views", "likes"]].copy()
    subset = subset[subset["views"] > 0]
    subset["like_ratio"] = subset["likes"] / subset["views"]
    return subset
35
+
36
def get_top_liked_videos(top_n=10, df=None):
    """Return the ``top_n`` most-liked videos as (title, likes) rows.

    Args:
        top_n: number of rows to return. Bug fix: this parameter was
            previously ignored -- the body hard-coded ``head(10)``.
        df: optional pre-loaded DataFrame; defaults to the merged dataset
            (added with a default so existing callers are unaffected).

    Returns:
        DataFrame with columns ``title`` and ``likes``, sorted by likes
        descending, NaN rows dropped.
    """
    if df is None:
        df = data_processing.get_updated_df()
    return (
        df[["title", "likes"]]
        .dropna()
        .sort_values(by="likes", ascending=False)
        .head(top_n)  # bug fix: honor top_n instead of a literal 10
    )
40
+
41
def get_trending_channels():
    """Return channelTitle/publishedAt columns for channel-level analysis."""
    frame = data_processing.get_updated_df()
    return frame.loc[:, ["channelTitle", "publishedAt"]].copy()
45
+
46
def calculate_channel_growth():
    """Return per-channel monthly video counts.

    Columns: published_month ("YYYY-MM" string), channelTitle, video_count.
    Rows whose publish date cannot be parsed are dropped.
    """
    frame = data_processing.get_updated_df()
    frame["publishedAt"] = pd.to_datetime(frame["publishedAt"], errors="coerce")
    frame = frame.dropna(subset=["publishedAt"])
    frame["published_month"] = frame["publishedAt"].dt.to_period("M").astype(str)
    return (
        frame.groupby(["published_month", "channelTitle"])
        .size()
        .reset_index(name="video_count")
    )
53
+
54
+
55
def process_tags():
    """Return ``{"tags": [{"tag": ..., "count": ...}, ...]}`` over all videos.

    Tags are pipe-separated in the source data; they are lower-cased,
    stripped, and empty entries are discarded before counting.
    """
    frame = data_processing.get_updated_df()
    split_tags = frame["tags"].dropna().str.lower().str.split("|")
    counter = Counter(
        tag.strip()
        for tag_list in split_tags
        for tag in tag_list
        if tag.strip()
    )
    return {"tags": [{"tag": t, "count": c} for t, c in counter.items()]}
62
+
63
+
64
def analyze_trending_duration():
    """Return trending lifespan per video and mean views per trending date.

    Returns a dict with:
        lifespan: [{"video_id": ..., "days_trending": ...}, ...]
        views_growth: [{"trending_date": ..., "views": <mean>}, ...]
    """
    frame = data_processing.get_updated_df()

    lifespan = frame.groupby("video_id")["trending_date"].count().reset_index()
    lifespan.columns = ["video_id", "days_trending"]

    growth = frame.groupby("trending_date")["views"].mean().reset_index()

    return {
        "lifespan": lifespan.to_dict(orient="records"),
        "views_growth": growth.to_dict(orient="records"),
    }
74
+
75
+
76
def analyze_upload_patterns(mode: str, df=None):
    """Count uploads per hour of day or per weekday.

    Args:
        mode: "hour" or "day"; anything else yields {"error": "Invalid mode"}.
        df: optional pre-loaded DataFrame; defaults to the merged dataset
            (added with a default so existing callers are unaffected).

    Returns:
        A list of {"hour"/"day": ..., "count": ...} records, or the error
        dict for an unknown mode. Unparseable publish dates are ignored.
    """
    if df is None:
        df = data_processing.get_updated_df()
    # Work on a parsed copy of the column instead of mutating the caller's
    # frame in place (the original added helper columns to ``df``).
    published = pd.to_datetime(df["publishedAt"], errors="coerce")

    if mode == "hour":
        hourly = published.dt.hour.value_counts().sort_index().reset_index()
        hourly.columns = ["hour", "count"]
        return hourly.to_dict(orient="records")

    if mode == "day":
        weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday",
                    "Friday", "Saturday", "Sunday"]
        # Bug fix: fill_value=0 keeps absent weekdays as integer 0 rather
        # than NaN, which previously leaked non-JSON-serializable NaNs
        # into the API response.
        daily = (
            published.dt.day_name()
            .value_counts()
            .reindex(weekdays, fill_value=0)
            .reset_index()
        )
        daily.columns = ["day", "count"]
        return daily.to_dict(orient="records")

    return {"error": "Invalid mode"}
94
+
95
+
96
def category_like_view_ratio(df=None):
    """Return the mean like/view ratio per category as a list of records.

    Args:
        df: optional pre-loaded DataFrame; defaults to the merged dataset
            (added with a default so existing callers are unaffected).

    Bug fix: rows with zero views are excluded before dividing. The
    original divided unconditionally, so a zero-view row produced
    inf/NaN and corrupted the category mean. This matches the filtering
    already done in get_like_ratio_distribution.
    """
    if df is None:
        df = data_processing.get_updated_df()
    valid = df[df["views"] > 0].copy()
    valid["like_view_ratio"] = valid["likes"] / valid["views"]
    per_category = (
        valid.groupby("category_name")["like_view_ratio"].mean().reset_index()
    )
    return per_category.to_dict(orient="records")
102
+
103
def category_comment_engagement():
    """Return category/comment/view/like rows (NaNs dropped) as records."""
    frame = data_processing.get_updated_df()
    columns = ["category_name", "comment_count", "views", "likes"]
    return frame[columns].dropna().to_dict(orient="records")


if __name__ == "__main__":
    # Module is imported by the API layer; nothing to run directly.
    pass
src/frontend/app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit dashboard for exploring YouTube trending-video analytics.

Fetches pre-computed aggregates from the FastAPI backend and renders
them with Plotly. Start the backend first, then run:
    streamlit run src/frontend/app.py
"""
import streamlit as st
import requests
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt  # NOTE(review): unused -- confirm before removing

API_BASE_URL = "http://127.0.0.1:8000"
CSV_FILE_PATH = "src/data/merged_yt_data.csv"
KAGGLE_LINK = "https://www.kaggle.com/datasets/rsrishav/youtube-trending-video-dataset?select=IN_category_id.json"
# Bug fix: a request timeout so a dead/unstarted backend can't hang the page.
REQUEST_TIMEOUT_SECONDS = 10

st.set_page_config(
    page_title="YouTube Trending Insights",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Global CSS theme (red accent, rounded buttons).
st.markdown("""
    <style>
    .main {
        background-color: #f5f5f5;
        padding: 20px;
        border-radius: 10px;
    }
    .title {
        color: #ff0000;
        font-family: 'Arial', sans-serif;
        text-align: center;
        padding: 20px 0;
    }
    .subtitle {
        color: #333333;
        font-family: 'Arial', sans-serif;
        padding: 10px 0;
    }
    .stButton>button {
        background-color: #ff0000;
        color: white;
        border-radius: 5px;
        padding: 10px 20px;
    }
    .stButton>button:hover {
        background-color: #cc0000;
    }
    </style>
""", unsafe_allow_html=True)

# Sidebar with Logo and Select Box
with st.sidebar:
    st.markdown("""
        <div style='text-align:center;'>
            <img src='https://upload.wikimedia.org/wikipedia/commons/b/b8/YouTube_Logo_2017.svg' width='80%'>
        </div>
    """, unsafe_allow_html=True)

    st.markdown("<h2 class='subtitle'>πŸ“Š YouTube Analytics</h2>", unsafe_allow_html=True)

# Kaggle Dataset Link
st.sidebar.markdown("""
    <h3 style='text-align:center;'>πŸ“‚ Kaggle Dataset</h3>
    <p style='text-align:center;'>
        <a href='{}' target='_blank' style='text-decoration:none;'>
            <button style='background-color:#ff0000; color:white; padding:10px 20px; border:none; border-radius:5px; cursor:pointer;'>
                πŸ”— Open Kaggle Dataset
            </button>
        </a>
    </p>
""".format(KAGGLE_LINK), unsafe_allow_html=True)

# Analysis options: label shown to the user -> backend endpoint path.
options = {
    "πŸ“ˆ Trending Videos Over Time": "/trending_videos_count",
    "πŸ₯§ Most Popular Categories": "/most_popular_categories",
    "πŸ“Š Like Ratio Distribution": "/engagement/like_ratio_distribution",
    "πŸ‘ Top Liked Videos": "/engagement/top_liked_videos",
    "πŸ† Top Trending Channels": "/channel-performance/top-trending",
    "πŸ“… Channel Growth Over Time": "/channel-performance/growth-over-time",
    "❀️ Category Like-View Ratio": "/category-like-view-ratio",
    "πŸ’¬ Category Comment Engagement": "/category-comment-engagement"
}

selected_option = st.selectbox("Choose an analysis:", list(options.keys()),
                               help="Select a visualization to explore YouTube trends")


def fetch_data(endpoint):
    """GET ``endpoint`` from the backend and return its parsed JSON.

    Shows a Streamlit error and returns None on any request failure
    (connection error, timeout, or HTTP error status).
    """
    try:
        response = requests.get(f"{API_BASE_URL}{endpoint}",
                                timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        st.error(f"Failed to fetch data: {e}")
        return None


st.markdown(f"<h2 class='subtitle'>{selected_option}</h2>", unsafe_allow_html=True)
data = fetch_data(options[selected_option])

# Each branch matches on a distinctive substring of the option label and
# builds the appropriate Plotly figure from the endpoint's payload shape.
if data:
    if "Trending Videos" in selected_option:
        df = pd.DataFrame(data["trending_video_counts"].items(), columns=["Date", "Count"])
        df["Date"] = pd.to_datetime(df["Date"])
        fig = px.line(df, x="Date", y="Count", title="Trending Videos Over Time")
        st.plotly_chart(fig, use_container_width=True)

    elif "Popular Categories" in selected_option:
        df = pd.DataFrame.from_dict(data["most_popular_categories"], orient='index', columns=["Count"])
        fig = px.pie(df, names=df.index, values="Count", title="Popular Categories",
                     hole=0.4, color_discrete_sequence=px.colors.sequential.RdBu)
        st.plotly_chart(fig, use_container_width=True)

    elif "Like Ratio" in selected_option:
        df = pd.DataFrame(data["like_ratio_distribution"])
        fig = px.histogram(df, x="like_ratio", nbins=50, title="Like Ratio Distribution",
                           color_discrete_sequence=['#ff0000'])
        st.plotly_chart(fig, use_container_width=True)

    elif "Top Liked Videos" in selected_option:
        df = pd.DataFrame(data["top_liked_videos"])
        fig = px.bar(df, x="title", y="likes", title="πŸ” Top Liked Videos",
                     color="likes", color_continuous_scale="Reds")
        st.plotly_chart(fig, use_container_width=True)

    elif "Top Trending Channels" in selected_option:
        df = pd.DataFrame(data["top_trending_channels"].items(), columns=["Channel", "Trending Count"])
        df = df.sort_values(by="Trending Count", ascending=False).head(10)
        fig = px.bar(df, x="Channel", y="Trending Count", title="Top Trending Channels",
                     color="Trending Count", color_continuous_scale="Reds")
        st.plotly_chart(fig, use_container_width=True)

    elif "Channel Growth" in selected_option:
        df = pd.DataFrame(data)
        fig = px.line(df, x="published_month", y="video_count", color="channelTitle",
                      title="Channel Growth Over Time", line_shape="spline")
        st.plotly_chart(fig, use_container_width=True)

    elif "Like-View Ratio" in selected_option:
        df = pd.DataFrame(data["data"])
        fig = px.sunburst(df, path=["category_name"], values="like_view_ratio",
                          title="Category Like-View Ratio", color="like_view_ratio",
                          color_continuous_scale="RdYlBu")
        st.plotly_chart(fig, use_container_width=True)

    elif "Comment Engagement" in selected_option:
        df = pd.DataFrame(data["data"])
        fig = px.treemap(df, path=["category_name"], values="comment_count",
                         title="Category Comment Engagement", color="comment_count",
                         color_continuous_scale="Blues")
        st.plotly_chart(fig, use_container_width=True)

# Dataset Preview
st.sidebar.markdown("<h2 class='subtitle'>πŸ“‹ Dataset Preview</h2>", unsafe_allow_html=True)
with st.sidebar.expander("View Raw Dataset", expanded=False):
    if st.button("Show Dataset Preview"):
        try:
            df_csv = pd.read_csv(CSV_FILE_PATH)
            # Cap the preview at 1000 rows to keep the sidebar responsive.
            st.dataframe(df_csv.head(1000), use_container_width=True)
        except Exception as e:
            st.error(f"Error loading dataset: {e}")
src/services/__pycache__/data_processing.cpython-313.pyc ADDED
Binary file (505 Bytes). View file
 
src/services/data_processing.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""CSV loading helpers for the YouTube trending dataset."""
import pandas as pd

# Raw export and the category-enriched file produced by the merge step.
csv_path = "src/data/yt_data.csv"
UPDATED_CSV_PATH = "src/data/merged_yt_data.csv"


def get_updated_df(path=UPDATED_CSV_PATH):
    """Load the merged trending dataset.

    Args:
        path: CSV file to read. Defaults to the merged dataset so all
            existing callers are unaffected; parameterized to allow
            loading alternate files and to ease testing.

    Returns:
        A fresh DataFrame. The file is re-read on every call, so edits to
        it are picked up, at the cost of repeated I/O.
    """
    return pd.read_csv(path)
12
+
13
+
14
+ # df = get_updated_df()
15
+ # print(df.columns)
16
+
17
+ # # Load JSON file
18
+ # json_path = "category_id.json"
19
+ # json_df = pd.read_json(json_path)
20
+
21
+ # # Extract category ID and title from JSON
22
+ # category_df = pd.json_normalize(json_df["items"])
23
+ # category_df = category_df[["id", "snippet.title"]].rename(
24
+ # columns={"id": "category_id", "snippet.title": "category_name"})
25
+
26
+ # # Merge CSV data with category data on category_id
27
+ # csv_df["category_id"] = csv_df["category_id"].astype(
28
+ # str) # Ensure category_id is a string for merging
29
+ # merged_df = pd.merge(csv_df, category_df, on="category_id",
30
+ # how="left") # Left join to keep all video data
31
+
32
+
33
+ # # Save the merged DataFrame as a new CSV file
34
+ # merged_path = "merged_yt_data.csv"
35
+ # merged_df.to_csv(merged_path, index=False)