File size: 4,416 Bytes
2fe303b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import gradio as gr
import json
from collections import Counter, defaultdict
import os

_MS_PER_HOUR = 1000 * 60 * 60


def _load_records(files):
    """Return every dict entry found in the top-level JSON arrays of *files*.

    Files that cannot be opened, decoded or parsed, and files whose top-level
    JSON value is not a list, are skipped (best effort).
    """
    if isinstance(files, str):
        # A single path would otherwise be iterated character by character.
        files = [files]

    records = []
    # `files` may be None when the handler is invoked with nothing uploaded.
    for path in files or []:
        try:
            # Read as UTF-8 explicitly instead of relying on the platform
            # default, and close the handle deterministically.
            with open(path, encoding="utf-8") as handle:
                data = json.load(handle)
        except (OSError, ValueError, TypeError, RecursionError):
            # OSError: unreadable file; ValueError: invalid JSON or bad
            # encoding; TypeError: not a path; RecursionError: absurd nesting.
            continue
        if isinstance(data, list):
            records.extend(entry for entry in data if isinstance(entry, dict))
    return records


def _label(value, fallback):
    """Return *value* as a display string, or *fallback* if it is null/empty."""
    if value is None or value == "":
        return fallback
    return value if isinstance(value, str) else str(value)


def _played_ms(value):
    """Return *value* if it is a real number, otherwise 0."""
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return 0
    return value


def _top_by_total(totals, limit=10):
    """Return the *limit* largest ``(name, total)`` pairs, largest first."""
    return sorted(totals.items(), key=lambda item: item[1], reverse=True)[:limit]


def _format_ranking(title, ranking, time_data=False):
    """Render one ranking as a bold title followed by a numbered list.

    With ``time_data`` the second element of each pair is milliseconds and is
    shown in hours; otherwise it is shown as a play count.
    """
    lines = [f"**{title}**"]
    for position, (name, amount) in enumerate(ranking, 1):
        if time_data:
            lines.append(f"{position}. {name} ({amount / _MS_PER_HOUR:.2f} hours)")
        else:
            lines.append(f"{position}. {name} ({amount} plays)")
    return "\n".join(lines) + "\n\n"


def analyze_spotify_data(files):
    """Summarise listening history JSON files as Markdown top-10 rankings.

    Args:
        files: Iterable of paths to JSON files (a single path string or
            ``None`` is also accepted). Each file is expected to hold a
            top-level list of dicts with ``artistName``, ``trackName`` and
            ``msPlayed`` keys, and optionally ``albumName``.

    Returns:
        A Markdown string with the top artists, tracks and albums, each ranked
        by play count and by listening time, or a short message when no
        usable records were found. Album rankings are only computed when
        every record carries an ``albumName`` key.
    """
    records = _load_records(files)
    if not records:
        return "No valid data found in the uploaded files."

    # Album stats are all-or-nothing: a partially populated field would give
    # misleading rankings.
    album_present = all("albumName" in record for record in records)

    artist_counter = Counter()
    track_counter = Counter()
    album_counter = Counter()
    artist_time = defaultdict(int)
    track_time = defaultdict(int)
    album_time = defaultdict(int)

    for record in records:
        artist = _label(record.get("artistName"), "Unknown Artist")
        track = _label(record.get("trackName"), "Unknown Track")
        # A null or non-numeric duration counts as a play with zero time
        # rather than aborting the whole analysis.
        ms_played = _played_ms(record.get("msPlayed"))

        artist_counter[artist] += 1
        track_counter[track] += 1
        artist_time[artist] += ms_played
        track_time[track] += ms_played

        if album_present:
            album = _label(record.get("albumName"), "Unknown Album")
            album_counter[album] += 1
            album_time[album] += ms_played

    if album_present:
        top_albums_by_count = album_counter.most_common(10)
        top_albums_by_time = _top_by_total(album_time)
    else:
        top_albums_by_count = [("No album data found", 0)]
        top_albums_by_time = [("No album data found", 0)]

    sections = [
        _format_ranking("Top Artists by Play Count", artist_counter.most_common(10)),
        _format_ranking("Top Artists by Listening Time", _top_by_total(artist_time), time_data=True),
        _format_ranking("Top Tracks by Play Count", track_counter.most_common(10)),
        _format_ranking("Top Tracks by Listening Time", _top_by_total(track_time), time_data=True),
        _format_ranking("Top Albums by Play Count", top_albums_by_count),
        _format_ranking("Top Albums by Listening Time", top_albums_by_time, time_data=True),
    ]
    return "".join(sections)

# Build the UI. Components are created inside the gr.Blocks() context, which
# is bound to the module-level name `demo`.
with gr.Blocks() as demo:
    # Page heading and usage instructions.
    gr.Markdown("# Spotify Listening Data Analyzer")
    gr.Markdown("Upload your Spotify JSON files (e.g., 'StreamingHistory0.json', 'StreamingHistory1.json', etc.) to get an overview of your top artists, albums, and tracks.")

    # Multiple uploads are allowed; type="filepath" is requested because
    # analyze_spotify_data passes each item straight to open().
    file_input = gr.File(file_count="multiple", type="filepath", label="Upload JSON files")
    analyze_button = gr.Button("Analyze")
    # The analysis result is a Markdown-formatted string, so it is shown in a
    # Markdown component.
    output_box = gr.Markdown()

    # Clicking the button calls analyze_spotify_data with the file input's
    # value and sends the returned string to output_box.
    analyze_button.click(fn=analyze_spotify_data, inputs=file_input, outputs=output_box)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()