File size: 10,802 Bytes
cf4f63b
e190970
0d2ed88
cf4f63b
0d2ed88
 
a1d780d
 
e638825
cf4f63b
e190970
 
 
cf4f63b
 
b3bb8cb
e638825
3c445df
cf4f63b
e190970
3c445df
c0ec2ff
cf4f63b
e638825
cf4f63b
4615d65
 
 
 
 
 
cf4f63b
 
 
e190970
3c445df
c0ec2ff
4615d65
cf4f63b
c0ec2ff
e190970
4615d65
c0ec2ff
 
 
 
 
 
 
 
 
 
e190970
c0ec2ff
 
 
 
cf4f63b
c0ec2ff
 
cf4f63b
c0ec2ff
 
 
 
 
e190970
3c445df
c0ec2ff
cf4f63b
0d2ed88
cf4f63b
0d2ed88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf4f63b
0d2ed88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf4f63b
 
 
 
0d2ed88
 
cf4f63b
 
 
0d2ed88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf4f63b
 
e190970
3c445df
c0ec2ff
cf4f63b
e638825
 
 
 
 
 
 
 
 
0d2ed88
e638825
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf4f63b
a1d780d
3c445df
a1d780d
3c445df
a1d780d
 
 
 
3c445df
a1d780d
 
 
 
 
 
 
 
3c445df
a1d780d
 
3c445df
a1d780d
3c445df
a1d780d
 
 
 
 
3c445df
 
 
 
 
 
 
a1d780d
3c445df
a1d780d
3c445df
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
import datetime
from concurrent.futures import as_completed
from requests.adapters import HTTPAdapter
from urllib import parse
import requests
import json
import pandas as pd

import streamlit as st
import wandb
from requests_futures.sessions import FuturesSession

from dashboard_utils.time_tracker import _log, simple_time_tracker

URL_QUICKSEARCH = "https://huggingface.co/api/quicksearch?"
WANDB_REPO = st.secrets["WANDB_REPO_INDIVIDUAL_METRICS"]  
CACHE_TTL = 100
MAX_DELTA_ACTIVE_RUN_SEC = 60 * 5


@st.cache(ttl=CACHE_TTL, show_spinner=False)
@simple_time_tracker(_log)
def get_new_bubble_data():
    """Fetch the serialized run data and matching HF profiles for the bubble chart.

    Returns a tuple of (serialized_data, profiles) where profiles covers every
    profileId present in the first point list of serialized_data.
    """
    serialized_data_points, latest_timestamp = get_serialized_data_points()
    serialized_data = get_serialized_data(serialized_data_points, latest_timestamp)

    # One profile lookup per user appearing in the chart data.
    usernames = [item["profileId"] for item in serialized_data["points"][0]]
    return serialized_data, get_profiles(usernames)


@st.cache(ttl=CACHE_TTL, show_spinner=False)
@simple_time_tracker(_log)
def get_profiles(usernames):
    """Resolve Hugging Face profile info (avatar + profile URL) for each username.

    Queries the HF quicksearch API concurrently via requests-futures. When no
    exact username match is found, a default avatar is used. Relative avatar
    paths are expanded to absolute huggingface.co URLs.

    Returns a list of dicts: {"id", "name", "src", "url"} per username.
    """
    profiles = []
    with FuturesSession() as session:
        futures = []
        for username in usernames:
            future = session.get(URL_QUICKSEARCH + parse.urlencode({"type": "user", "q": username}))
            future.username = username
            futures.append(future)
        for future in as_completed(futures):
            resp = future.result()
            username = future.username
            response = resp.json()
            avatarUrl = None
            # BUGFIX: take the avatar of the *matched* candidate. The previous
            # code matched on user_candidate but then read
            # response["users"][0]["avatarUrl"], i.e. the first search hit,
            # which may be a different user.
            for user_candidate in response.get("users", []):
                if user_candidate["user"] == username:
                    avatarUrl = user_candidate["avatarUrl"]
                    break
            if not avatarUrl:
                avatarUrl = "/avatars/57584cb934354663ac65baa04e6829bf.svg"

            # Quicksearch returns site-relative avatar paths; make them absolute.
            if avatarUrl.startswith("/avatars/"):
                avatarUrl = f"https://huggingface.co{avatarUrl}"

            profiles.append(
                {"id": username, "name": username, "src": avatarUrl, "url": f"https://huggingface.co/{username}"}
            )
    return profiles


@st.cache(ttl=CACHE_TTL, show_spinner=False)
@simple_time_tracker(_log)
def get_serialized_data_points():
    """Collect per-user run metrics from the W&B GraphQL API.

    First lists all run names in the learning-at-home/dalle-hivemind-trainers
    project, then fetches each run's summary concurrently and groups the run
    records by displayName (assumed to be the contributor's username —
    TODO confirm).

    Returns:
        (serialized_data_points, latest_timestamp) where
        serialized_data_points maps displayName -> {"profileId", "Runs": [...]}
        and latest_timestamp is a naive-UTC datetime of the newest run summary,
        or None when no run had a usable summary.
    """
    url = "https://api.wandb.ai/graphql"
    api = wandb.Api()

    # Get the run ids
    json_query_run_names = {
        "operationName":"WandbConfig",
        "variables":{"limit":100000,"entityName":"learning-at-home","projectName":"dalle-hivemind-trainers","filters":"{\"$and\":[{\"$or\":[{\"$and\":[]}]},{\"$and\":[]},{\"$or\":[{\"$and\":[{\"$or\":[{\"$and\":[]}]},{\"$and\":[{\"name\":{\"$ne\":null}}]}]}]}]}","order":"-state"},
        "query": """ query WandbConfig($projectName: String!, $entityName: String!, $filters: JSONString, $limit: Int = 100, $order: String) {
    project(name: $projectName, entityName: $entityName) {
    id
    runs(filters: $filters, first: $limit, order: $order) {
    edges {
    node {
    id
    name
    __typename
    }
    __typename
    }
    __typename
    }
    __typename
    }
    }
    """}

    # Retry transient failures on the listing request.
    s = requests.Session()
    s.mount(url, HTTPAdapter(max_retries=5))

    resp = s.post(
        headers={"User-Agent": api.user_agent, "Use-Admin-Privileges": "true", 'content-type': 'application/json'},
        auth=("api", api.api_key),
        url=url,
        data=json.dumps(json_query_run_names)
    )
    json_metrics = resp.json()
    run_names = [run['node']["name"] for run in json_metrics['data']['project']["runs"]['edges']]

    # Get info of each run, concurrently.
    with FuturesSession() as session:
        futures = []
        for run_name in run_names:
            json_query_by_run = {
                "operationName":"Run",
                "variables":{"entityName":"learning-at-home","projectName":"dalle-hivemind-trainers", "runName":run_name},
                "query":"""query Run($projectName: String!, $entityName: String, $runName: String!) {
                    project(name: $projectName, entityName: $entityName) {
                        id
                        name
                        createdAt
                        run(name: $runName) {
                        id
                        name
                        displayName
                        state
                        summaryMetrics
                        runInfo {
                            gpu
                            }
                            __typename
                        }
                        __typename
                        }
                    }
                    """}

            future = session.post(
                headers={"User-Agent": api.user_agent, "Use-Admin-Privileges": "true", 'content-type': 'application/json'},
                auth=("api", api.api_key),
                url=url,
                data=json.dumps(json_query_by_run)
            )
            futures.append(future)

        serialized_data_points = {}
        latest_timestamp = None
        for future in as_completed(futures):
            resp = future.result()
            json_metrics = resp.json()

            # Skip any response missing the expected data -> project -> run path.
            project = (json_metrics.get("data") or {}).get("project")
            if project is None:
                continue
            run = project.get("run")
            if run is None:
                continue

            run_info = run.get("runInfo")
            gpu_type = run_info.get("gpu") if run_info is not None else None

            # BUGFIX: run_summary was previously left unset (or stale from the
            # previous iteration) when summaryMetrics was missing; default to
            # an empty dict so the key checks below skip such runs cleanly.
            summary_metrics = run.get("summaryMetrics")
            run_summary = json.loads(summary_metrics) if summary_metrics is not None else {}

            state = run.get("state")
            display_name = run.get("displayName")
            if state is None or display_name is None:
                continue

            if "_timestamp" not in run_summary or "_step" not in run_summary:
                continue
            timestamp = run_summary["_timestamp"]

            # One record per run; identical shape for new and existing users
            # (previously duplicated in two branches).
            run_record = {
                "batches": run_summary["_step"],
                "runtime": run_summary["_runtime"],
                "loss": run_summary["train/loss"],
                "gpu_type": gpu_type,
                "state": state,
                "velocity": run_summary["_step"] / run_summary["_runtime"],
                "date": datetime.datetime.utcfromtimestamp(timestamp),
            }
            if display_name in serialized_data_points:
                serialized_data_points[display_name]["Runs"].append(run_record)
            else:
                serialized_data_points[display_name] = {
                    "profileId": display_name,
                    "Runs": [run_record],
                }

            if latest_timestamp is None or timestamp > latest_timestamp:
                latest_timestamp = timestamp

        # BUGFIX: guard against no usable runs at all — utcfromtimestamp(None)
        # raised TypeError before.
        if latest_timestamp is not None:
            latest_timestamp = datetime.datetime.utcfromtimestamp(latest_timestamp)
    return serialized_data_points, latest_timestamp


@st.cache(ttl=CACHE_TTL, show_spinner=False)
@simple_time_tracker(_log)
def get_serialized_data(serialized_data_points, latest_timestamp):
    """Flatten per-user run dicts into the point list the bubble chart consumes.

    For each user, sums runtime and batches over all runs and collects the
    runs whose state is "running" into `activeRuns` (their `date` is converted
    in place to an ISO-8601 string, which downstream consumers rely on).

    Returns {"points": [[point, ...]], "maxVelocity": 1}.
    """
    serialized_data_points_v2 = []
    max_velocity = 1  # velocity normalization is currently fixed
    for run_name, serialized_data_point in serialized_data_points.items():
        activeRuns = []
        loss = 0
        runtime = 0
        batches = 0
        velocity = 0
        for run in serialized_data_point["Runs"]:
            if run["state"] == "running":
                # NOTE: mutates the input record; get_global_metrics expects
                # ISO strings here.
                run["date"] = run["date"].isoformat()
                activeRuns.append(run)
                loss += run["loss"]
                velocity += run["velocity"]
            runtime += run["runtime"]
            batches += run["batches"]
        # BUGFIX: average the loss once over all active runs. Previously this
        # division ran on every loop iteration, repeatedly shrinking the
        # accumulator into a meaningless value. (The value is not currently
        # emitted in new_item, so chart output is unchanged.)
        loss = loss / len(activeRuns) if activeRuns else 0
        new_item = {
            "date": latest_timestamp.isoformat(),
            "profileId": run_name,
            "batches": batches,
            "runtime": runtime,
            "activeRuns": activeRuns,
        }
        serialized_data_points_v2.append(new_item)
    serialized_data = {"points": [serialized_data_points_v2], "maxVelocity": max_velocity}
    return serialized_data


def get_leaderboard(serialized_data):
    """Build a leaderboard DataFrame ranking users by total contributed runtime.

    Expects serialized_data["points"][0] to be a list of dicts with
    "profileId" and "runtime" (seconds). Returns a DataFrame indexed by Rank
    (1-based, descending runtime) with columns "User" and
    "Total time contributed" (runtime rendered as a timedelta string).
    """
    rows = [
        (user_item["profileId"], user_item["runtime"])
        for user_item in serialized_data["points"][0]
    ]
    df = pd.DataFrame(rows, columns=["user", "runtime"])

    df = df.sort_values("runtime", ascending=False).reset_index(drop=True)
    # Render seconds as a human-readable "H:MM:SS" timedelta string.
    df["runtime"] = df["runtime"].map(lambda secs: str(datetime.timedelta(seconds=secs)))

    df = df.rename(columns={"user": "User", "runtime": "Total time contributed"})
    df["Rank"] = df.index + 1
    return df.set_index("Rank")


def get_global_metrics(serialized_data):
    """Aggregate global dashboard stats from the serialized point list.

    A user counts as "active" when at least one of their activeRuns has a
    date within MAX_DELTA_ACTIVE_RUN_SEC seconds of now (UTC, naive —
    run["date"] is an ISO string produced upstream).

    Returns {"num_contributing_users", "num_active_users", "total_runtime"}
    where total_runtime is a datetime.timedelta.
    """
    now = datetime.datetime.utcnow()
    users = serialized_data["points"][0]

    num_active_users = 0
    total_runtime_sec = 0
    for user_item in users:
        # A single sufficiently-recent run marks the whole user active.
        if any(
            (now - datetime.datetime.fromisoformat(run["date"])).total_seconds()
            < MAX_DELTA_ACTIVE_RUN_SEC
            for run in user_item["activeRuns"]
        ):
            num_active_users += 1
        total_runtime_sec += user_item["runtime"]

    return {
        "num_contributing_users": len(users),
        "num_active_users": num_active_users,
        "total_runtime": datetime.timedelta(seconds=total_runtime_sec),
    }