File size: 10,231 Bytes
98f8d8e
 
 
 
e2e991a
 
 
 
9e2f4be
e2e991a
98f8d8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31ec36d
9e2f4be
98f8d8e
 
e2e991a
 
98f8d8e
e2e991a
98f8d8e
 
 
 
 
9e2f4be
98f8d8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c2c8f3
98f8d8e
9e2f4be
 
 
 
0c2c8f3
98f8d8e
9e2f4be
 
98f8d8e
9e2f4be
0c2c8f3
 
 
 
 
9e2f4be
 
 
e2e991a
9e2f4be
 
 
98f8d8e
0c2c8f3
98f8d8e
 
0c2c8f3
9e2f4be
0c2c8f3
 
 
 
 
9e2f4be
98f8d8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e2f4be
 
98f8d8e
 
9e2f4be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98f8d8e
 
9e2f4be
 
98f8d8e
 
 
9e2f4be
 
 
 
 
 
 
 
98f8d8e
 
9e2f4be
 
e2e991a
 
 
98f8d8e
9e2f4be
e2e991a
9e2f4be
e2e991a
9e2f4be
e2e991a
9e2f4be
 
 
e2e991a
9e2f4be
98f8d8e
 
 
 
 
31ec36d
98f8d8e
31ec36d
 
0c2c8f3
31ec36d
0c2c8f3
98f8d8e
 
31ec36d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98f8d8e
c88f96b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
import { PROVIDERS_MAP } from './providers';

/**
 * A single repository record (model or dataset) as consumed by the helpers
 * in this file.
 */
export interface ModelData {
  /** Repository id; `generateCalendarData` treats the part before the first `/` as the author. */
  id: string;
  /** Display name; `fetchAllModelData` sets this to the same value as `id`. */
  name: string;
  /** Creation timestamp string; consumers in this file split it on `T` or pass it to `new Date(...)`. */
  createdAt: string;
  /** Like count. */
  likes: number;
  /** Download count; may be absent (`processDetailedModelData` substitutes 0). */
  downloads?: number;
  /** True for datasets; an absent or false value is treated as a model. */
  isDataset?: boolean;
  /** Provider name; `fetchAllModelData` fills it with the `PROVIDERS_MAP` key being fetched. */
  provider: string;
}

/** One day's entry in a calendar heatmap series. */
export interface Activity {
  /** Day key; `generateCalendarData` emits `YYYY-MM-DD` strings. */
  date: string;
  /** Number of items counted for that day. */
  count: number;
  /** Intensity bucket; the generators in this file produce 0 (no activity) through 4. */
  level: number;
}

/**
 * Daily activity series grouped under string keys.
 * `generateCalendarData` uses the keys of `PROVIDERS_MAP` (provider names) as keys.
 */
export interface CalendarData {
  [key: string]: Activity[];
}

/** Item count for one month, one provider and one item type. */
export interface MonthlyActivity {
  date: string; // month key in YYYY-MM format
  count: number; // number of items in this bucket
  provider: string; // provider name; `getTotalMonthlyData` uses 'Total' for its rows
  isDataset: boolean | null;  // true = datasets, false = models, null = both combined
  name?: string; // optional; `generateMonthlyData` stores the name of the item that opened the bucket
}

/**
 * Normalised per-item record produced by `processDetailedModelData`:
 * every optional `ModelData` field is filled in and month/sort keys are added.
 */
export interface DetailedModelData {
  id: string;
  name: string; // falls back to `id` when the source name is empty
  createdAt: string;
  likes: number;
  downloads: number; // 0 when the source record has no download count
  monthKey: string; // YYYY-MM
  provider: string; // 'unknown' when the source record has no provider
  sortKey: string; // month key followed by '-' and the item name (not a bare YYYY-MM)
  isDataset: boolean;
}

/**
 * Builds a daily activity series for every provider in `PROVIDERS_MAP`.
 *
 * The series covers every UTC day from the first day of the month eleven
 * months before the current month up to and including today. Days without
 * any item get `count: 0` / `level: 0`; otherwise `level` is
 * `min(4, ceil(count / 2))`.
 *
 * All date arithmetic is done in UTC so that the generated day keys line up
 * with the UTC date prefix taken from `createdAt`, and so that daylight-saving
 * transitions cannot duplicate or skip a day.
 *
 * @param modelData Items to count. A non-array value is logged and treated as
 *   empty. Items without a string `id` or `createdAt` are ignored.
 * @returns A map from provider name to its chronologically ordered series.
 */
export const generateCalendarData = (modelData: ModelData[]): CalendarData => {
  const DAY_MS = 24 * 60 * 60 * 1000;

  let items: ModelData[] = modelData;
  if (!Array.isArray(modelData)) {
    console.error('Model data is not an array:', modelData);
    items = [];
  }

  // Author -> provider lookup, built once instead of rescanning every provider
  // for every item. The first provider that lists an author wins, which is the
  // same result a first-match scan over PROVIDERS_MAP would give.
  const providerByAuthor = new Map<string, string>();
  for (const [provider, info] of Object.entries(PROVIDERS_MAP)) {
    for (const author of info.authors) {
      if (!providerByAuthor.has(author)) {
        providerByAuthor.set(author, provider);
      }
    }
  }

  // provider -> (day key -> number of items created that day)
  const countsByProvider = new Map<string, Map<string, number>>();
  for (const item of items) {
    if (!item || typeof item.id !== 'string' || typeof item.createdAt !== 'string') {
      continue;
    }
    const org = item.id.split('/')[0] ?? '';
    const provider = providerByAuthor.get(org);
    if (provider === undefined) {
      continue;
    }
    const day = item.createdAt.split('T')[0] ?? '';
    let perDay = countsByProvider.get(provider);
    if (!perDay) {
      perDay = new Map<string, number>();
      countsByProvider.set(provider, perDay);
    }
    perDay.set(day, (perDay.get(day) ?? 0) + 1);
  }

  // Date.UTC normalises a negative month index into the previous year, and the
  // day is pinned to 1 in the same call, so there is no end-of-month overflow
  // (e.g. "31 February" rolling forward into March).
  const now = new Date();
  const firstDay = Date.UTC(now.getUTCFullYear(), now.getUTCMonth() - 11, 1);
  const lastDay = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());

  const dayKeys: string[] = [];
  for (let t = firstDay; t <= lastDay; t += DAY_MS) {
    dayKeys.push(new Date(t).toISOString().slice(0, 10));
  }

  const data: CalendarData = {};
  for (const provider of Object.keys(PROVIDERS_MAP)) {
    const perDay = countsByProvider.get(provider);
    data[provider] = dayKeys.map(date => {
      const count = perDay?.get(date) ?? 0;
      return {
        date,
        count,
        level: count === 0 ? 0 : Math.min(4, Math.ceil(count / 2))
      };
    });
  }

  return data;
};

/**
 * Merges the per-provider series into a single heatmap series by summing the
 * counts of entries that share the same `date`.
 *
 * Every date that appears in any provider's series is part of the result (not
 * only the dates of the first provider). Dates are emitted in the order in
 * which they are first encountered while walking the providers, so when all
 * providers cover the same range the output follows the first provider's order.
 *
 * @param calendarData Per-provider daily series.
 * @returns One entry per distinct date, with `level` set to 0 for a zero total
 *   and `min(4, ceil(total / 3))` otherwise. Empty input yields `[]`.
 */
export const aggregateCalendarData = (calendarData: CalendarData): Activity[] => {
  // A Map keeps first-insertion order, which gives a stable output order.
  const totalsByDate = new Map<string, number>();

  for (const series of Object.values(calendarData)) {
    for (const { date, count } of series) {
      totalsByDate.set(date, (totalsByDate.get(date) ?? 0) + count);
    }
  }

  return Array.from(totalsByDate, ([date, total]) => {
    const count = total || 0;
    return {
      date,
      count,
      level: count === 0 ? 0 : Math.min(4, Math.ceil(count / 3))
    };
  });
};

/**
 * Counts items per month, provider and item type (model vs. dataset).
 *
 * The month key (`YYYY-MM`) is derived from `createdAt` using UTC getters, so
 * the bucket an item lands in does not depend on the timezone of the machine
 * running the code and agrees with the UTC day keys used by
 * `generateCalendarData`.
 *
 * @param modelData Items to count. A missing `provider` is reported as
 *   `'unknown'`; a missing `isDataset` is treated as a model.
 * @returns One entry per (month, provider, type) bucket, sorted by month.
 *   `name` is the name of the first item that opened the bucket.
 */
export const generateMonthlyData = (modelData: ModelData[]): MonthlyActivity[] => {
  // month -> provider -> 'model' | 'dataset' -> bucket
  const buckets: Record<string, Record<string, Record<string, MonthlyActivity>>> = {};

  for (const model of modelData) {
    const created = new Date(model.createdAt);
    const monthKey = `${created.getUTCFullYear()}-${String(created.getUTCMonth() + 1).padStart(2, '0')}`;
    const provider = model.provider || 'unknown';
    const type = model.isDataset ? 'dataset' : 'model';

    let byProvider = buckets[monthKey];
    if (!byProvider) {
      byProvider = {};
      buckets[monthKey] = byProvider;
    }

    let byType = byProvider[provider];
    if (!byType) {
      byType = {};
      byProvider[provider] = byType;
    }

    let bucket = byType[type];
    if (!bucket) {
      bucket = {
        date: monthKey,
        count: 0,
        provider,
        isDataset: model.isDataset ?? false,
        name: model.name
      };
      byType[type] = bucket;
    }

    bucket.count++;
  }

  // Flatten month -> provider -> type into a single list, then order by month.
  return Object.values(buckets)
    .flatMap(byProvider => Object.values(byProvider).flatMap(byType => Object.values(byType)))
    .sort((a, b) => a.date.localeCompare(b.date));
};

/** Resolves after `ms` milliseconds. */
const delay = (ms: number): Promise<void> =>
  new Promise<void>(resolve => setTimeout(resolve, ms));

/**
 * Fetches `url`, retrying when the server answers HTTP 429 or when the request
 * itself fails (rejected `fetch`).
 *
 * Any response with a status other than 429 is returned as-is, including
 * non-2xx ones; callers are responsible for checking it. The pause between
 * attempts is a fixed `delayMs`, and no pause is made after the final attempt
 * because nothing follows it.
 *
 * @param url Address to request.
 * @param retries Maximum number of attempts.
 * @param delayMs Pause between two attempts, in milliseconds.
 * @returns The first response whose status is not 429.
 * @throws The underlying error when the last attempt rejects, or
 *   `Error('Max retries reached')` when every attempt was rate limited.
 */
async function fetchWithRetry(url: string, retries = 3, delayMs = 1000): Promise<Response> {
  for (let attempt = 1; attempt <= retries; attempt++) {
    const isLastAttempt = attempt === retries;
    try {
      const response = await fetch(url);
      if (response.status !== 429) {
        return response;
      }
      if (isLastAttempt) {
        break; // still rate limited and out of attempts: fail without sleeping
      }
      console.log(`Rate limit exceeded, waiting ${delayMs}ms before retry ${attempt}/${retries}`);
    } catch (error) {
      if (isLastAttempt) throw error;
      console.log(`Fetch failed, retrying (${attempt}/${retries})...`);
    }
    await delay(delayMs);
  }
  throw new Error('Max retries reached');
}

/**
 * Downloads the model and dataset listings of every author in `PROVIDERS_MAP`
 * from the Hugging Face API and returns them as one de-duplicated list.
 *
 * Requests are issued one after another, with a one-second pause after each
 * author, to stay under the API rate limit. Models and datasets are fetched
 * and error-handled independently, so a failing dataset request does not
 * discard the models already fetched for the same author (and vice versa).
 * Failures are logged and skipped; this function itself does not reject
 * because of a failed listing.
 *
 * Items are de-duplicated per kind and id: a model and a dataset that share
 * the same id are both kept. When the same item is returned more than once
 * (for instance an author listed under two providers) the last occurrence wins.
 *
 * @returns All unique items, each tagged with its provider and `isDataset`.
 */
export const fetchAllModelData = async (): Promise<ModelData[]> => {
  // Fetches one listing ("models" or "datasets") of one author and converts
  // the raw entries to ModelData. Throws on a non-2xx status or non-array body.
  const fetchListing = async (
    kind: 'models' | 'datasets',
    provider: string,
    author: string
  ): Promise<ModelData[]> => {
    const response = await fetchWithRetry(
      `https://huggingface.co/api/${kind}?author=${encodeURIComponent(author)}&sort=likes&direction=-1&limit=10000`,
      3,
      2000
    );
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} while fetching ${kind} for ${author}`);
    }

    const payload: unknown = await response.json();
    if (!Array.isArray(payload)) {
      throw new Error(`Unexpected non-array response while fetching ${kind} for ${author}`);
    }

    const isDataset = kind === 'datasets';
    const items: ModelData[] = [];
    for (const raw of payload as unknown[]) {
      if (typeof raw !== 'object' || raw === null) continue;
      const entry = raw as Record<string, unknown>;
      // Every consumer needs a string id and creation timestamp; skip entries
      // that would otherwise crash or produce NaN month keys downstream.
      if (typeof entry.id !== 'string' || typeof entry.createdAt !== 'string') continue;
      items.push({
        id: entry.id,
        name: entry.id,
        createdAt: entry.createdAt,
        likes: typeof entry.likes === 'number' ? entry.likes : 0,
        downloads: typeof entry.downloads === 'number' ? entry.downloads : undefined,
        isDataset,
        provider
      });
    }
    return items;
  };

  // Keyed by "<kind>:<id>" because models and datasets live in separate
  // namespaces and may legitimately share an id.
  const uniqueItems = new Map<string, ModelData>();

  for (const [provider, info] of Object.entries(PROVIDERS_MAP)) {
    console.log(`Fetching data for provider: ${provider}`);

    for (const author of info.authors) {
      console.log(`  Fetching data for author: ${author}`);
      const fetched = { models: 0, datasets: 0 };

      for (const kind of ['models', 'datasets'] as const) {
        try {
          const items = await fetchListing(kind, provider, author);
          fetched[kind] = items.length;
          for (const item of items) {
            uniqueItems.set(`${kind}:${item.id}`, item);
          }
        } catch (error) {
          console.error(`Error fetching data for ${provider}/${author}:`, error);
        }
      }

      console.log(`    Fetched ${fetched.models + fetched.datasets} items (${fetched.models} models, ${fetched.datasets} datasets) for ${author}`);

      // Add a delay between author requests to avoid rate limiting
      await delay(1000);
    }
  }

  const uniqueData = Array.from(uniqueItems.values());

  console.log(`Total unique items fetched: ${uniqueData.length}`);
  return uniqueData;
};

/**
 * Normalises raw items into `DetailedModelData`: fills in defaults for the
 * optional fields and adds a month key and a sort key.
 *
 * The month key is computed with UTC getters so it does not depend on the
 * timezone of the machine running the code. The sort key is built from the
 * resolved name, so an item with an empty `name` sorts by its `id`, which is
 * also the name it is given in the result.
 *
 * @param models Items to normalise.
 * @returns One record per input item, in the same order.
 */
export function processDetailedModelData(models: ModelData[]): DetailedModelData[] {
  return models.map(model => {
    const created = new Date(model.createdAt);
    const monthKey = `${created.getUTCFullYear()}-${String(created.getUTCMonth() + 1).padStart(2, '0')}`;
    const name = model.name || model.id;

    return {
      id: model.id,
      name,
      createdAt: model.createdAt,
      likes: model.likes || 0,
      downloads: model.downloads || 0, // missing download counts are reported as 0
      monthKey,
      provider: model.provider || 'unknown',
      sortKey: `${monthKey}-${name}`,
      isDataset: model.isDataset ?? false
    };
  });
}

/**
 * Sums monthly activity across all providers.
 *
 * For each distinct month in the input, three rows with provider `'Total'`
 * are produced, in this order: the combined count (`isDataset: null`), the
 * model count (`isDataset: false`) and the dataset count (`isDataset: true`).
 * An input row counts as a dataset when its `isDataset` is truthy and as a
 * model otherwise. The result is ordered by month.
 */
export const getTotalMonthlyData = (monthlyData: MonthlyActivity[]): MonthlyActivity[] => {
  type Tally = Record<'model' | 'dataset' | 'all', number>;
  const tallies: Record<string, Tally> = {};

  // Single pass: open the month's tally on first sight, then accumulate.
  for (const entry of monthlyData) {
    if (!tallies[entry.date]) {
      tallies[entry.date] = { model: 0, dataset: 0, all: 0 };
    }
    const bucket = entry.isDataset ? 'dataset' : 'model';
    tallies[entry.date][bucket] += entry.count;
    tallies[entry.date].all += entry.count;
  }

  const rows: MonthlyActivity[] = [];
  for (const [date, tally] of Object.entries(tallies)) {
    rows.push(
      { date, count: tally.all, provider: 'Total', isDataset: null },
      { date, count: tally.model, provider: 'Total', isDataset: false },
      { date, count: tally.dataset, provider: 'Total', isDataset: true }
    );
  }

  rows.sort((left, right) => left.date.localeCompare(right.date));
  return rows;
};

/**
 * Renders monthly activity as CSV text: one row per month (ascending) and one
 * column per provider, in the order providers first appear in `data`.
 *
 * A provider without an entry for a month gets `0`. If `data` holds several
 * entries for the same month and provider, the last one wins (counts are not
 * summed), so callers should pass data for a single item type.
 *
 * Fields containing a comma, double quote, CR or LF are wrapped in double
 * quotes with embedded quotes doubled, so such provider names cannot shift
 * the columns. Fields without those characters are emitted unchanged.
 *
 * @param data Monthly activity entries.
 * @returns CSV text with a `Date,<providers...>` header and `\n` line breaks.
 */
export const convertToCSV = (data: MonthlyActivity[]): string => {
  const needsQuoting = /[",\r\n]/;
  const escapeField = (field: string): string =>
    needsQuoting.test(field) ? `"${field.replace(/"/g, '""')}"` : field;

  // month -> (provider -> count); providers are collected in first-seen order.
  const countsByDate = new Map<string, Map<string, number>>();
  const providers = new Set<string>();

  for (const { date, provider, count } of data) {
    let counts = countsByDate.get(date);
    if (!counts) {
      counts = new Map<string, number>();
      countsByDate.set(date, counts);
    }
    counts.set(provider, count);
    providers.add(provider);
  }

  const providerColumns = Array.from(providers);
  const header = ['Date', ...providerColumns];

  const rows = Array.from(countsByDate.entries())
    .sort(([a], [b]) => a.localeCompare(b))
    .map(([date, counts]) => [
      date,
      ...providerColumns.map(provider => (counts.get(provider) || 0).toString())
    ]);

  return [header, ...rows]
    .map(row => row.map(escapeField).join(','))
    .join('\n');
};