import pandas as pd import gradio as gr import os from gradio_rangeslider import RangeSlider from src.filter_utils import filter, filter_cols # Main Leaderboard containing everything text_leaderboard = pd.read_csv(os.path.join('assets', 'merged_data.csv')) text_leaderboard = text_leaderboard.sort_values(by='Clemscore', ascending=False) open_weight_df = text_leaderboard[text_leaderboard['Open Weight'] == True] if not open_weight_df.empty: # Check if filtered df is non-empty max_parameter_size = open_weight_df['Parameters (B)'].max() # Short leaderboard containing fixed columns short_leaderboard = filter_cols(text_leaderboard) ## Extract data langs = [] licenses = [] ip_prices = [] op_prices = [] latencies = [] parameters = [] contexts = [] dates = [] for i in range(len(text_leaderboard)): lang_splits = text_leaderboard.iloc[i]['Languages'].split(',') lang_splits = [s.strip() for s in lang_splits] langs += lang_splits license_name = text_leaderboard.iloc[i]['License Name'] licenses.append(license_name) ip_prices.append(text_leaderboard.iloc[i]['Input $/1M tokens']) op_prices.append(text_leaderboard.iloc[i]['Output $/1M tokens']) latencies.append(text_leaderboard.iloc[i]['Latency (s)']) parameters.append(text_leaderboard.iloc[i]['Parameters (B)']) contexts.append(text_leaderboard.iloc[i]['Context Size (k)']) dates.append(text_leaderboard.iloc[i]['Release Date']) langs = list(set(langs)) langs.sort() licenses = list(set(licenses)) licenses.sort() max_input_price = max(ip_prices) max_output_price = max(op_prices) max_latency = max(latencies) min_parameters = 0 if pd.isna(min(parameters)) else min(parameters) max_parameter = max_parameter_size parameter_step = 1 print(f"MIN {min_parameters}, MAX {max_parameter}") min_context = min(contexts) max_context = max(contexts) context_step = 8 min_date = min(dates) max_date = max(dates) TITLE = """