File size: 3,654 Bytes
68e6513 fbdc657 68e6513 fbdc657 ee6a180 1580227 ee6a180 529d871 fbdc657 68e6513 1580227 68e6513 fbdc657 ee6a180 3433b65 ee6a180 1580227 ee6a180 3433b65 ee6a180 3433b65 ee6a180 3433b65 ee6a180 3433b65 ee6a180 3433b65 ee6a180 3433b65 ee6a180 3433b65 ee6a180 3433b65 fbdc657 3433b65 fbdc657 3433b65 fbdc657 3433b65 68e6513 529d871 ee6a180 68e6513 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# Utility functions for filtering the dataframe
import pandas as pd
def filter_cols(df):
    """Return ``df`` restricted to a fixed, ordered list of columns.

    Args:
        df: DataFrame containing every column named below.

    Returns:
        A DataFrame holding only those columns, in the order listed.
        pandas raises ``KeyError`` if any of them is missing from ``df``.
    """
    selected_columns = [
        'Model Name',
        'Clemscore',
        'Input $/1M tokens',
        'Output $/1M tokens',
        'Latency (s)',
        'Context Size (k)',
        'Parameters (B)',
        'Release Date',
        'License',
    ]
    return df[selected_columns]
def _in_range(values, bounds):
    """Return a boolean mask: ``bounds[0] <= values <= bounds[1]`` (inclusive)."""
    return (values >= bounds[0]) & (values <= bounds[1])


def filter(df, language_list, parameters, input_price, output_price, multimodal,
           context, open_weight, start, end, license):
    """Apply the selected filters to ``df`` and return the reduced column set.

    Each filter is skipped once the frame has become empty. The surviving
    rows are passed through ``filter_cols`` before being returned.

    Args:
        df: DataFrame to filter. The columns 'Languages', 'Open Weight',
            'Parameters (B)', 'Input $/1M tokens', 'Output $/1M tokens',
            'License Name' and 'Temp Date' are read, plus 'Image',
            'Multiple Image', 'Audio' or 'Video' when the matching
            modality is requested.
        language_list: a row is kept only if every entry is contained
            (``in``) in its 'Languages' value.
        parameters: ``(min, max)`` inclusive bounds on 'Parameters (B)'.
            Applied to rows with 'Open Weight' == True only; rows with
            'Open Weight' == False are not filtered by size.
        input_price: ``(min, max)`` inclusive bounds on 'Input $/1M tokens'.
        output_price: ``(min, max)`` inclusive bounds on 'Output $/1M tokens'.
        multimodal: container of requested modalities; recognised entries
            are "Image", "Multi-Image", "Audio" and "Video". Each one
            requires the matching column to be True.
        context: currently unused (context-size filtering is disabled);
            kept so existing callers keep working.
        open_weight: container that may hold "Open" and/or "Commercial".
            If exactly one is present, only that kind of model is kept.
        start: earliest accepted 'Temp Date' (anything ``pd.to_datetime``
            accepts), inclusive, compared at whole-second precision.
        end: latest accepted 'Temp Date', inclusive, same handling.
        license: a row is kept if any entry is contained (``in``) in its
            'License Name' value.

    Returns:
        The filtered DataFrame restricted to the columns chosen by
        ``filter_cols``.
    """
    if not df.empty:
        df = df[df['Languages'].apply(lambda x: all(lang in x for lang in language_list))]

    if not df.empty:
        # Only open-weight models are filtered by parameter count.
        # Rows whose 'Open Weight' is neither True nor False are dropped here.
        open_models = df[df['Open Weight'] == True]
        commercial_models = df[df['Open Weight'] == False]

        if open_models.empty:
            # Nothing to size-filter; previously this path could reach the
            # concat with the filtered frame never having been assigned.
            df = commercial_models
        else:
            # An upper bound at or above the largest value present excludes
            # nothing, so one inclusive range check covers both the
            # "bounded" and "open-ended" cases.
            in_size_range = _in_range(open_models['Parameters (B)'], parameters)
            df = pd.concat([open_models[in_size_range], commercial_models])

    if not df.empty:
        df = df[_in_range(df['Input $/1M tokens'], input_price)]

    if not df.empty:
        df = df[_in_range(df['Output $/1M tokens'], output_price)]

    print("Price")
    print(df)

    if not df.empty:
        # Requested modality -> boolean column that must be True.
        modality_columns = (
            ("Image", 'Image'),
            ("Multi-Image", 'Multiple Image'),
            ("Audio", 'Audio'),
            ("Video", 'Video'),
        )
        for option, column in modality_columns:
            if option in multimodal:
                df = df[df[column] == True]

    # NOTE: filtering on 'Context Size (k)' via `context` is currently disabled.

    print("Modality")
    print(df)

    if not df.empty:
        wants_open = "Open" in open_weight
        wants_commercial = "Commercial" in open_weight
        # Selecting both kinds, or neither, leaves the frame untouched.
        if wants_open and not wants_commercial:
            df = df[df['Open Weight'] == True]
        elif wants_commercial and not wants_open:
            df = df[df['Open Weight'] == False]

    if not df.empty:
        df = df[df['License Name'].apply(lambda x: any(lic in x for lic in license))]

    if not df.empty:
        # Compare as whole seconds since the epoch. The converted values live
        # in a local Series rather than being written back into `df`, which is
        # a filtered slice (avoids SettingWithCopyWarning).
        # 'int64' is spelled out because plain `int` is platform-dependent.
        # NOTE(review): the // 10**9 assumes nanosecond resolution - confirm
        # against the pandas version in use.
        release_seconds = pd.to_datetime(df['Temp Date']).astype('int64') // 10**9
        start_seconds = int(pd.to_datetime(start).timestamp())
        end_seconds = int(pd.to_datetime(end).timestamp())
        df = df[_in_range(release_seconds, (start_seconds, end_seconds))]

    df = filter_cols(df)

    print(df)
    return df  # Return the filtered dataframe
|