|
|
|
|
|
import logging

import pandas as pd
|
|
|
def filter_cols(df):
    """Return ``df`` restricted to the columns shown in the results table.

    The columns are returned in the fixed order listed below, regardless of
    their order in ``df``; any other column is dropped.

    Args:
        df: DataFrame containing at least the nine display columns.

    Returns:
        A DataFrame holding only the display columns, in display order.

    Raises:
        KeyError: if any display column is missing from ``df``.
    """
    display_columns = [
        'Model Name',
        'Clemscore',
        'Input $/1M tokens',
        'Output $/1M tokens',
        'Latency (s)',
        'Context Size (k)',
        'Parameters (B)',
        'Release Date',
        'License',
    ]
    return df[display_columns]
|
|
|
|
|
def filter(df, language_list, parameters, input_price, output_price, multimodal,
           context, open_weight, start, end, license):
    """Apply the selected filters to ``df`` and return the display columns.

    Filters run in sequence; each one is skipped once the frame is already
    empty. The caller's ``df`` is never modified.

    Args:
        df: DataFrame with the columns read below ('Languages', 'Open Weight',
            'Parameters (B)', the two price columns, the modality flag
            columns, 'License Name', 'Temp Date') plus the display columns
            required by ``filter_cols``.
        language_list: Languages a row must support; a row is kept only if
            every entry is contained in its 'Languages' value.
        parameters: ``(low, high)`` bounds on 'Parameters (B)'. Applied to
            open-weight rows only. When ``high`` is at least the largest
            open-weight size present, no upper bound is applied.
        input_price: ``(low, high)`` inclusive bounds on 'Input $/1M tokens'.
        output_price: ``(low, high)`` inclusive bounds on 'Output $/1M tokens'.
        multimodal: Collection of required modalities, any of "Image",
            "Multi-Image", "Audio", "Video".
        context: Accepted for interface compatibility; not used by this
            function. NOTE(review): no context-size filter is applied —
            confirm whether one is intended.
        open_weight: Collection that may contain "Open" and/or "Commercial".
            Selecting exactly one restricts rows to that kind; selecting both
            or neither leaves the rows unrestricted.
        start: Earliest allowed 'Temp Date' (inclusive); anything accepted by
            ``pd.to_datetime``.
        end: Latest allowed 'Temp Date' (inclusive); anything accepted by
            ``pd.to_datetime``.
        license: Collection of license strings; a row is kept if any entry is
            contained in its 'License Name'. An empty collection therefore
            removes every row.

    Returns:
        The filtered rows, reduced to the display columns by ``filter_cols``.
    """
    log = logging.getLogger(__name__)

    if not df.empty:
        supports_all = df['Languages'].apply(
            lambda supported: all(lang in supported for lang in language_list)
        )
        df = df[supports_all]

    if not df.empty:
        # `== True` / `== False` are element-wise comparisons on the column;
        # rows whose flag is neither value fall into neither group.
        open_models = df[df['Open Weight'] == True]  # noqa: E712
        closed_models = df[df['Open Weight'] == False]  # noqa: E712

        # The parameter range only constrains open-weight models; when there
        # are none, the frame is left exactly as it is.
        if not open_models.empty:
            sizes = open_models['Parameters (B)']
            size_ok = sizes >= parameters[0]
            # An upper bound at or beyond the largest model means "no limit".
            if not parameters[1] >= sizes.max():
                size_ok = size_ok & (sizes <= parameters[1])
            df = pd.concat([open_models[size_ok], closed_models])

    price_filters = (
        ('Input $/1M tokens', input_price),
        ('Output $/1M tokens', output_price),
    )
    for column, bounds in price_filters:
        if not df.empty:
            df = df[(df[column] >= bounds[0]) & (df[column] <= bounds[1])]

    log.debug("Price\n%s", df)

    if not df.empty:
        # UI choice -> boolean flag column (note "Multi-Image" reads the
        # 'Multiple Image' column).
        modality_columns = (
            ("Image", 'Image'),
            ("Multi-Image", 'Multiple Image'),
            ("Audio", 'Audio'),
            ("Video", 'Video'),
        )
        for choice, column in modality_columns:
            if choice in multimodal:
                df = df[df[column] == True]  # noqa: E712

    log.debug("Modality\n%s", df)

    if not df.empty:
        wants_open = "Open" in open_weight
        wants_commercial = "Commercial" in open_weight
        if wants_open and not wants_commercial:
            df = df[df['Open Weight'] == True]  # noqa: E712
        elif wants_commercial and not wants_open:
            df = df[df['Open Weight'] == False]  # noqa: E712

    if not df.empty:
        license_ok = df['License Name'].apply(
            lambda name: any(lic in name for lic in license)
        )
        df = df[license_ok]

    # Dates are compared as UTC timestamps truncated to whole seconds. Parsing
    # with utc=True reads naive values as UTC, and comparing timestamps (not
    # raw integers) keeps the result independent of the datetime64 resolution.
    window_start = pd.to_datetime(start, utc=True).floor('s')
    window_end = pd.to_datetime(end, utc=True).floor('s')

    if not df.empty:
        # Kept in a local Series: writing a column into a frame obtained by
        # boolean indexing would trigger SettingWithCopyWarning.
        released = pd.to_datetime(df['Temp Date'], utc=True).dt.floor('s')
        df = df[(released >= window_start) & (released <= window_end)]

    df = filter_cols(df)

    log.debug("%s", df)

    return df
|
|
|
|