# Utility functions for filtering the dataframe
import pandas as pd


def filter_cols(df):
    """Return *df* restricted to the columns shown to the user, in display order."""
    return df[[
        'Model Name',
        'Clemscore',
        'Input $/1M tokens',
        'Output $/1M tokens',
        'Latency (s)',
        'Context Size (k)',
        'Parameters (B)',
        'Release Date',
        'License',
    ]]


def _within(series, bounds):
    """Return a boolean mask of values inside the inclusive ``(low, high)`` bounds."""
    return (series >= bounds[0]) & (series <= bounds[1])


def filter(df, language_list, parameters, input_price, output_price, multimodal,
           context, open_weight, start, end, license):
    """Apply the selected filters to *df* and return the display columns.

    Each filter is skipped once the frame is already empty. The result is
    reduced to the columns of ``filter_cols`` and sorted by 'Clemscore',
    highest first.

    Args:
        df: DataFrame providing (at least) the columns read below:
            'Languages', 'Open Weight', 'Parameters (B)', the two price
            columns, 'Image', 'Multiple Image', 'Audio', 'Video',
            'License Name', 'Temp Date' and the display columns.
        language_list: Every entry must be contained (``in``) in a row's
            'Languages' value for the row to be kept.
        parameters: ``(low, high)`` bounds on 'Parameters (B)'. Applied to
            open-weight rows only; the upper bound is ignored when it is at
            or above the largest open-weight model in the frame.
        input_price: Inclusive ``(low, high)`` bounds on 'Input $/1M tokens'.
        output_price: Inclusive ``(low, high)`` bounds on 'Output $/1M tokens'.
        multimodal: Collection of required modalities; recognised entries are
            "Image", "Multi-Image", "Audio" and "Video".
        context: Accepted for interface compatibility; the context-size
            filter is currently disabled, so this value is not used.
        open_weight: Collection that may contain "Open" and/or "Commercial".
            Containing neither yields an empty result.
        start: Earliest release date (inclusive); anything
            ``pd.to_datetime`` accepts.
        end: Latest release date (inclusive); anything ``pd.to_datetime``
            accepts.
        license: A row is kept when any entry is contained (``in``) in its
            'License Name' value.

    Returns:
        The filtered DataFrame, limited to the display columns.
    """
    if not df.empty:
        df = df[df['Languages'].apply(
            lambda langs: all(lang in langs for lang in language_list)
        )]

    if not df.empty:
        # The parameter range only applies to open-weight models; commercial
        # models pass through this step unfiltered.
        open_models = df[df['Open Weight'] == True]
        commercial_models = df[df['Open Weight'] == False]

        if not open_models.empty:
            sizes = open_models['Parameters (B)']
            keep = sizes >= parameters[0]
            # An upper bound at or above the largest open model means
            # "no upper limit", so it is only enforced below that size.
            if parameters[1] < sizes.max():
                keep = keep & (sizes <= parameters[1])
            df = pd.concat([open_models[keep], commercial_models])

    if not df.empty:
        df = df[_within(df['Input $/1M tokens'], input_price)]

    if not df.empty:
        df = df[_within(df['Output $/1M tokens'], output_price)]

    print("Price")
    print(df)

    if not df.empty:
        if "Image" in multimodal:
            df = df[df['Image'] == True]
        if "Multi-Image" in multimodal:
            df = df[df['Multiple Image'] == True]
        if "Audio" in multimodal:
            df = df[df['Audio'] == True]
        if "Video" in multimodal:
            df = df[df['Video'] == True]

    # NOTE: filtering on 'Context Size (k)' via `context` is intentionally
    # not applied at the moment.

    print("Modality")
    print(df)

    if not df.empty:
        wants_open = "Open" in open_weight
        wants_commercial = "Commercial" in open_weight
        if wants_open and not wants_commercial:
            df = df[df['Open Weight'] == True]
        elif wants_commercial and not wants_open:
            df = df[df['Open Weight'] == False]
        elif not wants_open and not wants_commercial:
            # Nothing selected: empty frame that keeps the column layout.
            df = pd.DataFrame(columns=df.columns)

    if not df.empty:
        df = df[df['License Name'].apply(
            lambda name: any(lic in name for lic in license)
        )]

    if not df.empty:
        # Compare real datetimes instead of writing an integer 'Temp Date'
        # column back into a filtered slice: that avoids chained-assignment
        # warnings and does not depend on the datetime storage resolution.
        # Everything is normalised to UTC and floored to whole seconds so the
        # inclusive bounds keep their whole-second meaning; unparsable/missing
        # dates compare False and are dropped.
        released = pd.to_datetime(df['Temp Date'], utc=True).dt.floor('s')
        earliest = pd.to_datetime(start, utc=True).floor('s')
        latest = pd.to_datetime(end, utc=True).floor('s')
        df = df[(released >= earliest) & (released <= latest)]

    df = filter_cols(df)
    df = df.sort_values(by='Clemscore', ascending=False)

    print(df)
    return df