File size: 3,654 Bytes
68e6513
 
fbdc657
 
68e6513
 
 
fbdc657
ee6a180
 
 
 
1580227
ee6a180
529d871
fbdc657
68e6513
1580227
68e6513
 
 
fbdc657
 
 
ee6a180
3433b65
 
ee6a180
 
 
 
 
1580227
ee6a180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3433b65
ee6a180
3433b65
 
ee6a180
 
 
 
 
3433b65
 
 
ee6a180
3433b65
ee6a180
3433b65
ee6a180
3433b65
ee6a180
3433b65
ee6a180
 
 
 
 
 
3433b65
 
 
 
 
 
fbdc657
3433b65
 
fbdc657
 
3433b65
 
fbdc657
 
 
 
 
 
3433b65
 
68e6513
 
529d871
ee6a180
 
68e6513
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Utility functions for filtering the dataframe

import logging

import pandas as pd

def filter_cols(df):
    """Return ``df`` restricted to a fixed set of columns, in a fixed order.

    Selecting with a list of labels raises ``KeyError`` if any of the listed
    columns is missing from ``df``. The input frame itself is not modified.
    """
    kept_columns = [
        'Model Name',
        'Clemscore',
        'Input $/1M tokens',
        'Output $/1M tokens',
        'Latency (s)',
        'Context Size (k)',
        'Parameters (B)',
        'Release Date',
        'License',
    ]
    return df[kept_columns]


def filter(df, language_list, parameters, input_price, output_price, multimodal,
           context, open_weight, start, end, license):
    """Apply the selected filters to ``df`` and return the remaining rows.

    The filters run in sequence. As soon as the frame becomes empty the
    remaining row filters are skipped; the final column selection is always
    applied. Rows keep the relative order they have in the input frame.

    NOTE: the names ``filter`` and ``license`` shadow builtins; they are kept
    unchanged so existing callers keep working.

    Args:
        df: DataFrame providing the columns 'Languages', 'Open Weight',
            'Parameters (B)', 'Input $/1M tokens', 'Output $/1M tokens',
            'Image', 'Multiple Image', 'Audio', 'Video', 'License Name' and
            'Temp Date', plus every column selected by ``filter_cols``.
        language_list: Values that must all be contained in a row's
            'Languages' entry (checked with ``in``).
        parameters: Indexable pair ``(min, max)``, inclusive, applied to
            'Parameters (B)' of open-weight rows only.
        input_price: Indexable pair ``(min, max)``, inclusive, applied to
            'Input $/1M tokens'.
        output_price: Indexable pair ``(min, max)``, inclusive, applied to
            'Output $/1M tokens'.
        multimodal: Container of selected modalities; recognised entries are
            "Image", "Multi-Image", "Audio" and "Video". Each selected one
            requires the matching boolean column to be True.
        context: Currently unused (the context-size filter is disabled).
        open_weight: Container that may hold "Open" and/or "Commercial".
            Rows are restricted only when exactly one of the two is present.
        start: Lower bound (inclusive) for 'Temp Date'; anything accepted by
            ``pd.to_datetime``.
        end: Upper bound (inclusive) for 'Temp Date'; anything accepted by
            ``pd.to_datetime``.
        license: Iterable of values; a row is kept when at least one of them
            is contained in its 'License Name' entry.

    Returns:
        The filtered DataFrame reduced to the columns chosen by
        ``filter_cols``.
    """
    logger = logging.getLogger(__name__)

    if not df.empty:
        df = df[df['Languages'].apply(
            lambda langs: all(lang in langs for lang in language_list)
        )]

    if not df.empty:
        # Only open-weight rows are subject to the parameter range; every
        # other row passes through. A positional mask (instead of splitting
        # and re-concatenating) preserves the incoming row order, and the
        # range test only touches the open-weight subset so unrelated values
        # in 'Parameters (B)' of other rows are never compared.
        # The upper bound needs no special case for "at or above the largest
        # model": every value is <= the column maximum anyway.
        is_open = df['Open Weight'].eq(True).to_numpy(dtype=bool, na_value=False)
        keep = ~is_open
        keep[is_open] = (
            df.loc[is_open, 'Parameters (B)']
            .between(parameters[0], parameters[1])
            .to_numpy(dtype=bool, na_value=False)
        )
        df = df[keep]

    if not df.empty:
        df = df[df['Input $/1M tokens'].between(input_price[0], input_price[1])]

    if not df.empty:
        df = df[df['Output $/1M tokens'].between(output_price[0], output_price[1])]

    logger.debug("After price filters:\n%s", df)

    # UI option -> boolean column that must be True when the option is chosen.
    modality_columns = {
        'Image': 'Image',
        'Multi-Image': 'Multiple Image',
        'Audio': 'Audio',
        'Video': 'Video',
    }
    if not df.empty:
        for option, column in modality_columns.items():
            if option in multimodal:
                df = df[df[column].eq(True)]

    logger.debug("After modality filters:\n%s", df)

    if not df.empty:
        wants_open = "Open" in open_weight
        wants_commercial = "Commercial" in open_weight
        # Selecting both (or neither) leaves the rows untouched.
        if wants_open and not wants_commercial:
            df = df[df['Open Weight'].eq(True)]
        elif wants_commercial and not wants_open:
            df = df[df['Open Weight'].eq(False)]

    if not df.empty:
        df = df[df['License Name'].apply(
            lambda name: any(lic in name for lic in license)
        )]

    # Normalise both bounds and the column to UTC (naive values are read as
    # UTC) and compare at whole-second granularity. Comparing timestamps
    # directly avoids writing a helper column onto a filtered slice and does
    # not depend on the datetime resolution or the platform integer width.
    start_ts = pd.to_datetime(start, utc=True).floor('s')
    end_ts = pd.to_datetime(end, utc=True).floor('s')

    if not df.empty:
        release = pd.to_datetime(df['Temp Date'], utc=True).dt.floor('s')
        df = df[release.between(start_ts, end_ts)]

    df = filter_cols(df)

    logger.debug("Filtered result:\n%s", df)

    return df