Update app.py
    	
app.py CHANGED
@@ -1,8 +1,36 @@
 import gradio as gr
+from gradio_leaderboard import Leaderboard
 import plotly.express as px
 from pathlib import Path
 import pandas as pd
 import numpy as np
+from langchain_openai import ChatOpenAI
+from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
+from langchain.agents.agent_types import AgentType
+from langchain_google_genai import ChatGoogleGenerativeAI
+
+def explain_df(query, df):
+    agent = create_pandas_dataframe_agent(
+        # ChatOpenAI(
+        #     base_url="https://fmapi.swissai.cscs.ch",
+        #     temperature=0.01,
+        #     model="meta-llama/Llama-3.3-70B-Instruct"
+        # ),
+        ChatGoogleGenerativeAI(
+            model="gemini-1.5-pro",
+            temperature=0,
+            max_tokens=None,
+            timeout=None,
+            max_retries=2,
+        ),
+        df,
+        verbose=True,
+        allow_dangerous_code=True,
+    )
+    response = agent.invoke(query)
+    return response['output']
+
+
 abs_path = Path(__file__).parent
 
 def parse_model_args(model_args):
@@ -34,47 +62,75 @@ def parse_model_precision(model_args):
 
 # Any pandas-compatible data
 df = pd.read_csv(str(abs_path / "eval_results.csv"))
+perf_df = pd.read_csv(str(abs_path / "perfbench_results.csv"))
 # take acc only
 df = df[df['metric'] == 'acc']
 # dedup
 df = df.drop_duplicates(subset=['model', 'task'])
 # pivot df, such that the column names are model,task,efficiency
 # but keep precision in its original place
-df
+df['model_physical_size'] = df['model_physical_size'].apply(lambda x: x/1024/1024/1024)
+
+df = df.pivot(index=['model','hf_name','model_physical_size'], columns='task', values='value').reset_index()
 
 df['precision'] = df['model'].apply(lambda x: x.split(":")[-1])
 df['model'] = df['model'].apply(lambda x: x.split(":")[0])
-
-# average over all columns starting with 'task_'
 df['avg_acc'] = df.filter(like='task_').mean(axis=1)
-
-# rename columns starting with 'task_' by removing 'task_'
+
 df = df.rename(columns=lambda x: x.replace('task_', ''))
 numeric_columns = df.select_dtypes(include=[np.number]).columns
+# remove physical size from numeric columns
+numeric_columns = numeric_columns.drop('model_physical_size')
 df[numeric_columns] = (df[numeric_columns]*100).round(2)
+df['model_physical_size'] = df['model_physical_size'].round(2)
+full_df = df.merge(perf_df, left_on='hf_name', right_on='hf_name', how='left')
 
 with gr.Blocks() as demo:
     gr.Markdown("""
     # 🥇 Efficient LLM Leaderboard
     """)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    with gr.Tabs():
+        with gr.TabItem("Leaderboard"):
+            # ...existing code...
+            task_options = [col for col in df.columns if col not in ['model','hf_name','model_physical_size', 'precision']]
+            with gr.Row():
+                selected_tasks = gr.CheckboxGroup(choices=task_options, label="Select Tasks")
+            with gr.Row():
+                accuracy_plot = gr.Plot(label="Accuracy Plot")
+                line_plot = gr.Plot(label="Average Accuracy vs Model Size")
+            with gr.Row():
+                throughput_line_plot = gr.Plot(label="Throughput vs Average Accuracy")
+                latency_line_plot = gr.Plot(label="Latency vs Average Accuracy")
+            with gr.Row():
+                data_table = gr.Dataframe(value=df, label="Result Table")
+            def update_outputs(selected_tasks):
+                if not selected_tasks:
+                    return df[['model', 'precision']], None, None, None, None
+                filtered_df = df[['model', 'precision', 'model_physical_size','hf_name'] + selected_tasks].copy()
+                # average accuracy of selected tasks
+                filtered_df['avg_accuracy'] = filtered_df[selected_tasks].mean(axis=1)
+                bar_fig = px.bar(filtered_df, x='model', y='avg_accuracy', color='precision', barmode='group')
+                line_fig = px.line(filtered_df, x='model_physical_size', y='avg_accuracy', color='model', symbol='precision')
+                # set title of bar_fig
+                bar_fig.update_layout(title=f'tasks: {", ".join(selected_tasks)}')
+                line_fig.update_layout(title=f'tasks: {", ".join(selected_tasks)}')
+                with_perf_df = filtered_df.merge(perf_df, left_on='hf_name', right_on='hf_name', how='left')
+                throughput_line_fig = px.line(with_perf_df, x='output_throughput', y='avg_accuracy', color='model', symbol='precision')
+                latency_line_fig = px.line(with_perf_df, x="avg_e2e_latency", y='avg_accuracy', color='model', symbol='precision')
+                return with_perf_df, bar_fig, line_fig, throughput_line_fig, latency_line_fig
+            selected_tasks.change(
+                fn=update_outputs,
+                inputs=selected_tasks,
+                outputs=[data_table, accuracy_plot, line_plot, throughput_line_plot, latency_line_plot]
+            )
+        with gr.TabItem("Find Model"):
+            query_input = gr.Textbox(label="Enter your query", placeholder="Enter your query here")
+            response_output = gr.Textbox(label="Response", interactive=False)
+            query_input.submit(
+                fn=lambda query: explain_df(query, df),
+                inputs=query_input,
+                outputs=response_output
+            )
+
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(share=True)
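A note on the new pivot step, since it changes the frame's shape: eval_results.csv is long-format (one row per model/task pair), and the pivot widens it to one row per model, with one column per task and hf_name/model_physical_size carried along in the index. A minimal self-contained sketch of the same call, with invented values:

import pandas as pd

# Invented long-format rows mimicking eval_results.csv after the 'acc' filter;
# model_physical_size is assumed to be already converted to GiB.
long_df = pd.DataFrame({
    "model": ["m:bf16", "m:bf16", "n:fp8", "n:fp8"],
    "hf_name": ["org/m", "org/m", "org/n", "org/n"],
    "model_physical_size": [16.0, 16.0, 8.0, 8.0],
    "task": ["task_arc", "task_mmlu", "task_arc", "task_mmlu"],
    "value": [0.61, 0.55, 0.58, 0.52],
})

wide = long_df.pivot(
    index=["model", "hf_name", "model_physical_size"],
    columns="task",
    values="value",
).reset_index()

# One row per model; task_arc and task_mmlu are now columns, which is
# exactly what df.filter(like='task_') averages over for avg_acc.
print(wide)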
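The "Find Model" tab routes free-text queries through explain_df, which wraps the leaderboard frame in a LangChain pandas agent; allow_dangerous_code=True means the agent executes model-generated Python, so it should only see trusted input. A sketch for exercising the function outside Gradio, assuming langchain-google-genai and langchain-experimental are installed and GOOGLE_API_KEY is set in the environment (the toy frame is invented):

import pandas as pd

# Invented miniature leaderboard; the app passes the processed df instead.
toy_df = pd.DataFrame({
    "model": ["llama-3-8b", "llama-3-70b"],
    "precision": ["bf16", "fp8"],
    "avg_acc": [61.2, 74.8],
})

# explain_df is defined at the top of the new app.py; it returns the
# agent's final answer string.
print(explain_df("Which model has the highest avg_acc?", toy_df))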

