# Spaces: Runtime error
| import gradio as gr | |
| import pandas as pd | |
| from glob import glob | |
| from pathlib import Path | |
| from tabs.dashboard import df | |
| from tabs.faq import ( | |
| about_olas_predict_benchmark, | |
| about_olas_predict, | |
| about_the_dataset, | |
| about_the_tools | |
| ) | |
| from tabs.howto_benchmark import how_to_run | |
| from tabs.run_benchmark import run_benchmark_main | |
# Top-level Gradio Blocks container: the UI is declared inside `with demo:` further down and launched on the last line of the file.
| demo = gr.Blocks() | |
def run_benchmark_gradio(tool_name, model_name, openai_api_key, anthropic_api_key):
    """Run the benchmark for the selected tool and return its result tables.

    Validates the form inputs, delegates to ``run_benchmark_main`` and, when
    that call returns ``'completed'``, loads the first "results" and first
    "summary" CSV found in the ``results/`` directory.

    Args:
        tool_name: Name of the tool to benchmark.
        model_name: Name of the model; passed through to ``run_benchmark_main``.
        openai_api_key: OpenAI API key; may be empty if an Anthropic key is given.
        anthropic_api_key: Anthropic API key; may be empty if an OpenAI key is given.

    Returns:
        A ``(results_df, summary_df)`` tuple of DataFrames rounded to 4 decimal
        places -- one value per output component of the click handler.

    Raises:
        gr.Error: If no tool or no API key was supplied, if the benchmark did
            not return ``'completed'``, or if an expected CSV file is missing.
    """
    # An untouched Textbox reaches the handler as "" rather than None, so test
    # truthiness; `is None` would let an empty key through.
    if not tool_name:
        raise gr.Error("Please enter the name of your tool.")
    if not openai_api_key and not anthropic_api_key:
        raise gr.Error("Please enter either OpenAI or Anthropic API key.")

    result = run_benchmark_main(tool_name, model_name, openai_api_key, anthropic_api_key)
    if result != 'completed':
        # Both outputs are Dataframe components, so a status string cannot be
        # returned to them; surface it as an error message instead.
        raise gr.Error(f"Benchmark Result: {result}")

    # get the results files in the results directory
    csv_paths = [Path(name) for name in glob('results/*.csv')]
    print(f"Number of files in results directory: {len(csv_paths)}")

    # split into results and summary files by file name
    results_files = [path for path in csv_paths if 'results' in path.name]
    summary_files = [path for path in csv_paths if 'summary' in path.name]
    print(results_files, summary_files)

    # Fail with a readable message instead of an IndexError on `[0]`.
    if not results_files or not summary_files:
        raise gr.Error(
            "Benchmark completed but the results or summary CSV file "
            "was not found in the results directory."
        )

    # make sure all df float values are rounded to 4 decimal places
    results_df = pd.read_csv(results_files[0]).round(4)
    summary_df = pd.read_csv(summary_files[0]).round(4)
    return results_df, summary_df
# Declare the UI inside the Blocks context, then start the app.
#
# NOTE(review): the emoji prefixes of the tab labels were mis-decoded in this
# copy of the file. The "About" and "Run the Benchmark" emoji are reconstructed
# from the surviving characters; the "Benchmark Leaderboard" and "Contribute"
# emoji are best guesses -- confirm against the deployed app.
with demo:
    gr.HTML("<h1>Olas Predict Benchmark</h1>")
    gr.Markdown("Leaderboard showing the performance of Olas Predict tools on the Autocast dataset and overview of the project.")

    with gr.Tabs() as tabs:
        # first tab - leaderboard
        with gr.TabItem("🏆 Benchmark Leaderboard", id=0):
            gr.components.Dataframe(
                value=df,
            )

        # second tab - about
        with gr.TabItem("ℹ️ About"):
            with gr.Row():
                with gr.Accordion("About the Benchmark", open=False):
                    gr.Markdown(about_olas_predict_benchmark)
            with gr.Row():
                with gr.Accordion("About the Tools", open=False):
                    gr.Markdown(about_the_tools)
            with gr.Row():
                with gr.Accordion("About the Autocast Dataset", open=False):
                    gr.Markdown(about_the_dataset)
            with gr.Row():
                with gr.Accordion("About Olas", open=False):
                    gr.Markdown(about_olas_predict)

        # third tab - how to run the benchmark
        with gr.TabItem("🚀 Contribute"):
            gr.Markdown(how_to_run)

        def update_dropdown(tool):
            """Return the model names that match the given tool name.

            Tools whose name contains "claude" map to the Claude models; every
            other tool maps to the GPT models.

            NOTE(review): this function is not registered as an event handler
            anywhere in the visible file, so the model dropdown is not
            filtered by the selected tool.
            """
            if "claude" in tool:
                return ["claude-3-haiku-20240307", "claude-3-sonnet-20240229", "claude-3-opus-20240229"]
            else:
                return ["gpt-3.5-turbo-0125", "gpt-4-0125-preview"]

        # fourth tab - run the benchmark
        with gr.TabItem("🔥 Run the Benchmark"):
            with gr.Row():
                tool_name = gr.Dropdown(
                    [
                        "prediction-offline",
                        "prediction-online",
                        # "prediction-online-summarized-info",
                        "prediction-offline-sme",
                        "prediction-online-sme",
                        "claude-prediction-offline",
                        "claude-prediction-online",
                        'prediction-request-rag',
                        "prediction-with-research-conservative",
                        "prediction-with-research-bold",
                        "prediction-request-reasoning-claude",
                        "prediction-request-rag-claude",
                        "prediction-url-cot-claude",
                    ], label="Tool Name", info="Choose the tool to run")
                # Each choice needs its own trailing comma: without it, adjacent
                # string literals are concatenated into one bogus model name.
                model_name = gr.Dropdown([
                    "gpt-3.5-turbo-0125",
                    "gpt-4-0125-preview",
                    "claude-3-haiku-20240307",
                    "claude-3-sonnet-20240229",
                    "claude-3-opus-20240229",
                ], label="Model Name", info="Choose the model to use")
            with gr.Row():
                openai_api_key = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API key here", type="password")
                anthropic_api_key = gr.Textbox(label="Anthropic API Key", placeholder="Enter your Anthropic API key here", type="password")
            with gr.Row():
                run_button = gr.Button("Run Benchmark")
            with gr.Row():
                with gr.Accordion("Results", open=True):
                    result = gr.Dataframe()
            with gr.Row():
                with gr.Accordion("Summary", open=False):
                    summary = gr.Dataframe()

            # The handler must produce one value per output component, in this order.
            run_button.click(run_benchmark_gradio,
                             inputs=[tool_name, model_name, openai_api_key, anthropic_api_key],
                             outputs=[result, summary])

# Enable the request queue (default per-listener concurrency limit of 40) and
# serve the app on port 7860.
demo.queue(default_concurrency_limit=40).launch(server_port=7860)