import os
# Bootstrap: detect an NVIDIA GPU, install optimum-benchmark with the matching
# onnxruntime flavour, and define the option lists used by the UI below.
import shutil
import subprocess
import sys

# Extras installed regardless of hardware; only the onnxruntime extra varies.
_COMMON_EXTRAS = "openvino,neural-compressor,diffusers,peft"
_OPTIMUM_BENCHMARK_SOURCE = "git+https://github.com/huggingface/optimum-benchmark.git"


def _nvidia_smi_succeeds():
    """Return True when ``nvidia-smi`` is found on PATH and exits with status 0."""
    if shutil.which("nvidia-smi") is None:
        return False
    try:
        # Output is left attached to the console, as with the previous os.system call.
        return subprocess.call(["nvidia-smi"]) == 0
    except OSError:
        # The binary exists but could not be executed: treat as "no GPU".
        return False


def _install_optimum_benchmark(onnxruntime_extra):
    """Install optimum-benchmark from git with the given onnxruntime extra.

    Args:
        onnxruntime_extra: name of the onnxruntime extra to request
            (``"onnxruntime"`` or ``"onnxruntime-gpu"``).

    Returns:
        The exit status of the pip process (0 on success).
    """
    requirement = (
        f"optimum-benchmark[{onnxruntime_extra},{_COMMON_EXTRAS}]"
        f"@{_OPTIMUM_BENCHMARK_SOURCE}"
    )
    # `python -m pip` targets the interpreter running this script, and the
    # argument list bypasses the shell so the `[...]` extras are never globbed.
    status = subprocess.call([sys.executable, "-m", "pip", "install", requirement])
    if status != 0:
        # Best effort, as before: report the failure but let the script go on;
        # the imports further down will fail loudly if the package is missing.
        print(
            f"warning: pip install of {requirement!r} exited with status {status}",
            file=sys.stderr,
        )
    return status


nvidia_available = _nvidia_smi_succeeds()
if nvidia_available:
    _install_optimum_benchmark("onnxruntime-gpu")
    DEVICES = ["cpu", "cuda"]
else:
    _install_optimum_benchmark("onnxruntime")
    DEVICES = ["cpu"]

# The order of these lists matters: the UI toggles the visibility of the config
# accordions by iterating over them positionally.
BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor"]
BENCHMARKS = ["inference", "training"]
import random
import gradio as gr
from optimum_benchmark.task_utils import (
TASKS_TO_AUTOMODELS,
infer_task_from_model_name_or_path,
)
from run import run_benchmark
from config_store import (
get_training_config,
get_inference_config,
get_neural_compressor_config,
get_onnxruntime_config,
get_openvino_config,
get_pytorch_config,
)
# Build the benchmark UI: model/task/device selection, per-backend and
# per-benchmark configuration panels, and a button that runs the benchmark.
with gr.Blocks() as demo:
    # add image
    # NOTE(review): this literal is empty in the current file; the image markup
    # appears to be missing - restore it from the canonical source if available.
    gr.Markdown("""""")

    # title text
    # NOTE(review): the markup around "Unlock " appears to be missing as well;
    # the literals below keep exactly the characters present in the file.
    gr.Markdown(
        "\n"
        "Unlock "
        "\n"
    )

    model = gr.Textbox(
        label="model",
        value="distilbert-base-uncased-finetuned-sst-2-english",
        info="Model to run the benchmark on. Press enter to infer the task automatically.",
    )
    task = gr.Dropdown(
        label="task",
        value="text-classification",
        choices=list(TASKS_TO_AUTOMODELS),
        info="Task to run the benchmark on. Can be inferred automatically by submitting a model.",
    )
    device = gr.Dropdown(
        value="cpu",
        label="device",
        choices=DEVICES,
        info="Device to run the benchmark on. make sure to duplicate the space if you wanna run on CUDA devices.",
    )
    experiment = gr.Textbox(
        label="experiment_name",
        value=f"awesome-experiment-{random.randint(0, 1000)}",
        info="Name of the experiment. Will be used to create a folder where results are stored.",
    )

    # Submitting the model textbox fills in the task dropdown.
    model.submit(fn=infer_task_from_model_name_or_path, inputs=model, outputs=task)

    # NOTE(review): the nesting below was reconstructed from the `with`
    # statements of a flattened file - confirm it matches the intended layout.
    with gr.Row():
        with gr.Column():
            with gr.Row():
                backend = gr.Dropdown(
                    label="backend",
                    choices=BACKENDS,
                    value=BACKENDS[0],
                    info="Backend to run the benchmark on.",
                )

            # One accordion per backend, declared in the same order as BACKENDS
            # because the change handler below pairs them up positionally.
            with gr.Row() as backend_configs:
                with gr.Accordion(label="Pytorch Config", open=False, visible=True):
                    pytorch_config = get_pytorch_config()
                with gr.Accordion(label="OnnxRunTime Config", open=False, visible=False):
                    onnxruntime_config = get_onnxruntime_config()
                with gr.Accordion(label="OpenVINO Config", open=False, visible=False):
                    openvino_config = get_openvino_config()
                with gr.Accordion(label="Neural Compressor Config", open=False, visible=False):
                    neural_compressor_config = get_neural_compressor_config()

            # hide backend configs based on backend
            backend.change(
                inputs=backend,
                outputs=backend_configs.children,
                fn=lambda value: [gr.update(visible=value == key) for key in BACKENDS],
            )

        with gr.Column():
            with gr.Row():
                benchmark = gr.Dropdown(
                    label="benchmark",
                    choices=BENCHMARKS,
                    value=BENCHMARKS[0],
                    info="Type of benchmark to run.",
                )

            # One accordion per benchmark type, in the same order as BENCHMARKS.
            with gr.Row() as benchmark_configs:
                with gr.Accordion(label="Inference Config", open=False, visible=True):
                    inference_config = get_inference_config()
                with gr.Accordion(label="Training Config", open=False, visible=False):
                    training_config = get_training_config()

            # hide benchmark configs based on benchmark
            benchmark.change(
                inputs=benchmark,
                outputs=benchmark_configs.children,
                fn=lambda value: [gr.update(visible=value == key) for key in BENCHMARKS],
            )

    baseline = gr.Checkbox(
        value=False,
        label="Compare to Baseline",
        info="If checked, will run two experiments: one with the given configuration, and another with a baseline pytorch configuration.",
    )

    button = gr.Button(value="Run Benchmark", variant="primary")
    with gr.Accordion(label="", open=True):
        html_output = gr.HTML()
        table_output = gr.Dataframe(visible=False)

    # `inputs` is deliberately a set rather than a list; the *_config values
    # are unpacked into it alongside the top-level components.
    button.click(
        fn=run_benchmark,
        inputs={
            experiment,
            baseline,
            model,
            task,
            device,
            backend,
            benchmark,
            *pytorch_config,
            *openvino_config,
            *onnxruntime_config,
            *neural_compressor_config,
            *inference_config,
            *training_config,
        },
        outputs=[html_output, button, table_output],
        queue=True,
    )

demo.queue().launch()