import gradio as gr
import subprocess
import sys
import os
import threading
import time
import uuid
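

# Tee-style writer that mirrors anything written to stdout into a log file as well.
# Note: this class is defined but never instantiated in this script.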
class Logger:
    def __init__(self, filename):
        self.terminal = sys.stdout
        self.log = open(filename, "w")

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)
        self.log.flush()

    def flush(self):
        self.terminal.flush()
        self.log.flush()

    def isatty(self):
        return False
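

# Base CLI entry point and a module-level flag used to prevent concurrent runs.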
default_command = "bigcodebench.evaluate"
is_running = False
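

# Assemble the bigcodebench.evaluate command line from the current UI values.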
def generate_command(
    jsonl_file, split, subset, save_pass_rate, parallel,
    min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
    check_gt_only, no_gt
):
    command = [default_command]

    if jsonl_file is not None:
        samples = os.path.basename(jsonl_file.name)
        command.extend(["--samples", samples])

    command.extend(["--split", split, "--subset", subset])

    if save_pass_rate:
        command.append("--save_pass_rate")

    if parallel is not None and parallel != 0:
        command.extend(["--parallel", str(int(parallel))])

    command.extend([
        "--min-time-limit", str(min_time_limit),
        "--max-as-limit", str(int(max_as_limit)),
        "--max-data-limit", str(int(max_data_limit)),
        "--max-stack-limit", str(int(max_stack_limit))
    ])

    if check_gt_only:
        command.append("--check-gt-only")

    if no_gt:
        command.append("--no-gt")

    return " ".join(command)
def run_bigcodebench(command):
    global is_running
    is_running = True
    yield f"Executing command: {command}\n"

    process = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
    for line in process.stdout:
        yield line
    process.wait()

    if process.returncode != 0:
        yield f"Error: Command exited with status {process.returncode}\n"

    cleanup_command = "pids=$(ps -u $(id -u) -o pid,comm | grep 'bigcodebench' | awk '{print $1}'); if [ -n \"$pids\" ]; then echo $pids | xargs -r kill; fi; rm -rf /tmp/*"
    subprocess.run(cleanup_command, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    is_running = False
    yield "Evaluation completed.\n"
def stream_logs(command):
    global is_running
    if is_running:
        yield "A command is already running. Please wait for it to finish.\n"
        return

    log_content = []
    for log_line in run_bigcodebench(command):
        log_content.append(log_line)
        yield "".join(log_content)
def read_logs(log_file):
    if os.path.exists(log_file):
        with open(log_file, "r") as f:
            return f.read()
    return ""
with gr.Blocks() as demo:
    gr.Markdown("# BigCodeBench Evaluator")

    with gr.Row():
        jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
        split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
        subset = gr.Dropdown(choices=["full", "hard"], label="Subset", value="hard")

    with gr.Row():
        save_pass_rate = gr.Checkbox(label="Save Pass Rate")
        parallel = gr.Number(label="Parallel (optional)", precision=0)
        min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
        max_as_limit = gr.Number(label="Max AS Limit", value=200*1024, precision=0)

    with gr.Row():
        max_data_limit = gr.Number(label="Max Data Limit", value=10*1024, precision=0)
        max_stack_limit = gr.Number(label="Max Stack Limit", value=5, precision=0)
        check_gt_only = gr.Checkbox(label="Check GT Only")
        no_gt = gr.Checkbox(label="No GT")

    command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
    submit_btn = gr.Button("Run Evaluation")
    log_output = gr.Textbox(label="Execution Logs", lines=10)

    # Hidden component to store the unique log file path
    session_log_file = gr.State("")

    def update_command(*args):
        return generate_command(*args)

    input_components = [
        jsonl_file, split, subset, save_pass_rate, parallel,
        min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
        check_gt_only, no_gt
    ]

    # Regenerate the command preview whenever any input changes.
    for component in input_components:
        component.change(update_command, inputs=input_components, outputs=command_output)
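
    # Alternative submit handler; unused, since the button below is wired to stream_logs.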
    def on_submit(command):
        global is_running
        if is_running:
            yield "A command is already running. Please wait for it to finish."
            return

        log_accumulator = []
        for log_line in run_bigcodebench(command):
            log_accumulator.append(log_line)
            yield "".join(log_accumulator)
    submit_btn.click(stream_logs, inputs=[command_output], outputs=[log_output])

    # def update_logs(session_log_file):
    #     return read_logs(session_log_file)
    # demo.load(update_logs, inputs=[session_log_file], outputs=[log_output], every=1)
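

# Queue incoming requests (at most 300 waiting) before launching the app.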
if __name__ == "__main__":
demo.queue(max_size=300).launch() |