Terry Zhuo committed on
Commit
51ed153
·
1 Parent(s): d683e16
Files changed (1) hide show
  1. __app.py +0 -178
__app.py DELETED
@@ -1,178 +0,0 @@
1
import glob
import os
import shlex
import shutil
import subprocess
import sys
import threading
import time
import uuid
from pathlib import Path

from apscheduler.schedulers.background import BackgroundScheduler

import gradio as gr
12
-
13
# Base CLI entry point used to build every evaluation command.
default_command = "bigcodebench.evaluate"
# True while an evaluation subprocess is active; read/written under `lock`.
is_running = False
# Serializes access to `is_running` across concurrent requests.
lock = threading.Lock()
16
-
17
def generate_command(
    jsonl_file, split, subset, parallel,
    min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
    check_gt_only, no_gt,
    base_command="bigcodebench.evaluate",  # mirrors module-level default_command
):
    """Build the bigcodebench.evaluate CLI invocation as a single string.

    Side effect: if a samples file was uploaded, it is copied into the
    current working directory so the evaluator can read it by basename.

    Args:
        jsonl_file: Uploaded file object with a ``.name`` path, or None.
        split: Benchmark split ("complete" or "instruct").
        subset: Benchmark subset (e.g. "hard").
        parallel: Worker count; omitted from the command when None or 0.
        min_time_limit: Per-task minimum time limit (passed through as-is).
        max_as_limit / max_data_limit / max_stack_limit: Resource limits,
            coerced to int before serialization.
        check_gt_only: Append ``--check-gt-only`` when truthy.
        no_gt: Append ``--no-gt`` when truthy.
        base_command: CLI entry point; defaults to the evaluator binary.

    Returns:
        The full command line as one space-joined string.
    """
    command = [base_command]

    if jsonl_file is not None:
        # Copy the uploaded file next to the app so the evaluator can
        # reference it by basename.
        local_filename = os.path.basename(jsonl_file.name)
        shutil.copy(jsonl_file.name, local_filename)
        # shlex.quote keeps the joined command re-splittable even if the
        # uploaded filename contains spaces or shell metacharacters.
        command.extend(["--samples", shlex.quote(local_filename)])

    command.extend(["--split", split, "--subset", subset])

    if parallel is not None and parallel != 0:
        command.extend(["--parallel", str(int(parallel))])

    command.extend([
        "--min-time-limit", str(min_time_limit),
        "--max-as-limit", str(int(max_as_limit)),
        "--max-data-limit", str(int(max_data_limit)),
        "--max-stack-limit", str(int(max_stack_limit)),
    ])

    if check_gt_only:
        command.append("--check-gt-only")

    if no_gt:
        command.append("--no-gt")

    return " ".join(command)
49
-
50
-
51
def cleanup_previous_files(jsonl_file):
    """Delete leftovers from the working directory, keeping app assets.

    The fixed app files (and the uploaded samples file, when present) are
    preserved; any deletion failure is printed and never raised.
    """
    keep = {"Dockerfile", "app.py", "README.md", "__pycache__"}
    if jsonl_file is not None:
        keep.add(os.path.basename(jsonl_file.name))
    for entry in glob.glob("*"):
        if entry in keep:
            continue
        try:
            os.remove(entry)
        except Exception as e:
            # Best-effort cleanup: report and move on (e.g. directories).
            print(f"Error during cleanup of {entry}: {e}")
62
-
63
def find_result_file():
    """Return the most recently modified ``*.json`` file in cwd, or None."""
    candidates = glob.glob("*.json")
    if not candidates:
        return None
    return max(candidates, key=os.path.getmtime)
68
-
69
def run_bigcodebench(command):
    """Run the evaluation command, streaming its combined stdout/stderr.

    Yields log lines as the subprocess produces them, then a completion
    summary and the result-file location (if any). Only one evaluation may
    run at a time: concurrent calls yield a busy message and return.

    Args:
        command: The full command line as a single string.
    """
    global is_running
    with lock:
        if is_running:
            yield "A command is already running. Please wait for it to finish.\n"
            return
        is_running = True

    try:
        yield f"Executing command: {command}\n"

        # shlex.split (not str.split) keeps quoted arguments intact.
        process = subprocess.Popen(
            shlex.split(command),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )

        for line in process.stdout:
            yield line

        # BUG FIX: wait for the child so returncode is populated; without
        # this, returncode stays None and the error branch below never fires.
        process.wait()

        if process.returncode != 0:
            yield f"Error: Command exited with status {process.returncode}\n"

        yield "Evaluation completed.\n"

        result_file = find_result_file()
        if result_file:
            yield f"Result file found: {result_file}\n"
        else:
            yield "No result file found.\n"
    finally:
        # Always release the busy flag, even if streaming raised.
        with lock:
            is_running = False
100
-
101
def stream_logs(command, jsonl_file=None):
    """Drive one evaluation run, yielding the accumulated log text.

    Cleans the working directory first, then re-yields the growing log
    buffer on every line produced by :func:`run_bigcodebench`.
    """
    global is_running

    if is_running:
        yield "A command is already running. Please wait for it to finish.\n"
        return

    cleanup_previous_files(jsonl_file)
    yield "Cleaned up previous files.\n"

    buffered = []
    for chunk in run_bigcodebench(command):
        buffered.append(chunk)
        yield "".join(buffered)
115
-
116
# Gradio UI: inputs mirror generate_command's parameters; the computed
# command string is shown read-only and executed on submit.
with gr.Blocks() as demo:
    gr.Markdown("# BigCodeBench Evaluator")

    with gr.Row():
        jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
        split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
        subset = gr.Dropdown(choices=["hard"], label="Subset", value="hard")

    with gr.Row():
        parallel = gr.Number(label="Parallel (optional)", precision=0)
        min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
        max_as_limit = gr.Number(label="Max AS Limit", value=25*1024, precision=0)

    with gr.Row():
        max_data_limit = gr.Number(label="Max Data Limit", value=25*1024, precision=0)
        max_stack_limit = gr.Number(label="Max Stack Limit", value=10, precision=0)
        check_gt_only = gr.Checkbox(label="Check GT Only")
        no_gt = gr.Checkbox(label="No GT")

    command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
    with gr.Row():
        submit_btn = gr.Button("Run Evaluation")
        download_btn = gr.DownloadButton(label="Download Result")
    log_output = gr.Textbox(label="Execution Logs", lines=20)

    input_components = [
        jsonl_file, split, subset, parallel,
        min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
        check_gt_only, no_gt,
    ]

    # Regenerate the displayed CLI command whenever any input changes.
    for component in input_components:
        component.change(generate_command, inputs=input_components, outputs=command_output)

    def start_evaluation(command, jsonl_file, subset, split):
        """Stream evaluation logs and point the download button at the result.

        BUG FIX: every yield now produces exactly TWO values, matching the
        two registered outputs (log_output, download_btn); the original
        yielded three, which Gradio rejects. The original's trailing
        `return` in a generator (whose value Gradio discards) is now a
        final yield so the completion status actually reaches the UI, and
        the useless local `is_running = False` (which only shadowed the
        module global) was removed.
        """
        extra = subset + "_" if subset != "full" else ""
        if jsonl_file is not None:
            result_path = os.path.basename(jsonl_file.name).replace(".jsonl", f"_{extra}eval_results.json")
        else:
            result_path = None

        for log in stream_logs(command, jsonl_file):
            if jsonl_file is not None:
                yield log, gr.update(value=result_path, label=result_path)
            else:
                yield log, gr.update()

        result_file = find_result_file()
        if result_file:
            yield gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)
        else:
            yield gr.update(label="Evaluation completed. No result file found."), gr.update(value=result_path)

    submit_btn.click(start_evaluation,
                     inputs=[command_output, jsonl_file, subset, split],
                     outputs=[log_output, download_btn])

    demo.queue(max_size=300).launch(share=True, server_name="0.0.0.0", server_port=7860)

# NOTE(review): the scheduler is created but never started or given jobs
# in the visible source — presumably leftover or truncated; confirm intent.
scheduler = BackgroundScheduler()