giulio98 committed on
Commit
a07b52e
·
1 Parent(s): d10963a

Update execute.py

Browse files
Files changed (1) hide show
  1. execute.py +252 -251
execute.py CHANGED
@@ -1,252 +1,253 @@
1
- # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- # This code is adapted from OpenAI's release
16
- # https://github.com/openai/human-eval/blob/master/human_eval/execution.py
17
-
18
- import contextlib
19
- import faulthandler
20
- import io
21
- import multiprocessing
22
- import os
23
- import platform
24
- import signal
25
- import tempfile
26
- import sys
27
- from io import StringIO
28
- import contextlib
29
-
30
-
31
- def check_correctness(check_program, output, timeout, task_id, completion_id):
32
- """
33
- Evaluates the functional correctness of a completion by running the test
34
- suite provided in the problem.
35
- :param completion_id: an optional completion ID so we can match
36
- the results later even if execution finishes asynchronously.
37
- """
38
- manager = multiprocessing.Manager()
39
- result = manager.list()
40
-
41
- p = multiprocessing.Process(target=unsafe_execute, args=(check_program, output, result, timeout))
42
- p.start()
43
- p.join(timeout=timeout + 1)
44
- if p.is_alive():
45
- p.kill()
46
-
47
- if not result:
48
- result.append("timed out")
49
-
50
- return dict(
51
- task_id=task_id,
52
- passed=result[0] == "passed",
53
- result=result[0],
54
- completion_id=completion_id,
55
- )
56
-
57
-
58
-
59
-
60
- def unsafe_execute(check_program, output, result, timeout):
61
-
62
- with create_tempdir():
63
-
64
- # These system calls are needed when cleaning up tempdir.
65
- import os
66
- import shutil
67
-
68
- rmtree = shutil.rmtree
69
- rmdir = os.rmdir
70
- chdir = os.chdir
71
-
72
- # Disable functionalities that can make destructive changes to the test.
73
- reliability_guard()
74
-
75
- # Run program.
76
- try:
77
- exec_globals = {}
78
- with swallow_io():
79
- with time_limit(timeout):
80
- with stdoutIO() as s:
81
- exec(check_program, exec_globals)
82
- if(s.getvalue() == output):
83
- result.append("passed")
84
- else:
85
- result.append("not passed")
86
- except TimeoutException:
87
- result.append("timed out")
88
- except BaseException as e:
89
- result.append(f"failed: {e}")
90
-
91
- # Needed for cleaning up.
92
- shutil.rmtree = rmtree
93
- os.rmdir = rmdir
94
- os.chdir = chdir
95
-
96
- @contextlib.contextmanager
97
- def stdoutIO(stdout=None):
98
- old = sys.stdout
99
- if stdout is None:
100
- stdout = StringIO()
101
- sys.stdout = stdout
102
- yield stdout
103
- sys.stdout = old
104
-
105
-
106
- @contextlib.contextmanager
107
- def time_limit(seconds):
108
- def signal_handler(signum, frame):
109
- raise TimeoutException("Timed out!")
110
-
111
- signal.setitimer(signal.ITIMER_REAL, seconds)
112
- signal.signal(signal.SIGALRM, signal_handler)
113
- try:
114
- yield
115
- finally:
116
- signal.setitimer(signal.ITIMER_REAL, 0)
117
-
118
-
119
- @contextlib.contextmanager
120
- def swallow_io():
121
- stream = WriteOnlyStringIO()
122
- with contextlib.redirect_stdout(stream):
123
- with contextlib.redirect_stderr(stream):
124
- with redirect_stdin(stream):
125
- yield
126
-
127
-
128
- @contextlib.contextmanager
129
- def create_tempdir():
130
- with tempfile.TemporaryDirectory() as dirname:
131
- with chdir(dirname):
132
- yield dirname
133
-
134
-
135
- class TimeoutException(Exception):
136
- pass
137
-
138
-
139
- class WriteOnlyStringIO(io.StringIO):
140
- """StringIO that throws an exception when it's read from"""
141
-
142
- def read(self, *args, **kwargs):
143
- raise OSError
144
-
145
- def readline(self, *args, **kwargs):
146
- raise OSError
147
-
148
- def readlines(self, *args, **kwargs):
149
- raise OSError
150
-
151
- def readable(self, *args, **kwargs):
152
- """Returns True if the IO object can be read."""
153
- return False
154
-
155
-
156
- class redirect_stdin(contextlib._RedirectStream): # type: ignore
157
- _stream = "stdin"
158
-
159
-
160
- @contextlib.contextmanager
161
- def chdir(root):
162
- if root == ".":
163
- yield
164
- return
165
- cwd = os.getcwd()
166
- os.chdir(root)
167
- try:
168
- yield
169
- except BaseException as exc:
170
- raise exc
171
- finally:
172
- os.chdir(cwd)
173
-
174
-
175
- def reliability_guard(maximum_memory_bytes=None):
176
- """
177
- This disables various destructive functions and prevents the generated code
178
- from interfering with the test (e.g. fork bomb, killing other processes,
179
- removing filesystem files, etc.)
180
- WARNING
181
- This function is NOT a security sandbox. Untrusted code, including, model-
182
- generated code, should not be blindly executed outside of one. See the
183
- Codex paper for more information about OpenAI's code sandbox, and proceed
184
- with caution.
185
- """
186
-
187
- if maximum_memory_bytes is not None:
188
- import resource
189
-
190
- resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes))
191
- resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes))
192
- if not platform.uname().system == "Darwin":
193
- resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes))
194
-
195
- faulthandler.disable()
196
-
197
- import builtins
198
-
199
- builtins.exit = None
200
- builtins.quit = None
201
-
202
- import os
203
-
204
- os.environ["OMP_NUM_THREADS"] = "1"
205
-
206
- os.kill = None
207
- os.system = None
208
- os.putenv = None
209
- os.remove = None
210
- os.removedirs = None
211
- os.rmdir = None
212
- os.fchdir = None
213
- os.setuid = None
214
- os.fork = None
215
- os.forkpty = None
216
- os.killpg = None
217
- os.rename = None
218
- os.renames = None
219
- os.truncate = None
220
- os.replace = None
221
- os.unlink = None
222
- os.fchmod = None
223
- os.fchown = None
224
- os.chmod = None
225
- os.chown = None
226
- os.chroot = None
227
- os.fchdir = None
228
- os.lchflags = None
229
- os.lchmod = None
230
- os.lchown = None
231
- os.getcwd = None
232
- os.chdir = None
233
-
234
- import shutil
235
-
236
- shutil.rmtree = None
237
- shutil.move = None
238
- shutil.chown = None
239
-
240
- import subprocess
241
-
242
- subprocess.Popen = None # type: ignore
243
-
244
- __builtins__["help"] = None
245
-
246
- import sys
247
-
248
- sys.modules["ipdb"] = None
249
- sys.modules["joblib"] = None
250
- sys.modules["resource"] = None
251
- sys.modules["psutil"] = None
 
252
  sys.modules["tkinter"] = None
 
1
+ # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # This code is adapted from OpenAI's release
16
+ # https://github.com/openai/human-eval/blob/master/human_eval/execution.py
17
+
18
+ import contextlib
19
+ import faulthandler
20
+ import io
21
+ import multiprocessing
22
+ import os
23
+ import platform
24
+ import signal
25
+ import tempfile
26
+ import sys
27
+ from io import StringIO
28
+ import contextlib
29
+
30
+
31
+ def check_correctness(check_program, output, timeout, task_id, completion_id):
32
+ """
33
+ Evaluates the functional correctness of a completion by running the test
34
+ suite provided in the problem.
35
+ :param completion_id: an optional completion ID so we can match
36
+ the results later even if execution finishes asynchronously.
37
+ """
38
+ manager = multiprocessing.Manager()
39
+ result = manager.list()
40
+
41
+ p = multiprocessing.Process(target=unsafe_execute, args=(check_program, output, result, timeout))
42
+ p.start()
43
+ p.join(timeout=timeout + 1)
44
+ if p.is_alive():
45
+ p.kill()
46
+
47
+ if not result:
48
+ result.append("timed out")
49
+
50
+ return dict(
51
+ task_id=task_id,
52
+ passed=result[0] == "passed",
53
+ result=result[0],
54
+ completion_id=completion_id,
55
+ )
56
+
57
+
58
+
59
+
60
+ def unsafe_execute(check_program, output, result, timeout):
61
+
62
+ with create_tempdir():
63
+
64
+ # These system calls are needed when cleaning up tempdir.
65
+ import os
66
+ import shutil
67
+
68
+ rmtree = shutil.rmtree
69
+ rmdir = os.rmdir
70
+ chdir = os.chdir
71
+
72
+ # Disable functionalities that can make destructive changes to the test.
73
+ reliability_guard()
74
+
75
+ # Run program.
76
+ try:
77
+ exec_globals = {}
78
+ with swallow_io():
79
+ with time_limit(timeout):
80
+ with stdoutIO() as s:
81
+ exec(check_program, exec_globals)
82
+ if(s.getvalue() == output):
83
+ print(s.getvalue())
84
+ result.append("passed")
85
+ else:
86
+ result.append("not passed")
87
+ except TimeoutException:
88
+ result.append("timed out")
89
+ except BaseException as e:
90
+ result.append(f"failed: {e}")
91
+
92
+ # Needed for cleaning up.
93
+ shutil.rmtree = rmtree
94
+ os.rmdir = rmdir
95
+ os.chdir = chdir
96
+
97
+ @contextlib.contextmanager
98
+ def stdoutIO(stdout=None):
99
+ old = sys.stdout
100
+ if stdout is None:
101
+ stdout = StringIO()
102
+ sys.stdout = stdout
103
+ yield stdout
104
+ sys.stdout = old
105
+
106
+
107
+ @contextlib.contextmanager
108
+ def time_limit(seconds):
109
+ def signal_handler(signum, frame):
110
+ raise TimeoutException("Timed out!")
111
+
112
+ signal.setitimer(signal.ITIMER_REAL, seconds)
113
+ signal.signal(signal.SIGALRM, signal_handler)
114
+ try:
115
+ yield
116
+ finally:
117
+ signal.setitimer(signal.ITIMER_REAL, 0)
118
+
119
+
120
+ @contextlib.contextmanager
121
+ def swallow_io():
122
+ stream = WriteOnlyStringIO()
123
+ with contextlib.redirect_stdout(stream):
124
+ with contextlib.redirect_stderr(stream):
125
+ with redirect_stdin(stream):
126
+ yield
127
+
128
+
129
+ @contextlib.contextmanager
130
+ def create_tempdir():
131
+ with tempfile.TemporaryDirectory() as dirname:
132
+ with chdir(dirname):
133
+ yield dirname
134
+
135
+
136
+ class TimeoutException(Exception):
137
+ pass
138
+
139
+
140
+ class WriteOnlyStringIO(io.StringIO):
141
+ """StringIO that throws an exception when it's read from"""
142
+
143
+ def read(self, *args, **kwargs):
144
+ raise OSError
145
+
146
+ def readline(self, *args, **kwargs):
147
+ raise OSError
148
+
149
+ def readlines(self, *args, **kwargs):
150
+ raise OSError
151
+
152
+ def readable(self, *args, **kwargs):
153
+ """Returns True if the IO object can be read."""
154
+ return False
155
+
156
+
157
+ class redirect_stdin(contextlib._RedirectStream): # type: ignore
158
+ _stream = "stdin"
159
+
160
+
161
+ @contextlib.contextmanager
162
+ def chdir(root):
163
+ if root == ".":
164
+ yield
165
+ return
166
+ cwd = os.getcwd()
167
+ os.chdir(root)
168
+ try:
169
+ yield
170
+ except BaseException as exc:
171
+ raise exc
172
+ finally:
173
+ os.chdir(cwd)
174
+
175
+
176
+ def reliability_guard(maximum_memory_bytes=None):
177
+ """
178
+ This disables various destructive functions and prevents the generated code
179
+ from interfering with the test (e.g. fork bomb, killing other processes,
180
+ removing filesystem files, etc.)
181
+ WARNING
182
+ This function is NOT a security sandbox. Untrusted code, including, model-
183
+ generated code, should not be blindly executed outside of one. See the
184
+ Codex paper for more information about OpenAI's code sandbox, and proceed
185
+ with caution.
186
+ """
187
+
188
+ if maximum_memory_bytes is not None:
189
+ import resource
190
+
191
+ resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes))
192
+ resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes))
193
+ if not platform.uname().system == "Darwin":
194
+ resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes))
195
+
196
+ faulthandler.disable()
197
+
198
+ import builtins
199
+
200
+ builtins.exit = None
201
+ builtins.quit = None
202
+
203
+ import os
204
+
205
+ os.environ["OMP_NUM_THREADS"] = "1"
206
+
207
+ os.kill = None
208
+ os.system = None
209
+ os.putenv = None
210
+ os.remove = None
211
+ os.removedirs = None
212
+ os.rmdir = None
213
+ os.fchdir = None
214
+ os.setuid = None
215
+ os.fork = None
216
+ os.forkpty = None
217
+ os.killpg = None
218
+ os.rename = None
219
+ os.renames = None
220
+ os.truncate = None
221
+ os.replace = None
222
+ os.unlink = None
223
+ os.fchmod = None
224
+ os.fchown = None
225
+ os.chmod = None
226
+ os.chown = None
227
+ os.chroot = None
228
+ os.fchdir = None
229
+ os.lchflags = None
230
+ os.lchmod = None
231
+ os.lchown = None
232
+ os.getcwd = None
233
+ os.chdir = None
234
+
235
+ import shutil
236
+
237
+ shutil.rmtree = None
238
+ shutil.move = None
239
+ shutil.chown = None
240
+
241
+ import subprocess
242
+
243
+ subprocess.Popen = None # type: ignore
244
+
245
+ __builtins__["help"] = None
246
+
247
+ import sys
248
+
249
+ sys.modules["ipdb"] = None
250
+ sys.modules["joblib"] = None
251
+ sys.modules["resource"] = None
252
+ sys.modules["psutil"] = None
253
  sys.modules["tkinter"] = None