File size: 1,585 Bytes
c87c295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import math


def check_id(data, task_id):
    """Assert that the record at position ``task_id`` carries the matching id.

    Args:
        data: Indexable collection of task records; each record is indexed
            with the key ``"task_id"``.
        task_id: Position to look up in ``data``; also the numeric suffix
            expected in the record's ``"task_id"`` value.

    Raises:
        AssertionError: If the stored id is not ``HumanEval/<task_id>``.
    """
    stored_id = data[task_id]["task_id"]
    expected_id = f"HumanEval/{task_id}"
    assert stored_id == expected_id


def check_valid(s: str):
    """Return True when ``s`` is a balanced sequence made only of parentheses.

    The string is scanned left to right. Any character other than ``"("`` or
    ``")"`` (whitespace included) makes the result False, as does a ``")"``
    that has no earlier unmatched ``"("``. The empty string is balanced.

    Args:
        s: The string to examine.

    Returns:
        True if every ``"("`` is closed by a later ``")"`` and nothing else
        appears in ``s``; False otherwise.
    """
    depth = 0  # number of "(" still waiting for their ")"
    for symbol in s:
        if symbol not in ("(", ")"):
            # Only parentheses are allowed.
            return False
        depth += 1 if symbol == "(" else -1
        if depth < 0:
            # A ")" showed up before any "(" it could close.
            return False
    return depth == 0


def fix(data):
    """Apply the HumanEval/126 and HumanEval/6 corrections to ``data`` in place.

    For entry 126 the ``"contract"`` string is replaced with two assertions
    requiring ``lst`` to be a list of non-negative ints, and ``"plus_input"``
    is reduced to the inputs whose first argument satisfies that requirement.
    For entry 6 a ``cnt == 0`` assertion is appended to the existing
    ``"contract"`` and ``"plus_input"`` is reduced to the inputs whose first
    argument passes ``check_valid``.

    Args:
        data: Indexable collection of task records. Positions 126 and 6 must
            hold the records whose ``"task_id"`` is ``HumanEval/126`` and
            ``HumanEval/6`` respectively; this is verified with ``check_id``.

    Returns:
        The same ``data`` object after mutation.
    """

    def is_non_negative_int_list(candidate):
        # Exact type comparison rather than isinstance: bool values and
        # subclasses of list/int do not pass.
        if type(candidate) == list:
            return all(type(item) == int and item >= 0 for item in candidate)
        return False

    # --- HumanEval/126 ---
    check_id(data, 126)
    entry_126 = data[126]
    entry_126["contract"] = "".join(
        [
            '\n    assert type(lst) == list, "invalid inputs" # $_CONTRACT_$',
            '\n    assert all(type(x) == int and x >= 0 for x in lst), "invalid inputs" # $_CONTRACT_$\n',
        ]
    )
    entry_126["plus_input"] = [
        args for args in entry_126["plus_input"] if is_non_negative_int_list(args[0])
    ]

    # --- HumanEval/6 ---
    check_id(data, 6)
    entry_6 = data[6]
    entry_6["contract"] += '    assert cnt == 0, "invalid inputs"\n'
    entry_6["plus_input"] = list(
        filter(lambda args: check_valid(args[0]), entry_6["plus_input"])
    )

    return data


if __name__ == "__main__":
    import json

    # Each pair is (input file, output file); both are JSON Lines files with
    # one task record per line. They are processed in the listed order.
    file_pairs = (
        ("HumanEvalPlus-v0.1.5.jsonl", "HumanEvalPlus-v0.1.6.jsonl"),
        ("HumanEvalPlus-Mini-v0.1.5.jsonl", "HumanEvalPlus-Mini-v0.1.6.jsonl"),
    )

    for source_path, target_path in file_pairs:
        # Read as UTF-8 explicitly so decoding matches the UTF-8 encoding
        # used when writing, regardless of the platform default.
        with open(source_path, encoding="utf-8") as f:
            # Lines keep their trailing newline, so a plain truthiness test
            # never skips anything; strip first so blank lines are really
            # ignored instead of being handed to json.loads.
            data = [json.loads(line) for line in f if line.strip()]

        data = fix(data)

        # Binary mode keeps the "\n" terminators untouched on every platform.
        with open(target_path, "wb") as f:
            for x in data:
                f.write((json.dumps(x) + "\n").encode("utf-8"))