File size: 2,411 Bytes
246d201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import re
from functools import partial

from evaluation.utils.shared import codeact_user_response
from openhands.events.action import CmdRunAction, MessageAction


def try_parse_answer(act) -> str | None:
    """Extract the agent's answer from a ``<solution>...</solution>`` tag.

    Only two kinds of action are inspected, and only when their ``source``
    is ``'agent'``: the ``content`` of a ``MessageAction`` and the
    ``thought`` of a ``CmdRunAction``.

    Returns:
        The text inside the first ``<solution>`` tag with surrounding
        whitespace stripped, or ``None`` when the action is of another
        kind/source or carries no such tag.
    """
    if isinstance(act, MessageAction) and act.source == 'agent':
        text = act.content
    elif isinstance(act, CmdRunAction) and act.source == 'agent':
        text = act.thought
    else:
        return None

    # DOTALL lets a solution span several lines; the lazy group stops at the
    # first closing tag.
    found = re.search(r'<solution>(.*?)</solution>', text, re.DOTALL)
    return found.group(1).strip() if found else None


# Per-agent callables, keyed by agent class name. Each entry is
# `codeact_user_response` pre-bound with `encapsulate_solution=True` and with
# `try_parse_answer` as its `try_parse` hook.
# NOTE(review): presumably used to simulate the user's side of the
# conversation during evaluation -- confirm against the caller.
FAKE_RESPONSES = {
    'CodeActAgent': partial(
        codeact_user_response,
        encapsulate_solution=True,
        try_parse=try_parse_answer,
    ),
}

# Per-agent instruction text, keyed by agent class name.
# NOTE(review): presumably appended to the task prompt -- confirm at call site.
INST_SUFFIXES: dict[str, str] = {
    'CodeActAgent': (
        'When you think you have solved the question, please first '
        'send your answer to user through message and then exit.\n'
    ),
}


def analysis_size(size_str: str) -> int:
    """Convert a human-readable size such as ``'10KB'`` into a byte count.

    Recognised (case-sensitive) suffixes are ``B``/``Byte``, ``K``/``KB``,
    ``M``/``MB``, ``G``/``GB``, ``T``/``TB`` and ``P``/``PB``; every step is
    a factor of 1024. A string without a recognised suffix is parsed as a
    plain integer number of bytes.

    Args:
        size_str: The size text; leading/trailing whitespace is ignored.

    Returns:
        The size in bytes.

    Raises:
        ValueError: If the numeric part is not a valid integer.
    """
    size_str = size_str.strip()
    multipliers = {
        'B': 1,
        'Byte': 1,
        'K': 1024,
        'KB': 1024,
        'M': 1024**2,
        'MB': 1024**2,
        'G': 1024**3,
        'GB': 1024**3,
        'T': 1024**4,
        'TB': 1024**4,
        'P': 1024**5,
        'PB': 1024**5,
    }
    # Try the longest suffixes first: otherwise 'B' matches the tail of
    # '10KB' and the remaining '10K' fails to parse as an integer.
    for unit in sorted(multipliers, key=len, reverse=True):
        if size_str.endswith(unit):
            return int(size_str[: -len(unit)]) * multipliers[unit]
    return int(size_str)


def compare_results(check_method: str, model_answer: str, final_ans: str) -> bool:
    """Decide whether the model's answer matches the reference answer.

    Args:
        check_method: Selects the comparison. ``'check/integer-match.py'``
            compares both values as integers, ``'check/size-match.py'``
            compares them as byte counts via ``analysis_size``; any other
            value compares the texts after normalising line endings to
            ``'\\n'`` and stripping surrounding whitespace.
        model_answer: The answer produced by the model.
        final_ans: The reference answer.

    Returns:
        ``True`` on a match; ``False`` on a mismatch or when the comparison
        itself raises (for example an unparsable number).
    """

    def _normalized(text: str) -> str:
        # Fold CRLF and lone CR into LF, then trim the ends.
        return text.replace('\r\n', '\n').replace('\r', '\n').strip()

    try:
        if check_method == 'check/integer-match.py':
            return int(model_answer) == int(final_ans)
        if check_method == 'check/size-match.py':
            return analysis_size(model_answer) == analysis_size(final_ans)
        return _normalized(model_answer) == _normalized(final_ans)
    except Exception:
        # Any failure to parse or compare counts as "answers differ".
        return False


def create_sh_file(filename: str, cmds: str) -> None:
    """Write ``cmds`` to ``filename`` as a script and mark it executable.

    Args:
        filename: Path of the file to create or overwrite.
        cmds: Script text; ``'\\r\\n'`` sequences are converted to ``'\\n'``.

    The file is written as UTF-8 and its mode is set to ``0o755``.
    """
    # newline='\n' disables platform newline translation, so the LF endings
    # produced below are not turned back into CRLF when running on Windows.
    with open(filename, 'w', encoding='utf-8', newline='\n') as file:
        file.write(cmds.replace('\r\n', '\n'))
    os.chmod(filename, 0o755)