import os import re from functools import partial from evaluation.utils.shared import codeact_user_response from openhands.events.action import CmdRunAction, MessageAction def try_parse_answer(act) -> str | None: raw_ans = '' if isinstance(act, MessageAction) and act.source == 'agent': raw_ans = act.content elif isinstance(act, CmdRunAction) and act.source == 'agent': raw_ans = act.thought else: return None agent_answer = re.findall(r'(.*?)', raw_ans, re.DOTALL) if not agent_answer: return None return agent_answer[0].strip() FAKE_RESPONSES = { 'CodeActAgent': partial( codeact_user_response, encapsulate_solution=True, try_parse=try_parse_answer ), } INST_SUFFIXES: dict[str, str] = { 'CodeActAgent': ( 'When you think you have solved the question, ' 'please first send your answer to user through message and then exit.\n' ) } def analysis_size(size_str): size_str = size_str.strip() avails = { 'B': 1, 'Byte': 1, 'K': 1024, 'KB': 1024, 'M': 1024 * 1024, 'MB': 1024 * 1024, 'G': 1024 * 1024 * 1024, 'GB': 1024 * 1024 * 1024, 'T': 1024 * 1024 * 1024 * 1024, 'TB': 1024 * 1024 * 1024 * 1024, 'P': 1024 * 1024 * 1024 * 1024 * 1024, 'PB': 1024 * 1024 * 1024 * 1024 * 1024, } for size_unit in avails: if size_str.endswith(size_unit): return int(size_str[: -len(size_unit)]) * avails[size_unit] return int(size_str) def compare_results(check_method: str, model_answer: str, final_ans: str) -> bool: try: match check_method: case 'check/integer-match.py': return int(model_answer) == int(final_ans) case 'check/size-match.py': return analysis_size(model_answer) == analysis_size(final_ans) return ( model_answer.replace('\r\n', '\n').replace('\r', '\n').strip() == final_ans.replace('\r\n', '\n').replace('\r', '\n').strip() ) except Exception: return False def create_sh_file(filename: str, cmds: str) -> None: with open(filename, 'w', encoding='utf-8') as file: file.write(cmds.replace('\r\n', '\n')) os.chmod(filename, 0o755)