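# Evaluate MetaGPT's DataInterpreter on the full DABench task set: run every
# benchmark question, score each answer against the ground truth, and export
# the per-task results to an Excel sheet.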
import fire
import pandas as pd

from DABench import DABench
from metagpt.logs import logger
from metagpt.roles.di.data_interpreter import DataInterpreter
from metagpt.utils.recovery_util import save_history

async def main():
    """Evaluate the DataInterpreter agent on every task in DABench."""
    bench = DABench()
    id_list, predictions, labels, is_true = [], [], [], []
    for key in bench.answers:
        id_list.append(key)
        labels.append(str(bench.get_answer(key)))
        try:
            # Build the formatted task prompt and let the DataInterpreter solve it.
            requirement = bench.generate_formatted_prompt(key)
            di = DataInterpreter()
            result = await di.run(requirement)
            logger.info(result)
            save_history(role=di)
            # Score the agent's output against the benchmark's ground-truth answer.
            temp_prediction, temp_istrue = bench.eval(key, str(result))
            is_true.append(str(temp_istrue))
            predictions.append(str(temp_prediction))
        except Exception:
            # On failure, score an empty answer so the result lists stay aligned.
            temp_prediction, temp_istrue = bench.eval(key, "")
            is_true.append(str(temp_istrue))
            predictions.append("")

    # Export per-task results and log the aggregate benchmark score.
    df = pd.DataFrame({"Label": labels, "Prediction": predictions, "T/F": is_true})
    df.to_excel("DABench_output.xlsx", index=False)
    logger.info(bench.eval_all(id_list, predictions))

if __name__ == "__main__":
    fire.Fire(main)
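
# Usage: fire.Fire(main) runs main() when the script is invoked directly,
# e.g. `python dabench_eval.py` (file name illustrative; use whatever name
# this script is saved under).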