"""Evaluate the MetaGPT DataInterpreter against the full DABench benchmark suite."""
import fire
import pandas as pd
from DABench import DABench
from metagpt.logs import logger
from metagpt.roles.di.data_interpreter import DataInterpreter
from metagpt.utils.recovery_util import save_history
async def main():
    """Run the DataInterpreter on every DABench task and report accuracy.

    For each benchmark question: build the formatted prompt, run a fresh
    DataInterpreter, save the conversation history, and score the result.
    Per-task labels/predictions/correctness are written to
    ``DABench_output.xlsx`` and the aggregate score is logged.
    """
    bench = DABench()
    id_list, predictions, labels, is_true = [], [], [], []
    for key in bench.answers:  # values unused; iterate keys only
        id_list.append(key)
        labels.append(str(bench.get_answer(key)))
        try:
            requirement = bench.generate_formatted_prompt(key)
            di = DataInterpreter()
            result = await di.run(requirement)
            logger.info(result)
            save_history(role=di)
            # bench.eval returns a (prediction, is_correct) pair
            prediction, correct = bench.eval(key, str(result))
            predictions.append(str(prediction))
            is_true.append(str(correct))
        except Exception:
            # Best-effort: log the failure and score an empty answer so the
            # task still counts toward the aggregate instead of being skipped.
            logger.exception("DataInterpreter failed on task %s", key)
            # NOTE: unpack the eval tuple — recording str(tuple) would corrupt
            # the T/F column.
            _, correct = bench.eval(key, "")
            predictions.append("")
            is_true.append(str(correct))
    df = pd.DataFrame({"Label": labels, "Prediction": predictions, "T/F": is_true})
    df.to_excel("DABench_output.xlsx", index=False)
    logger.info(bench.eval_all(id_list, predictions))
if __name__ == "__main__":
    # fire.Fire wraps the async main() in a CLI entry point and drives the
    # event loop for the returned coroutine.
    fire.Fire(main)