import pandas as pd
from leaderboard.src.backend.model_operations import SummaryGenerator, EvaluationModel
from envs import HEM_PATH, SOURCE_PATH
from leaderboard.src.backend.util import load_dataframe, format_results


class Evaluator:
    def __init__(self, model, revision, precision, num_fewshot, batch_size, device, no_cache, limit, write_out=True, output_base_path='logs'):
        self.model = model
        self.revision = revision
        self.precision = precision
        self.num_fewshot = num_fewshot
        self.batch_size = batch_size
        self.device = device
        self.no_cache = no_cache
        self.limit = limit
        self.write_out = write_out
        self.output_base_path = output_base_path
        self.summary_generator = SummaryGenerator(model, revision)
        self.eval_model = EvaluationModel(HEM_PATH)

    def evaluate(self):
        df = load_dataframe(SOURCE_PATH)
        generated_summaries_df = self.summary_generator.generate_summaries(df)
        avg_summary_len = self.summary_generator.avg_length
        answer_rate = self.summary_generator.answer_rate
        hallucination_scores = self.eval_model.evaluate_hallucination(generated_summaries_df)
        # Assuming compute_accuracy is a no-argument method: call it so a numeric
        # value (rather than the bound method object) is passed to format_results.
        accuracy = self.eval_model.compute_accuracy()
        hallucination_rate = self.eval_model.hallucination_rate
        results = format_results(hallucination_scores, self.model, self.revision, self.precision, accuracy, hallucination_rate, answer_rate, avg_summary_len)
        return results
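

# Hypothetical usage sketch (not part of the original file): constructing an
# Evaluator and running a single evaluation pass. The argument values below are
# illustrative assumptions, not values prescribed by the leaderboard; the call
# only works in an environment where the leaderboard package, HEM_PATH model,
# and SOURCE_PATH dataset are available.
if __name__ == "__main__":
    evaluator = Evaluator(
        model="example-org/example-model",  # assumed model identifier
        revision="main",
        precision="float16",
        num_fewshot=0,
        batch_size=1,
        device="cuda",
        no_cache=True,
        limit=None,
    )
    print(evaluator.evaluate())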