import pandas as pd

from leaderboard.src.backend.model_operations import SummaryGenerator, EvaluationModel
from envs import HEM_PATH, SOURCE_PATH
from leaderboard.src.backend.util import load_dataframe, format_results

class Evaluator:
    """Runs summary generation and hallucination evaluation for a single model."""

    def __init__(self, model, revision, precision, num_fewshot, batch_size,
                 device, no_cache, limit, write_out=True, output_base_path='logs'):
        self.model = model
        self.revision = revision
        self.precision = precision
        self.num_fewshot = num_fewshot
        self.batch_size = batch_size
        self.device = device
        self.no_cache = no_cache
        self.limit = limit
        self.write_out = write_out
        self.output_base_path = output_base_path
        # SummaryGenerator produces summaries with the target model;
        # HEM_PATH points to the hallucination evaluation model used for scoring.
        self.summary_generator = SummaryGenerator(model, revision)
        self.eval_model = EvaluationModel(HEM_PATH)
    
    def evaluate(self):
        """Generate summaries for the source documents and score them for hallucination.

        Returns:
            Formatted results combining per-summary hallucination scores with
            aggregate metrics (accuracy, hallucination rate, answer rate, and
            average summary length).
        """
        # Generate one summary per source document.
        df = load_dataframe(SOURCE_PATH)
        generated_summaries_df = self.summary_generator.generate_summaries(df)

        # Statistics recorded by the generator during summary generation.
        avg_summary_len = self.summary_generator.avg_length
        answer_rate = self.summary_generator.answer_rate

        # Score each generated summary with the hallucination evaluation model.
        hallucination_scores = self.eval_model.evaluate_hallucination(generated_summaries_df)

        # compute_accuracy is a method and must be called to obtain a numeric
        # value (assumed here to take no arguments and to use the scores stored
        # by evaluate_hallucination); hallucination_rate is an attribute.
        accuracy = self.eval_model.compute_accuracy()
        hallucination_rate = self.eval_model.hallucination_rate

        results = format_results(hallucination_scores, self.model, self.revision,
                                 self.precision, accuracy, hallucination_rate,
                                 answer_rate, avg_summary_len)

        return results
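

# Minimal usage sketch. The model name, revision, and run settings below are
# illustrative placeholders, not values from this repository.
if __name__ == "__main__":
    evaluator = Evaluator(
        model="mistralai/Mistral-7B-Instruct-v0.2",  # hypothetical model id
        revision="main",
        precision="float16",
        num_fewshot=0,
        batch_size=1,
        device="cuda:0",
        no_cache=True,
        limit=None,
    )
    results = evaluator.evaluate()
    print(results)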