def generate_prompt(source_passage: str) -> str:
    """
    Generates a prompt for a chatbot to summarize a given passage.

    Args:
        source_passage (str): The passage to be summarized.

    Returns:
        str: A formatted prompt string for the chatbot.
    """
    if not source_passage:
        raise ValueError("Source passage is empty.")

    return f"""You are a chat bot answering questions using data. You must stick to the answers provided solely by the text in the passage provided.
    You are asked the question 'Provide a concise summary of the following passage, covering the core pieces of information described:'
    Passage:\n {source_passage}
    """


def format_results(model_name: str, revision: str, precision: str,
                   accuracy: float, hallucination_rate: float,
                   answer_rate: float, avg_summary_len: float,
                   error_rate: float) -> dict:
    """
    Formats the evaluation results into a structured dictionary.

    Args:
        model_name (str): The name of the evaluated model.
        revision (str): The revision hash of the model.
        precision (str): The precision with which the evaluation was run.
        accuracy (float): The accuracy score from the evaluation.
        hallucination_rate (float): The hallucination rate from the evaluation.
        answer_rate (float): The answer rate from the evaluation.
        avg_summary_len (float): The average summary length from the evaluation.
        error_rate (float): The rate at which errors occurred during summary generation.

    Returns:
        dict: A dictionary containing the structured evaluation results.
    """
    results = {
        "config": {
            "model_dtype": precision, # Precision with which you ran the evaluation
            "model_name": model_name, # Name of the model
            "model_sha": revision # Hash of the model 
        },
        "results": {
            "accuracy": {
                "accuracy": accuracy
            },
            "hallucination_rate": {
                "hallucination_rate": hallucination_rate
            },
            "answer_rate": {
                "answer_rate": answer_rate
            },
            "average_summary_length": {
                "average_summary_length": avg_summary_len
            },
            "error_rate": {
                "error_rate": error_rate
            }
        }
    }

    return results
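

# Minimal smoke test, runnable as a script. The model name, revision hash, and
# metric values below are illustrative placeholders, not real evaluation results.
if __name__ == "__main__":
    import json

    sample = "Example passage text used only to exercise the prompt builder."
    print(generate_prompt(sample))

    demo = format_results(
        model_name="example-org/example-model",  # hypothetical model name
        revision="0000000",                      # placeholder commit hash
        precision="float16",
        accuracy=0.95,
        hallucination_rate=0.05,
        answer_rate=0.98,
        avg_summary_len=64.2,
        error_rate=0.01,
    )
    print(json.dumps(demo, indent=2))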