import json
import re
from typing import Callable, Literal, List, Tuple

import matplotlib.figure
import pandas as pd
from IPython.display import Markdown
from funix import funix, import_theme

from vectara_theme import vectara_theme

# Register the custom theme object with Funix. The decorator below selects a
# theme by the name "vectara" -- presumably the name this call registers
# (TODO confirm against vectara_theme).
import_theme(vectara_theme)

from app_utils import load_results, visualize_leaderboard

# Results are loaded once at import time and shared by every call to
# `leaderboard`; the function only ever derives filtered views from it.
results_df = load_results()

# Columns shown in the results table, in display order.
_DISPLAY_COLUMNS = ["LLM", "Hallucination %", "Answer %", "Avg Summary Words"]


@funix(
    title="Hughes Hallucination Evaluation Model (HHEM) Leaderboard",
    direction="column",
    autorun="always",
    theme="vectara",
    matplotlib_format="svg",
)
def leaderboard(
    filter_models_by_name: str = "",
) -> Tuple[Markdown, matplotlib.figure.Figure, pd.DataFrame]:
    """# Hughes Hallucination Evaluation Model (HHEM) Leaderboard

    Using [Vectara](https://vectara.com/)'s proprietary [HHEM](https://www.vectara.com/blog/hhem-2-1-a-better-hallucination-detection-model), this leaderboard evaluates how often an LLM hallucinates -- containing information not stated in the source document -- when summarizing a document. For an LLM, its hallucination rate is defined as the ratio of summaries that hallucinate to the total number of summaries it generates. HHEM's open source version is available [here](https://huggingface.co/vectara/hallucination_evaluation_model). For more details or to contribute, see [this Github repo](https://github.com/vectara/hallucination-leaderboard).

    **Usage:**

    * All LLMs are displayed by default. To filter, enter the names of the models that you want to see in the "Filter Models by Name" field below, separated by commas or semicolons.
    * Results are paginated. To page thru, use the `<` or `>` buttons at the bottom right corner of the table.
    * To sort the table, hover over a column header and click the arrow. The arrow automatically points up and down depending on the sort order.
    * Click the "Refresh" button to refresh the leaderboard if the table is not shown or does not update when you change the filter.

    Args:
        filter_models_by_name: filter models by name using comma-separated strings
    """
    # Normalise the raw text box content: commas and semicolons are both
    # separators, spaces are dropped, and everything is lowercased because the
    # match runs against the "LLM_lower_case" column (presumably lowercased
    # model names -- an uppercase query could otherwise never match).
    normalized = (filter_models_by_name or "").replace(",", ";").replace(" ", "").lower()
    terms = [term for term in normalized.split(";") if term]

    df = results_df

    # "all" must be a whole term to disable filtering; a substring check on the
    # raw string would also trigger on names such as "mistral-small".
    if terms and "all" not in terms:
        # Escape each term so it is matched literally: "." in "gpt-3.5" must not
        # act as a wildcard, and input like "gpt(" must not raise re.error.
        pattern = "|".join(re.escape(term) for term in terms)
        df = df[df["LLM_lower_case"].str.contains(pattern, na=False)]

        if df.empty:
            # Nothing matched: report it, with an empty figure and empty table
            # so the declared three-element return shape is preserved.
            return (
                Markdown(f"No models found matching: {terms}"),
                matplotlib.figure.Figure(),
                pd.DataFrame(),
            )

    fig = visualize_leaderboard(df)
    return Markdown(""), fig, df[_DISPLAY_COLUMNS]