|
import os |
|
import json |
|
import tiktoken |
|
from alpaca_eval import utils, metrics, annotators, constants, analyze, plotting, main |
|
from alpaca_eval.metrics.glm_winrate import get_length_controlled_winrate |
|
import os |
|
import pandas as pd |
|
import json |
|
|
|
|
|
|
|
# Root directory that holds one sub-directory per evaluated model.
TOP_LEVEL_DIRECTORY = "submodules/alpaca_eval/results"

# Maps model name (its directory name) -> DataFrame read from model_outputs.json.
model_dataframes_outputs = {}

for model_name in os.listdir(TOP_LEVEL_DIRECTORY):
    model_dir = os.path.join(TOP_LEVEL_DIRECTORY, model_name)
    # Entries that are not directories cannot hold per-model results.
    if not os.path.isdir(model_dir):
        continue

    model_output_file = os.path.join(model_dir, "model_outputs.json")
    # Directories without an outputs file are simply left out of the mapping.
    if not os.path.exists(model_output_file):
        continue

    model_dataframes_outputs[model_name] = pd.read_json(model_output_file)
|
|
|
|
|
def get_num_words(text):
    """Return how many whitespace-separated words ``text`` contains."""
    words = text.split()
    return len(words)
|
|
|
|
|
# Tokenizer created once at import time and reused by every get_num_tokens call.
ENCODING = tiktoken.get_encoding("cl100k_base")


def get_num_tokens(text):
    """Return the number of ``cl100k_base`` tokens in ``text``.

    ``text`` is passed through ``str()`` before encoding, so non-string values
    are counted by their string form.

    Literal special-token markers in the text (for example ``<|endoftext|>``)
    are encoded as ordinary text instead of making tiktoken raise
    ``ValueError``. Any other encoding error propagates to the caller rather
    than dropping into a debugger and silently returning ``None``.
    """
    # disallowed_special=() disables tiktoken's check that rejects input
    # containing special-token strings; they are tokenized as plain text.
    return len(ENCODING.encode(str(text), disallowed_special=()))
|
|
|
|
|
# Per-model output-length statistics, keyed by model name. Each value is a
# dict with the "mean" and "std" of the per-row counts, truncated to int.
model_name_to_num_words = {}
model_name_to_num_tokens = {}

for model_name, model_dataframe in model_dataframes_outputs.items():
    print(f"model_name_to_num_words for {model_name}")

    # Add the per-row length columns to the loaded DataFrame in place.
    model_dataframe["output_num_words"] = model_dataframe["output"].apply(
        get_num_words
    )
    model_dataframe["output_num_tokens"] = model_dataframe["output"].apply(
        get_num_tokens
    )

    word_counts = model_dataframe["output_num_words"]
    token_counts = model_dataframe["output_num_tokens"]

    model_name_to_num_words[model_name] = {
        "mean": int(word_counts.mean()),
        "std": int(word_counts.std()),
    }
    model_name_to_num_tokens[model_name] = {
        "mean": int(token_counts.mean()),
        "std": int(token_counts.std()),
    }

# Transpose so that each row is a model and the columns are "mean" / "std".
num_words_df = pd.DataFrame(model_name_to_num_words).T
num_tokens_df = pd.DataFrame(model_name_to_num_tokens).T
|
|
|
# Maps model name -> result of get_length_controlled_winrate on that model's
# annotations (presumably a mapping of metric name to value -- TODO confirm).
model_name_to_win_rate = {}

for model_name in os.listdir(TOP_LEVEL_DIRECTORY):
    # Logged for every directory entry, before any filtering happens.
    print(f"model_name_to_win_rate for {model_name}")

    model_dir = os.path.join(TOP_LEVEL_DIRECTORY, model_name)
    if not os.path.isdir(model_dir):
        continue

    model_output_file = os.path.join(
        model_dir, "weighted_alpaca_eval_gpt4_turbo", "annotations.json"
    )
    # Models that have no annotations file get no win-rate entry.
    if not os.path.exists(model_output_file):
        continue

    model_dataframe = pd.read_json(model_output_file)
    model_name_to_win_rate[model_name] = get_length_controlled_winrate(
        model_dataframe
    )

# Transpose so that each row is a model.
win_rate_df = pd.DataFrame(model_name_to_win_rate).T
|
|
|
# Combine the per-model tables on their shared index (the model name).
# Inner joins keep only the models that appear in every table.
df = num_words_df.join(win_rate_df, how="inner")
# Both length tables use the column names "mean"/"std"; rename the word
# statistics before the token statistics are joined so they do not collide.
df = df.rename(
    columns={
        "mean": "num_words_mean",
        "std": "num_words_std",
    }
)
df = df.join(num_tokens_df, how="inner")
df = df.rename(
    columns={
        "mean": "num_tokens_mean",
        "std": "num_tokens_std",
    }
)

output_path = "data/model_win_rates.json"
# to_json does not create missing parent directories; create the output
# directory first so the script does not fail at its very last step.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_json(output_path)
|
|