from glob import glob
import os

import pandas as pd
from sklearn.metrics import accuracy_score


def get_merged_df(results_path, skip_samples=False):
    """Load every JSON-lines result file under results_path into a single DataFrame."""
    results = glob(os.path.join(results_path, "*.json"))
    dfs = []
    for r in results:
        # Optionally skip per-sample result files.
        if skip_samples and 'sample_result' in r:
            continue
        df = pd.read_json(r, lines=True)
        if df.isna().values.any():
            print(f"Missing values in {r}")
        else:
            dfs.append(df)
    full_df = pd.concat(dfs)
    return full_df


def map_df(full_df):
    """Map ground-truth and prediction strings to binary labels (fake = 0, real = 1)."""
    gnd_truth_mapping = {'full fake': 0,
                         'half fake': 0,
                         'real': 1}
    pred_mapping = {'fake': 0, 'real': 1}
    full_df['label'] = full_df['ground_truth'].map(gnd_truth_mapping)
    full_df['pred'] = full_df['type'].map(pred_mapping)
    return full_df


def get_scores(df):
    """Compute accuracy on the fake samples, bucketed by clip duration."""
    columns = ['Under 25s', '26s - 55s', '56s - 125s', 'Overall']
    samples_tested = []
    acc_scores = []
    for c in columns:
        if c == 'Overall':
            # All fake samples, regardless of duration.
            mask = df.label == 0
        elif c == 'Under 25s':
            mask = (df.label == 0) & (df.duration < 26)
        elif c == '26s - 55s':
            mask = (df.label == 0) & (df.duration >= 26) & (df.duration < 56)
        elif c == '56s - 125s':
            mask = (df.label == 0) & (df.duration >= 56) & (df.duration < 126)
        else:
            raise ValueError(f"Unknown duration bucket: {c}")
        sel_df = df[mask]
        samples_tested.append(len(sel_df))
        acc_scores.append(round(accuracy_score(sel_df.label.values, sel_df.pred.values), 3))
    lb = pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Accuracy": acc_scores})
    return lb


def build_leaderboard(results_path='results'):
    """Merge the result files, map labels, and return the per-duration accuracy leaderboard."""
    full_df = get_merged_df(results_path)
    full_df_mapped = map_df(full_df)
    leaderboard = get_scores(full_df_mapped)
    return leaderboard
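

# Minimal usage sketch (an assumption, not part of the original file): build and print
# the leaderboard from a local 'results' directory of JSON-lines files that contain the
# 'ground_truth', 'type', and 'duration' columns the functions above expect.
if __name__ == "__main__":
    leaderboard = build_leaderboard(results_path='results')
    print(leaderboard.to_string(index=False))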