# Captured from a Hugging Face Space file view (Space status: Running; file size: 9,578 bytes).
# Revisions listed in the viewer's blame gutter: c744934, cc5abb7, f56b3a8, e3ce2e2.
# The viewer's line-number gutter (1-216) carried no content and is omitted here.
import os
import random
import time
import numpy as np
import gradio as gr
import pandas as pd
import zipfile
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi, snapshot_download
from datasets import load_dataset
from src.utils import load_all_data
from src.md import ABOUT_TEXT, TOP_TEXT, SUBMIT_TEXT
from src.css import custom_css
# Hub API client; restart_space() calls api.restart_space() on it.
api = HfApi()
# Access token read from the environment; None when COLLAB_TOKEN is not set.
COLLAB_TOKEN = os.environ.get("COLLAB_TOKEN")
# Hub repo id passed to snapshot_download(repo_type="dataset") and also to
# api.restart_space() in restart_space().
eval_set_repo_id = "KwaiVGI/VideoGen-RewardBench"
# Local directory the dataset snapshot is downloaded into; videos.zip is
# extracted here and the CSV is read from here.
eval_set_dir = "dataset"
# Directory handed to load_all_data() to build the leaderboard table.
eval_results_dir = "evals"
def restart_space():
    """Ask the Hub to restart the Space identified by ``eval_set_repo_id``.

    The request is authenticated with ``COLLAB_TOKEN``. Nothing is returned.

    NOTE(review): the repo id used here is the same constant that is passed to
    ``snapshot_download`` as a dataset id -- presumably the Space and the
    dataset share one id; confirm.
    """
    restart_kwargs = {"repo_id": eval_set_repo_id, "token": COLLAB_TOKEN}
    api.restart_space(**restart_kwargs)
# Background colour (CSS hex value) for each "Model Type" cell value.
# regex_table() passes this mapping to color_model_type_column().
# NOTE(review): "DPO" has a colour here but no matching filter branch in
# regex_table(), so rows of that type are never filtered out -- confirm intended.
color_map = {
    "Generative": "#7497db",
    "Custom Classifiers": "#E8ECF2",
    "Seq. Classifiers": "#ffcd75",
    "DPO": "#75809c",
}
def color_model_type_column(df, color_map):
"""
Apply color to the 'Model Type' column of the DataFrame based on a given color mapping.
Parameters:
df (pd.DataFrame): The DataFrame containing the 'Model Type' column.
color_map (dict): A dictionary mapping model types to colors.
Returns:
pd.Styler: The styled DataFrame.
"""
# Function to apply color based on the model type
def apply_color(val):
color = color_map.get(val, "default") # Default color if not specified in color_map
return f'background-color: {color}'
# Format for different columns
format_dict = {col: "{:.2f}" for col in df.columns if col not in ['Avg.', 'Model', 'Model Type']}
format_dict['Avg.'] = "{:.2f}"
format_dict[''] = "{:d}"
return df.style.applymap(apply_color, subset=['Model Type']).format(format_dict, na_rep='')
def regex_table(dataframe, regex, filter_button, style=True):
    """
    Filter the leaderboard by model type, tie-mode columns and model-name search.

    Parameters:
    dataframe (pd.DataFrame): Table with at least 'Model' and 'Model Type' columns.
        It is not modified; a new table is returned.
    regex (str): Comma-separated search patterns matched case-insensitively
        against 'Model'. Patterns are OR-ed together. Empty entries (for example
        from a trailing comma) are ignored, and an empty search keeps every row
        that has a model name. If the combined pattern is not a valid regular
        expression, the entries are matched as literal text instead.
    filter_button (list | str): Selected filter names. Rows whose 'Model Type'
        contains an unselected name among "Seq. Classifiers", "Custom Classifiers"
        and "Generative" are dropped; columns whose name contains an unselected
        "w/o Ties" / "w/ Ties" are dropped. Any other type of value disables
        these filters.
    style (bool): When True (default) the result is passed through
        ``color_model_type_column``; when False the plain DataFrame is returned.

    Returns:
    The filtered table with a leading unnamed rank column (0..n-1), styled
    unless ``style`` is False.
    """
    # Local import: the module's import block does not bring in ``re``.
    import re

    if isinstance(filter_button, (list, str)):
        for model_type in ("Seq. Classifiers", "Custom Classifiers", "Generative"):
            if model_type not in filter_button:
                # regex=False: the names contain '.', which must not act as a wildcard.
                is_type = dataframe["Model Type"].str.contains(
                    model_type, case=False, regex=False, na=False
                )
                dataframe = dataframe[~is_type]
        for tie_mode in ("w/o Ties", "w/ Ties"):
            if tie_mode not in filter_button:
                kept_columns = [col for col in dataframe.columns if tie_mode not in col]
                dataframe = dataframe[kept_columns]

    # Split on commas, trim, and drop empty entries: an empty alternative in
    # "a|" would match every row and silently disable the search.
    patterns = [part.strip() for part in (regex or "").split(",")]
    patterns = [part for part in patterns if part]
    combined_regex = "|".join(patterns)
    try:
        re.compile(combined_regex)
    except re.error:
        # Half-typed input such as "foo(" must not crash the UI callback.
        combined_regex = "|".join(re.escape(part) for part in patterns)

    matches = dataframe["Model"].str.contains(combined_regex, case=False, na=False)
    # reset_index without inplace returns a new frame, so the insert below never
    # touches the caller's data or a slice of it.
    data = dataframe[matches].reset_index(drop=True)
    data.insert(0, "", range(len(data)))

    if style:
        return color_model_type_column(data, color_map)
    return data
# Download the dataset repo from the Hub into eval_set_dir.
# NOTE(review): the return value is bound to `repo` but is not used again in
# the code visible here.
repo = snapshot_download(
    local_dir=eval_set_dir,
    repo_id=eval_set_repo_id,
    use_auth_token=COLLAB_TOKEN,
    tqdm_class=None,
    etag_timeout=30,
    repo_type="dataset",
)
# Unpack videos.zip into the same directory; random_sample() later joins
# eval_set_dir with the CSV's path_A / path_B values (presumably paths inside
# this archive -- confirm).
with zipfile.ZipFile(os.path.join(eval_set_dir, 'videos.zip'), 'r') as zip_ref:
    zip_ref.extractall(eval_set_dir)
# Leaderboard table, highest 'Avg.' first. load_all_data is a project helper
# whose output schema is not visible here beyond the 'Avg.' column used for sorting.
rewardbench_data = load_all_data(eval_results_dir).sort_values(by='Avg.', ascending=False)
# Column datatypes handed to gr.Dataframe: one number, one markdown, one str,
# then numbers. This list has len(columns) + 2 entries.
# NOTE(review): the displayed table has len(columns) + 1 columns (rank column
# added by regex_table) -- looks one entry too long; confirm against Gradio's handling.
col_types_rewardbench = ["number"] + ["markdown"]+ ["str"] + ["number"] * (len(rewardbench_data.columns) - 1)
# for showing random samples
eval_set = pd.read_csv(os.path.join(eval_set_dir, 'videogen-rewardbench.csv'))
# Distinct prompts, used as the dropdown choices in the Dataset Viewer tab.
subsets = list(eval_set['prompt'].unique())
# N=20
# if len(subsets) > N:
# random.seed(time.time())
# subsets = random.sample(subsets, N)
def _preference_label(choice):
    """Render one annotation as a comparison: 'A' -> 'A>B', 'B' -> 'A<B', anything else -> 'A=B'."""
    if choice == "A":
        return "A>B"
    if choice == "B":
        return "A<B"
    return "A=B"


def random_sample(selected_prompts):
    """
    Pick one random row of ``eval_set`` for the selected prompt and prepare it for display.

    Parameters:
    selected_prompts: A single prompt string, or a list/tuple/set of prompt
        strings (any of them may match). ``None`` is treated as "nothing selected".

    Returns:
    A 3-tuple ``(markdown_text, video_a, video_b)`` matching the three outputs
    the click handler is wired to. When no row matches, the tuple is
    ``("No data available for the selected prompt(s).", None, None)`` so the
    handler still receives one value per output.
    """
    no_data = ("No data available for the selected prompt(s).", None, None)
    if selected_prompts is None:
        return no_data

    # Filter the eval_set based on the selected prompts
    if isinstance(selected_prompts, (list, tuple, set)):
        selected = eval_set["prompt"].isin(list(selected_prompts))
    else:
        selected = eval_set["prompt"] == selected_prompts
    filtered_data = eval_set[selected]
    if filtered_data.empty:
        return no_data

    # No explicit seed: seeding with the current second made two clicks within
    # the same second return the same row.
    sample = filtered_data.sample(n=1).iloc[0]

    # Prepare the markdown text with the required fields
    labels = [_preference_label(sample[key]) for key in ("VQ", "MQ", "TA", "Overall")]
    cells = labels + [f"{sample['A_model']}", f"{sample['B_model']}"]
    markdown_text = "".join(
        [
            f"**Prompt**: {sample['prompt']}\n\n\n",
            "**Preference**: \n",
            "| **Visual Quality** | **Motion Quality** | **Text Alignment** | **Overall** | **A_model** | **B_model** |\n",
            "|:------------------:|:------------------:|:------------------:|:-----------:|:-----------:|:-----------:|\n",
            "| " + " | ".join(cells) + " |\n",
        ]
    )

    # Load and display videos from path_A and path_B
    video_a = gr.Video(value=os.path.join(eval_set_dir, sample['path_A']))
    video_b = gr.Video(value=os.path.join(eval_set_dir, sample['path_B']))
    return markdown_text, video_a, video_b
# Number of leaderboard rows, substituted into TOP_TEXT below.
total_models = len(rewardbench_data)
# UI definition. NOTE(review): the nesting of the `with` blocks below is
# reconstructed from the order of the statements; confirm it against the
# deployed layout.
# NOTE(review): the emoji prefixes in the tab and accordion titles look
# mis-encoded; they are runtime strings and are left exactly as found.
with gr.Blocks(css=custom_css) as app:
    # Header row: intro text next to the logo.
    with gr.Row():
        with gr.Column(scale=7):
            gr.Markdown(TOP_TEXT.format(str(total_models)))
        with gr.Column(scale=3):
            gr.Markdown("""
<img src="https://i.postimg.cc/rpMSzBnV/logo.png" style="width:800px;" alt="Logo">
""")
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("π VideoGen-RewardBench Leaderboard"):
            with gr.Row():
                with gr.Column(scale=4):
                    # Free-text search; its value is the `regex` argument of regex_table.
                    search_1 = gr.Textbox(label="Model Search (delimit with , )",
                                          placeholder="Model Search (delimit with , )",
                                          show_label=False)
                with gr.Column(scale=6):
                    # Filters; the selected list is the `filter_button` argument of regex_table.
                    model_types_1 = gr.CheckboxGroup(["Seq. Classifiers", "Custom Classifiers", "Generative", "w/o Ties", "w/ Ties"],
                                                     value=["Seq. Classifiers", "Custom Classifiers", "Generative", "w/o Ties", "w/ Ties"],
                                                     label="Model Types",
                                                     show_label=False)
            with gr.Row():
                # reference data
                # Hidden, unfiltered copy; the change handlers read from it so every
                # filter pass starts from the full table.
                rewardbench_table_hidden = gr.Dataframe(
                    rewardbench_data,
                    datatype=col_types_rewardbench,
                    headers=rewardbench_data.columns.tolist(),
                    visible=False,
                )
                # Visible table, initially rendered with an empty search.
                # NOTE(review): "Others" in the initial filter list is not one of the
                # checkbox choices and regex_table has no branch for it.
                rewardbench_table = gr.Dataframe(
                    regex_table(rewardbench_data.copy(), "", ["Seq. Classifiers", "Custom Classifiers", "Generative", "Others", "w/o Ties", "w/ Ties"]),
                    datatype=col_types_rewardbench,
                    headers=rewardbench_data.columns.tolist(),
                    elem_id="rewardbench_dataframe_avg",
                    # height=1000,
                )
            with gr.Row():
                gr.Markdown(ABOUT_TEXT)
        with gr.TabItem("π€ How to Submit"):
            with gr.Row():
                gr.Markdown(SUBMIT_TEXT)
        with gr.TabItem("π Dataset Viewer"):
            with gr.Row():
                # loads one sample
                gr.Markdown("""## Random Dataset Sample Viewer""")
                # Single-select dropdown over the distinct prompts; starts with no selection.
                subset_selector = gr.Dropdown(subsets, label="Subset", value=None, multiselect=False)
                button = gr.Button("Show Random Sample")
            with gr.Row():
                sample_display = gr.Markdown("{sampled data loads here}")
            with gr.Row():
                video_a_display = gr.Video()
                video_b_display = gr.Video()
            # random_sample is wired to three outputs: the markdown panel and both videos.
            button.click(fn=random_sample, inputs=[subset_selector], outputs=[sample_display, video_a_display, video_b_display])
    # Re-filter the visible table whenever the search text or the filters change.
    search_1.change(regex_table, inputs=[rewardbench_table_hidden, search_1, model_types_1], outputs=rewardbench_table)
    model_types_1.change(regex_table, inputs=[rewardbench_table_hidden, search_1, model_types_1], outputs=rewardbench_table)
    with gr.Row():
        with gr.Accordion("π Citation", open=False):
            citation_button = gr.Textbox(
                value=r"""@article{liu2025improving,
title={Improving Video Generation with Human Feedback},
author={Liu, Jie and Liu, Gongye and Liang, Jiajun and Yuan, Ziyang and Liu, Xiaokun and Zheng, Mingwu and Wu, Xiele and Wang, Qiulin and Qin, Wenyu and Xia, Menghan and others},
journal={arXiv preprint arXiv:2501.13918},
year={2025}
}""",
                lines=5,
                label="Copy the following to cite these results.",
                elem_id="citation-button",
                show_copy_button=True,
            )
# Run restart_space() in the background every 1800 seconds (30 minutes).
# NOTE(review): presumably so that a fresh start re-downloads the dataset and
# results -- confirm the intended refresh interval.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()
# Enable request queuing with a default per-event concurrency limit of 40, then
# start the app. (A stray trailing "|" after this call was removed; it made the
# statement a syntax error.)
app.queue(default_concurrency_limit=40).launch()