File size: 9,578 Bytes
c744934
 
 
 
cc5abb7
 
c744934
cc5abb7
c744934
 
 
 
 
cc5abb7
c744934
 
 
 
 
 
cc5abb7
 
f56b3a8
c744934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3ce2e2
 
c744934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc5abb7
c744934
 
 
e3ce2e2
c744934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc5abb7
c744934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc5abb7
c744934
cc5abb7
c744934
cc5abb7
c744934
cc5abb7
c744934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc5abb7
c744934
 
 
 
 
 
 
 
 
 
 
 
 
 
cc5abb7
 
 
 
c744934
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
import os
import random
import time
import numpy as np
import gradio as gr
import pandas as pd
import zipfile
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi, snapshot_download
from datasets import load_dataset
from src.utils import load_all_data
from src.md import ABOUT_TEXT, TOP_TEXT, SUBMIT_TEXT
from src.css import custom_css

# Shared Hub client; restart_space() calls it to restart the hosting Space.
api = HfApi()

# Hub access token read from the environment (None when the variable is unset).
COLLAB_TOKEN = os.environ.get("COLLAB_TOKEN")
# Hub repo id; passed to snapshot_download() as a dataset repo and to
# api.restart_space() as the repo to restart.
eval_set_repo_id = "KwaiVGI/VideoGen-RewardBench"
# Local directory the dataset snapshot is downloaded into and unzipped in.
eval_set_dir = "dataset"
# Directory handed to load_all_data() to build the leaderboard table.
eval_results_dir = "evals"

def restart_space():
    """
    Ask the Hub to restart the repo identified by ``eval_set_repo_id``.

    Registered as a periodic job on the background scheduler at the bottom of
    this file. Authenticates with ``COLLAB_TOKEN``.
    """
    # NOTE(review): the same id is used for the dataset download, so this
    # presumably relies on the Space and the dataset sharing a name - confirm.
    restart_kwargs = {"repo_id": eval_set_repo_id, "token": COLLAB_TOKEN}
    api.restart_space(**restart_kwargs)

# Background colour (CSS hex) per value of the 'Model Type' column.
# regex_table() hands this mapping to color_model_type_column(); types that
# are missing here fall back to that function's default.
color_map = {
    "Generative": "#7497db",
    "Custom Classifiers": "#E8ECF2",
    "Seq. Classifiers": "#ffcd75",
    "DPO": "#75809c",
}

def color_model_type_column(df, color_map):
    """
    Apply color to the 'Model Type' column of the DataFrame based on a given color mapping.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the 'Model Type' column.
    color_map (dict): A dictionary mapping model types to colors.

    Returns:
    pd.Styler: The styled DataFrame.
    """
    # Function to apply color based on the model type
    def apply_color(val):
        color = color_map.get(val, "default")  # Default color if not specified in color_map
        return f'background-color: {color}'
    
    # Format for different columns
    format_dict = {col: "{:.2f}" for col in df.columns if col not in ['Avg.', 'Model', 'Model Type']}
    format_dict['Avg.'] = "{:.2f}"
    format_dict[''] = "{:d}"

    return df.style.applymap(apply_color, subset=['Model Type']).format(format_dict, na_rep='')

    
def regex_table(dataframe, regex, filter_button, style=True):
    """
    Filter the leaderboard by model-name search terms and checkbox selection.

    Parameters:
    dataframe (pd.DataFrame): Leaderboard with 'Model' and 'Model Type' columns.
        It is not modified; a new frame is built.
    regex (str): Comma-separated search terms. Each term is treated as a
        case-insensitive regular expression; a term that is not a valid regex
        is matched literally. Blank terms are ignored, and an empty search
        keeps every row that has a model name.
    filter_button (list | str): Selected filter labels. Rows whose model type
        is "Seq. Classifiers", "Custom Classifiers" or "Generative" are dropped
        when that label is absent; columns whose name contains "w/o Ties" or
        "w/ Ties" are dropped when that label is absent. Any other type of
        value disables this filtering.
    style (bool): When True (default) the result is passed through
        color_model_type_column(); when False the plain DataFrame is returned.

    Returns:
    The filtered table with a leading unnamed ('') column holding the row
    rank starting at 0 - a Styler when ``style`` is true, else a DataFrame.
    """
    import re

    def as_pattern(term):
        # A half-typed search such as "(" must not crash the event handler:
        # fall back to a literal match when the term does not compile.
        try:
            re.compile(term)
        except re.error:
            return re.escape(term)
        return term

    # Drop blank terms; otherwise "foo," would become "foo|" and match every row.
    terms = [term for term in (part.strip() for part in (regex or "").split(",")) if term]
    combined_regex = '|'.join(as_pattern(term) for term in terms)

    if isinstance(filter_button, (list, str)):
        for model_type in ("Seq. Classifiers", "Custom Classifiers", "Generative"):
            if model_type not in filter_button:
                # Literal match: the "." in "Seq. Classifiers" is not a wildcard.
                is_type = dataframe["Model Type"].str.contains(
                    model_type, case=False, na=False, regex=False
                )
                dataframe = dataframe[~is_type]

        for tie_tag in ("w/o Ties", "w/ Ties"):
            if tie_tag not in filter_button:
                dataframe = dataframe[[col for col in dataframe.columns if tie_tag not in col]]

    # Keep the rows whose 'Model' matches any of the search terms.
    matches = dataframe["Model"].str.contains(combined_regex, case=False, na=False)
    # reset_index() returns a fresh frame, so the insert below never writes
    # into a slice of the caller's data.
    data = dataframe[matches].reset_index(drop=True)
    data.insert(0, '', range(len(data)))

    if style:
        data = color_model_type_column(data, color_map)

    return data

# Download a snapshot of the dataset repo into eval_set_dir at import time.
# The returned value is kept in `repo` but is not referenced again below.
repo = snapshot_download(
    local_dir=eval_set_dir,
    repo_id=eval_set_repo_id,
    use_auth_token=COLLAB_TOKEN,
    tqdm_class=None, 
    etag_timeout=30,
    repo_type="dataset",
)

# Unpack videos.zip next to the CSV; random_sample() later joins eval_set_dir
# with the path_A / path_B values from the CSV to locate the clips.
with zipfile.ZipFile(os.path.join(eval_set_dir, 'videos.zip'), 'r') as zip_ref:
    zip_ref.extractall(eval_set_dir)

# Leaderboard table, best 'Avg.' first.
rewardbench_data = load_all_data(eval_results_dir).sort_values(by='Avg.', ascending=False)
# Gradio column datatypes: number, markdown, str, then numbers for the rest.
# NOTE(review): this list has len(columns) + 2 entries, while the displayed
# table has at most len(columns) + 1 columns (regex_table prepends one) and the
# hidden table has len(columns) - confirm the extra entries are intended.
col_types_rewardbench = ["number"] + ["markdown"]+ ["str"] + ["number"] * (len(rewardbench_data.columns) - 1)
# for showing random samples
eval_set = pd.read_csv(os.path.join(eval_set_dir, 'videogen-rewardbench.csv'))
# Distinct prompts; these populate the "Subset" dropdown in the viewer tab.
subsets = list(eval_set['prompt'].unique())
# N=20
# if len(subsets) > N:
#     random.seed(time.time())
#     subsets = random.sample(subsets, N)

def random_sample(selected_prompts):
    """
    Pick one random annotated pair for a prompt and prepare it for display.

    Parameters:
    selected_prompts: The prompt chosen in the dropdown; rows of ``eval_set``
        whose 'prompt' equals this value are candidates. May be None when
        nothing has been selected yet.

    Returns:
    tuple: ``(markdown_text, video_a, video_b)``. The markdown shows the prompt
    and a one-row table with the 'VQ', 'MQ', 'TA' and 'Overall' preferences
    plus both model names; the videos are ``gr.Video`` components pointing at
    'path_A' / 'path_B' under ``eval_set_dir``. When no row matches, the
    message is returned together with ``None`` for both videos.
    """
    # Filter the eval_set based on the selected prompt
    filtered_data = eval_set[eval_set['prompt'] == selected_prompts]
    if filtered_data.empty:
        # The click handler is wired to three outputs, so three values must be
        # returned here as well; a lone string makes Gradio raise an error.
        return "No data available for the selected prompt(s).", None, None

    # Randomly select a sample from the filtered data
    sample = filtered_data.sample(n=1, random_state=int(time.time())).iloc[0]

    def verdict(label):
        # 'A' / 'B' name the preferred video; anything else counts as a tie.
        if label == 'A':
            return 'A>B'
        if label == 'B':
            return 'A<B'
        return 'A=B'

    cells = [verdict(sample[key]) for key in ('VQ', 'MQ', 'TA', 'Overall')]
    cells += [sample['A_model'], sample['B_model']]

    # Prepare the markdown text with the required fields
    markdown_text = "".join([
        f"**Prompt**: {sample['prompt']}\n\n\n",
        "**Preference**: \n",
        "| **Visual Quality** | **Motion Quality** | **Text Alignment** | **Overall** | **A_model** | **B_model** |\n",
        "|:------------------:|:------------------:|:------------------:|:-----------:|:-----------:|:-----------:|\n",
        "| " + " | ".join(f"{cell}" for cell in cells) + " |\n",
    ])

    # Load and display videos from path_A and path_B
    video_a = gr.Video(value=os.path.join(eval_set_dir, sample['path_A']))
    video_b = gr.Video(value=os.path.join(eval_set_dir, sample['path_B']))

    return markdown_text, video_a, video_b

# Number of leaderboard rows; interpolated into the header text below.
total_models = len(rewardbench_data)

with gr.Blocks(css=custom_css) as app:
    # Header: intro text on the left, project logo on the right.
    with gr.Row():
        with gr.Column(scale=7):
            gr.Markdown(TOP_TEXT.format(str(total_models)))
        with gr.Column(scale=3):
            gr.Markdown("""
        <img src="https://i.postimg.cc/rpMSzBnV/logo.png" style="width:800px;" alt="Logo">
        """)
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab and accordion labels use real emoji; the previous text was
        # mis-decoded UTF-8 and rendered as garbage characters.
        with gr.TabItem("🏆 VideoGen-RewardBench Leaderboard"):
            with gr.Row():
                with gr.Column(scale=4):
                    search_1 = gr.Textbox(
                        label="Model Search (delimit with , )",
                        placeholder="Model Search (delimit with , )",
                        show_label=False,
                    )
                with gr.Column(scale=6):
                    model_types_1 = gr.CheckboxGroup(
                        ["Seq. Classifiers", "Custom Classifiers", "Generative", "w/o Ties", "w/ Ties"],
                        value=["Seq. Classifiers", "Custom Classifiers", "Generative", "w/o Ties", "w/ Ties"],
                        label="Model Types",
                        show_label=False,
                    )
            with gr.Row():
                # reference data: unfiltered copy that the change handlers
                # below read from, so every filter starts from the full table
                rewardbench_table_hidden = gr.Dataframe(
                    rewardbench_data,
                    datatype=col_types_rewardbench,
                    headers=rewardbench_data.columns.tolist(),
                    visible=False,
                )
                # visible table, initially rendered with every filter enabled
                rewardbench_table = gr.Dataframe(
                    regex_table(rewardbench_data.copy(), "", ["Seq. Classifiers", "Custom Classifiers",  "Generative", "Others", "w/o Ties", "w/ Ties"]),
                    datatype=col_types_rewardbench,
                    headers=rewardbench_data.columns.tolist(),
                    elem_id="rewardbench_dataframe_avg",
                    # height=1000,
                )

            with gr.Row():
                gr.Markdown(ABOUT_TEXT)

        with gr.TabItem("📀 How to Submit"):
            with gr.Row():
                gr.Markdown(SUBMIT_TEXT)

        with gr.TabItem("🔍 Dataset Viewer"):
            with gr.Row():
                # loads one sample
                gr.Markdown("""## Random Dataset Sample Viewer""")
                subset_selector = gr.Dropdown(subsets, label="Subset", value=None, multiselect=False)
                button = gr.Button("Show Random Sample")

            with gr.Row():
                sample_display = gr.Markdown("{sampled data loads here}")
            with gr.Row():
                video_a_display = gr.Video()
                video_b_display = gr.Video()

            button.click(fn=random_sample, inputs=[subset_selector], outputs=[sample_display, video_a_display, video_b_display])

    # Re-filter the visible table whenever the search text or the checkbox
    # selection changes.
    search_1.change(regex_table, inputs=[rewardbench_table_hidden, search_1, model_types_1], outputs=rewardbench_table)
    model_types_1.change(regex_table, inputs=[rewardbench_table_hidden, search_1, model_types_1], outputs=rewardbench_table)
    with gr.Row():
        with gr.Accordion("📚 Citation", open=False):
            citation_button = gr.Textbox(
                value=r"""@article{liu2025improving,
  title={Improving Video Generation with Human Feedback},
  author={Liu, Jie and Liu, Gongye and Liang, Jiajun and Yuan, Ziyang and Liu, Xiaokun and Zheng, Mingwu and Wu, Xiele and Wang, Qiulin and Qin, Wenyu and Xia, Menghan and others},
  journal={arXiv preprint arXiv:2501.13918},
  year={2025}
}""",
                lines=5,
                label="Copy the following to cite these results.",
                elem_id="citation-button",
                show_copy_button=True,
            )


# Restart the Space every 30 minutes (1800 s) via restart_space().
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()
app.queue(default_concurrency_limit=40).launch()