Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
import os | |
import uuid | |
import datetime | |
import logging | |
from huggingface_hub import hf_hub_download, upload_file, list_repo_tree | |
from dotenv import load_dotenv | |
# Load variables from a local .env file (when present) before reading the
# environment below.
load_dotenv()

# Configuration
# Every setting is read from the environment and is None when unset.

# Input side: dataset repo, file inside that repo, and the column names used
# for the item id and the two generations being compared.
HF_INPUT_DATASET = os.environ.get("HF_INPUT_DATASET")
HF_INPUT_DATASET_PATH = os.environ.get("HF_INPUT_DATASET_PATH")
HF_INPUT_DATASET_ID_COLUMN = os.environ.get("HF_INPUT_DATASET_ID_COLUMN")
HF_INPUT_DATASET_COLUMN_A = os.environ.get("HF_INPUT_DATASET_COLUMN_A")
HF_INPUT_DATASET_COLUMN_B = os.environ.get("HF_INPUT_DATASET_COLUMN_B")

# Output side: dataset repo and directory that receive the result files.
HF_OUTPUT_DATASET = os.environ.get("HF_OUTPUT_DATASET")
HF_OUTPUT_DATASET_DIR = os.environ.get("HF_OUTPUT_DATASET_DIR")
# Markdown shown at the top of the labeling page. The option names must match
# the button captions ("A is better", "B is better", "Tie", "Can't choose")
# and the "Model Output A" / "Model Output B" textbox labels.
INSTRUCTIONS = """
# Pairwise Model Output Labeling
Please compare the two model outputs shown below and select which one you think is better.
- Choose "A is better" if Model Output A (left) is superior
- Choose "B is better" if Model Output B (right) is superior
- Choose "Tie" if they are equally good or bad
- Choose "Can't choose" if you cannot make a determination
"""

# Number of buffered judgments that triggers an upload; also used as the
# number of rows in the fallback sample dataset.
SAVE_EVERY_N_EXAMPLES = 5
class PairwiseLabeler:
    """Walk through a dataset of output pairs and collect pairwise judgments.

    The dataset is loaded once at construction time. Judgments are buffered
    in ``self.results`` and uploaded to the output dataset repo in batches.

    Attributes:
        current_index: Position of the next row to be labeled.
        results: Judgments recorded since the last successful upload.
        labeler_id: Short anonymous ID shared by every judgment of this
            labeler instance.
        df: The loaded (or fallback sample) dataset.
    """

    def __init__(self):
        self.current_index = 0
        self.results = []
        # Generated once so that all judgments of this session carry the same
        # ID (a fresh UUID per judgment could not identify a session).
        self.labeler_id = str(uuid.uuid4())[:8]
        self.df = self.read_hf_dataset()

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.df)

    def read_hf_dataset(self) -> pd.DataFrame:
        """Download the input file from the Hub and load it as a DataFrame.

        Supports ``.json``, ``.jsonl``, ``.csv`` and ``.parquet`` files. On
        any failure (download error, unsupported extension, parse error) the
        error is logged and a small synthetic dataset is returned instead, so
        the app can still start.
        """
        try:
            local_file = hf_hub_download(
                repo_id=HF_INPUT_DATASET,
                repo_type="dataset",
                filename=HF_INPUT_DATASET_PATH,
            )
            if local_file.endswith(".json"):
                return pd.read_json(local_file)
            if local_file.endswith(".jsonl"):
                return pd.read_json(local_file, orient="records", lines=True)
            if local_file.endswith(".csv"):
                return pd.read_csv(local_file)
            if local_file.endswith(".parquet"):
                return pd.read_parquet(local_file)
            raise ValueError(f"Unsupported file type: {local_file}")
        except Exception as e:
            # Deliberate best-effort: fall back to sample data, but keep the
            # cause in the log so a misconfiguration can be diagnosed.
            logging.error(
                f"Couldn't read HF dataset from {HF_INPUT_DATASET_PATH} ({e}). "
                "Using sample data instead."
            )
            sample_data = {
                HF_INPUT_DATASET_ID_COLUMN: [f"sample_{i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
                HF_INPUT_DATASET_COLUMN_A: [f"This is sample generation A {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
                HF_INPUT_DATASET_COLUMN_B: [f"This is sample generation B {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
            }
            return pd.DataFrame(sample_data)

    def get_current_pair(self):
        """Return ``(item_id, left_text, right_text)`` for the current row.

        Returns ``(None, None, None)`` once every row has been labeled.
        """
        if self.current_index >= len(self.df):
            return None, None, None
        item = self.df.iloc[self.current_index]
        item_id = item.get(HF_INPUT_DATASET_ID_COLUMN, f"item_{self.current_index}")
        left_text = item.get(HF_INPUT_DATASET_COLUMN_A, "")
        right_text = item.get(HF_INPUT_DATASET_COLUMN_B, "")
        return item_id, left_text, right_text

    def submit_judgment(self, item_id, left_text, right_text, choice):
        """Record one judgment and advance to the next pair.

        Args:
            item_id: ID of the pair that was judged.
            left_text: Text of generation A as shown to the labeler.
            right_text: Text of generation B as shown to the labeler.
            choice: The judgment label chosen by the labeler.

        Returns:
            ``(next_id, next_left, next_right, current_index)``. When there
            is nothing left to label, the inputs are echoed back unchanged
            together with the current index and nothing is recorded.
        """
        # Also guard on the index: after the last item the UI may send an
        # empty string instead of None, which must not create a bogus row.
        if item_id is None or self.current_index >= len(self.df):
            return item_id, left_text, right_text, self.current_index

        # Record the judgment
        self.results.append({
            "item_id": item_id,
            "generation_a": left_text,
            "generation_b": right_text,
            "judgment": choice,
            "timestamp": datetime.datetime.now().isoformat(),
            "labeler_id": self.labeler_id,
        })

        # Move to next item
        self.current_index += 1

        # Save periodically, and always once the dataset is exhausted so the
        # final partial batch is not lost.
        finished = self.current_index >= len(self.df)
        if finished or len(self.results) % SAVE_EVERY_N_EXAMPLES == 0:
            self.save_results()

        # Get next pair
        next_id, next_left, next_right = self.get_current_pair()
        return next_id, next_left, next_right, self.current_index

    def save_results(self):
        """Upload the buffered judgments as a new JSON Lines file.

        Does nothing when the buffer is empty. The buffer is cleared only
        after a successful upload; on failure the error is logged and the
        judgments stay buffered for a later attempt.
        """
        if not self.results:
            return
        try:
            # Serialize in memory rather than through a fixed "temp.jsonl":
            # no file is shared between concurrent requests or left behind
            # when the upload fails.
            payload = pd.DataFrame(self.results).to_json(orient="records", lines=True).encode("utf-8")

            # Number the new file after the entries already in the directory.
            # NOTE(review): when listing fails (e.g. the directory does not
            # exist yet) this falls back to results_1.jsonl, which could
            # overwrite an existing file after a transient error.
            try:
                num_files = sum(
                    1
                    for _ in list_repo_tree(
                        repo_id=HF_OUTPUT_DATASET,
                        repo_type="dataset",
                        path_in_repo=HF_OUTPUT_DATASET_DIR,
                    )
                )
            except Exception:
                num_files = 0

            # Push to Hugging Face Hub
            upload_file(
                repo_id=HF_OUTPUT_DATASET,
                repo_type="dataset",
                path_in_repo=os.path.join(HF_OUTPUT_DATASET_DIR, f"results_{num_files + 1}.jsonl"),
                path_or_fileobj=payload,
            )

            # Capture the count before clearing so the log reports it.
            num_saved = len(self.results)
            self.results = []
            logging.info(f"Saved {num_saved} results to {HF_OUTPUT_DATASET}")
        except Exception as e:
            logging.error(f"Error saving results: {e}")
# Single labeler instance used by all event handlers below
labeler = PairwiseLabeler()

# Pair displayed when the page first loads
initial_id, initial_left, initial_right = labeler.get_current_pair()

with gr.Blocks() as app:
    gr.Markdown(INSTRUCTIONS)

    # The two generations, side by side and read-only
    with gr.Row():
        with gr.Column():
            left_output = gr.Textbox(
                value=initial_left,
                label="Model Output A",
                lines=10,
                interactive=False,
            )
        with gr.Column():
            right_output = gr.Textbox(
                value=initial_right,
                label="Model Output B",
                lines=10,
                interactive=False,
            )

    # Hidden field that carries the ID of the displayed pair through events
    item_id = gr.Textbox(value=initial_id, visible=False)

    # NOTE(review): the original nesting is not recoverable from the source;
    # the slider is placed in the button row on the assumption that its
    # `scale` argument is meant to take effect there — confirm.
    with gr.Row():
        left_btn = gr.Button("⬅️ A is better", variant="primary")
        right_btn = gr.Button("➡️ B is better", variant="primary")
        tie_btn = gr.Button("🤝 Tie", variant="primary")
        cant_choose_btn = gr.Button("🤔 Can't choose")
        total_samples = len(labeler)
        current_sample_sld = gr.Slider(
            minimum=0,
            maximum=total_samples,
            step=1,
            value=labeler.current_index,
            interactive=False,
            label='sample_ind',
            info=f"Samples labeled (out of {total_samples})",
            show_label=False,
            container=False,
            scale=5,
        )

    def judge(choice, item_id, left_text, right_text):
        """Record `choice` for the shown pair; return the next pair and index."""
        return labeler.submit_judgment(item_id, left_text, right_text, choice)

    # One thin, individually named handler per button.
    def judge_left(item_id, left_text, right_text):
        return judge("A is better", item_id, left_text, right_text)

    def judge_right(item_id, left_text, right_text):
        return judge("B is better", item_id, left_text, right_text)

    def judge_tie(item_id, left_text, right_text):
        return judge("Tie", item_id, left_text, right_text)

    def judge_cant_choose(item_id, left_text, right_text):
        return judge("Can't choose", item_id, left_text, right_text)

    # Every button reads the same components and refreshes the same ones.
    handler_inputs = [item_id, left_output, right_output]
    handler_outputs = [item_id, left_output, right_output, current_sample_sld]
    for button, handler in (
        (left_btn, judge_left),
        (right_btn, judge_right),
        (tie_btn, judge_tie),
        (cant_choose_btn, judge_cant_choose),
    ):
        button.click(handler, inputs=handler_inputs, outputs=handler_outputs)

if __name__ == "__main__":
    app.launch()