labeling-summarization

Sleeping

App Files Files Community

labeling-summarization / app.py

saridormi

initial commit

f053717 6 months ago

raw

history blame

7.94 kB

	import gradio as gr
	import pandas as pd
	import os
	import uuid
	import datetime
	import logging
	from huggingface_hub import hf_hub_download, upload_file, list_repo_tree
	from dotenv import load_dotenv

	# Load variables from a .env file into the process environment so that the
	# os.getenv() lookups in the configuration section can see them.
	load_dotenv()

	# Configuration. Every HF_* value is read from the environment; os.getenv
	# returns None for a variable that is not set.
	HF_INPUT_DATASET = os.getenv("HF_INPUT_DATASET")  # repo_id of the dataset to label
	HF_INPUT_DATASET_PATH = os.getenv("HF_INPUT_DATASET_PATH")  # file inside that repo
	HF_INPUT_DATASET_ID_COLUMN = os.getenv("HF_INPUT_DATASET_ID_COLUMN")  # column holding the item ID
	HF_INPUT_DATASET_COLUMN_A = os.getenv("HF_INPUT_DATASET_COLUMN_A")  # column shown as output A
	HF_INPUT_DATASET_COLUMN_B = os.getenv("HF_INPUT_DATASET_COLUMN_B")  # column shown as output B
	HF_OUTPUT_DATASET = os.getenv("HF_OUTPUT_DATASET")  # repo_id that receives the judgments
	HF_OUTPUT_DATASET_DIR = os.getenv("HF_OUTPUT_DATASET_DIR")  # directory inside that repo

	# Markdown shown at the top of the page. The quoted choices must match the
	# button captions used in the UI ("A is better", "B is better", "Tie",
	# "Can't choose"), otherwise labelers are told to press buttons that do not exist.
	INSTRUCTIONS = """
	# Pairwise Model Output Labeling

	Please compare the two model outputs shown below and select which one you think is better.
	- Choose "A is better" if output A (left) is superior
	- Choose "B is better" if output B (right) is superior
	- Choose "Tie" if they are equally good or bad
	- Choose "Can't choose" if you cannot make a determination
	"""

	# Pending judgments are uploaded whenever their count reaches a multiple of
	# this value; it is also the number of rows in the fallback sample data.
	SAVE_EVERY_N_EXAMPLES = 5


	class PairwiseLabeler:
	    """Serve pairs of model outputs one at a time and collect judgments on them.

	    The pairs come from a file in the ``HF_INPUT_DATASET`` repository, or from
	    built-in sample data when that file cannot be loaded. Judgments accumulate
	    in ``self.results`` and are uploaded to ``HF_OUTPUT_DATASET`` in batches.
	    """

	    def __init__(self):
	        self.current_index = 0  # position of the pair currently being shown
	        self.results = []  # judgments recorded since the last successful upload
	        # One anonymous ID for the whole labeling session, so that every
	        # judgment recorded by this labeler carries the same labeler_id.
	        self.labeler_id = str(uuid.uuid4())[:8]
	        self.df = self.read_hf_dataset()

	    def __len__(self):
	        """Return the number of pairs in the loaded dataset."""
	        return len(self.df)

	    def read_hf_dataset(self) -> pd.DataFrame:
	        """Download the input file and load it into a DataFrame.

	        The reader is chosen from the file extension (.json, .jsonl, .csv or
	        .parquet). Any failure, including an unsupported extension, is logged
	        with its traceback and replaced by ``SAVE_EVERY_N_EXAMPLES`` rows of
	        placeholder data, so the app can still start.
	        """
	        try:
	            local_file = hf_hub_download(
	                repo_id=HF_INPUT_DATASET,
	                repo_type="dataset",
	                filename=HF_INPUT_DATASET_PATH,
	            )
	            if local_file.endswith(".json"):
	                return pd.read_json(local_file)
	            if local_file.endswith(".jsonl"):
	                return pd.read_json(local_file, orient="records", lines=True)
	            if local_file.endswith(".csv"):
	                return pd.read_csv(local_file)
	            if local_file.endswith(".parquet"):
	                return pd.read_parquet(local_file)
	            raise ValueError(f"Unsupported file type: {local_file}")
	        except Exception:
	            # Deliberate best-effort fallback: keep the app usable, but record
	            # why loading failed instead of discarding the exception.
	            logging.exception(f"Couldn't read HF dataset from {HF_INPUT_DATASET_PATH}. Using sample data instead.")
	            sample_data = {
	                HF_INPUT_DATASET_ID_COLUMN: [f"sample_{i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
	                HF_INPUT_DATASET_COLUMN_A: [f"This is sample generation A {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
	                HF_INPUT_DATASET_COLUMN_B: [f"This is sample generation B {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
	            }
	            return pd.DataFrame(sample_data)

	    def get_current_pair(self):
	        """Return ``(item_id, left_text, right_text)`` for the current row.

	        Returns ``(None, None, None)`` once every row has been consumed. A
	        missing ID column falls back to ``"item_<index>"`` and a missing text
	        column to an empty string.
	        """
	        if self.current_index >= len(self.df):
	            return None, None, None

	        item = self.df.iloc[self.current_index]
	        item_id = item.get(HF_INPUT_DATASET_ID_COLUMN, f"item_{self.current_index}")
	        left_text = item.get(HF_INPUT_DATASET_COLUMN_A, "")
	        right_text = item.get(HF_INPUT_DATASET_COLUMN_B, "")

	        return item_id, left_text, right_text

	    def submit_judgment(self, item_id, left_text, right_text, choice):
	        """Record ``choice`` for the given pair and advance to the next one.

	        Args:
	            item_id: ID of the pair being judged.
	            left_text: Text that was shown as output A.
	            right_text: Text that was shown as output B.
	            choice: The judgment label to store.

	        Returns:
	            ``(next_id, next_left, next_right, current_index)``. When there is
	            nothing left to judge, the inputs are returned unchanged together
	            with the current index and nothing is recorded.
	        """
	        # Guard on the index as well as on item_id: after the last pair the
	        # caller may pass a non-None placeholder (e.g. an empty string), which
	        # must not be recorded as a judgment or advance the index further.
	        if item_id is None or self.current_index >= len(self.df):
	            return item_id, left_text, right_text, self.current_index

	        # Record the judgment
	        self.results.append({
	            "item_id": item_id,
	            "generation_a": left_text,
	            "generation_b": right_text,
	            "judgment": choice,
	            "timestamp": datetime.datetime.now().isoformat(),
	            "labeler_id": self.labeler_id,
	        })

	        # Move to next item
	        self.current_index += 1

	        # Save periodically, and also right after the last item so that a
	        # trailing partial batch is not left unsaved.
	        finished = self.current_index >= len(self.df)
	        if len(self.results) % SAVE_EVERY_N_EXAMPLES == 0 or finished:
	            self.save_results()

	        # Get next pair
	        next_id, next_left, next_right = self.get_current_pair()
	        return next_id, next_left, next_right, self.current_index

	    def save_results(self):
	        """Upload the pending judgments as a new JSON Lines file.

	        The file is named ``results_<k>.jsonl`` where ``k`` is one more than
	        the number of entries currently listed under ``HF_OUTPUT_DATASET_DIR``.
	        On success the pending list is cleared; on failure the error is logged
	        and the judgments are kept so that a later call can retry them.
	        """
	        if not self.results:
	            return

	        try:
	            # Serialize in memory: no fixed-name temp file to collide with or
	            # to leave behind when the upload fails.
	            payload = pd.DataFrame(self.results).to_json(orient="records", lines=True).encode("utf-8")

	            # Push to Hugging Face Hub
	            try:
	                num_files = sum(
	                    1
	                    for _ in list_repo_tree(
	                        repo_id=HF_OUTPUT_DATASET,
	                        repo_type="dataset",
	                        path_in_repo=HF_OUTPUT_DATASET_DIR,
	                    )
	                )
	            except Exception as e:
	                # Presumably the directory does not exist yet (first upload);
	                # treat it as empty, but leave a trace of the reason.
	                logging.warning(f"Couldn't list {HF_OUTPUT_DATASET_DIR} in {HF_OUTPUT_DATASET}, assuming it is empty: {e}")
	                num_files = 0

	            upload_file(
	                repo_id=HF_OUTPUT_DATASET,
	                repo_type="dataset",
	                path_in_repo=os.path.join(HF_OUTPUT_DATASET_DIR, f"results_{num_files + 1}.jsonl"),
	                path_or_fileobj=payload,
	            )

	            # Take the count before clearing, otherwise the log always says 0.
	            num_saved = len(self.results)
	            self.results = []
	            logging.info(f"Saved {num_saved} results to {HF_OUTPUT_DATASET}")
	        except Exception as e:
	            logging.error(f"Error saving results: {e}")

	# Initialize the labeler. This runs at import time: the constructor calls
	# read_hf_dataset(), which tries to download the input dataset file and
	# falls back to sample data if that fails. The instance is module-level,
	# so every click handler below operates on this one shared object.
	labeler = PairwiseLabeler()

	# Get the first pair; these values seed the initial contents of the UI
	# widgets. All three are None if the dataset has no rows.
	initial_id, initial_left, initial_right = labeler.get_current_pair()

	# Build the Gradio UI: the instructions, two read-only text boxes for the
	# outputs being compared, a hidden item-ID field, four judgment buttons and
	# a progress slider, plus the click handlers that tie them to `labeler`.
	# NOTE(review): the nesting of these statements is not recoverable from this
	# flattened view; confirm the layout against the running app.
	with gr.Blocks() as app:
	gr.Markdown(INSTRUCTIONS)

	# The two candidate outputs; non-interactive so labelers cannot edit them.
	with gr.Row():
	with gr.Column():
	left_output = gr.Textbox(
	value=initial_left,
	label="Model Output A",
	lines=10,
	interactive=False
	)

	with gr.Column():
	right_output = gr.Textbox(
	value=initial_right,
	label="Model Output B",
	lines=10,
	interactive=False
	)

	# Hidden textbox carrying the ID of the pair on screen; every click handler
	# receives it as an input and overwrites it with the next pair's ID.
	item_id = gr.Textbox(value=initial_id, visible=False)

	with gr.Row():
	left_btn = gr.Button("⬅️ A is better", variant="primary")
	right_btn = gr.Button("➡️ B is better", variant="primary")
	tie_btn = gr.Button("🤝 Tie", variant="primary")
	cant_choose_btn = gr.Button("🤔 Can't choose")

	# Non-interactive slider used as a progress indicator: the handlers write
	# the labeler's current index into it after each judgment.
	current_sample_sld = gr.Slider(minimum=0, maximum=len(labeler), step=1,
	value=labeler.current_index,
	interactive=False,
	label='sample_ind',
	info=f"Samples labeled (out of {len(labeler)})",
	show_label=False,
	container=False,
	scale=5)

	# One thin wrapper per button: each binds the judgment label that gets
	# stored and delegates to judge(), which is defined further down and only
	# looked up when a button is actually clicked.
	def judge_left(item_id, left_text, right_text):
	return judge("A is better", item_id, left_text, right_text)

	def judge_right(item_id, left_text, right_text):
	return judge("B is better", item_id, left_text, right_text)

	def judge_tie(item_id, left_text, right_text):
	return judge("Tie", item_id, left_text, right_text)

	def judge_cant_choose(item_id, left_text, right_text):
	return judge("Can't choose", item_id, left_text, right_text)

	# Record `choice` through the shared labeler and return the next pair's ID,
	# left text, right text and the updated index, in the same order as the
	# `outputs` lists of the click registrations below.
	def judge(choice, item_id, left_text, right_text):
	new_id, new_left, new_right, new_index = labeler.submit_judgment(
	item_id, left_text, right_text, choice
	)
	return new_id, new_left, new_right, new_index

	# All four buttons use the same inputs and outputs; only the handler differs.
	left_btn.click(
	judge_left,
	inputs=[item_id, left_output, right_output],
	outputs=[item_id, left_output, right_output, current_sample_sld]
	)

	right_btn.click(
	judge_right,
	inputs=[item_id, left_output, right_output],
	outputs=[item_id, left_output, right_output, current_sample_sld]
	)

	tie_btn.click(
	judge_tie,
	inputs=[item_id, left_output, right_output],
	outputs=[item_id, left_output, right_output, current_sample_sld]
	)

	cant_choose_btn.click(
	judge_cant_choose,
	inputs=[item_id, left_output, right_output],
	outputs=[item_id, left_output, right_output, current_sample_sld]
	)

	# Start the Gradio server only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	app.launch()