import gradio as gr
from datasets import load_dataset

# Load the SWE-bench dataset from Hugging Face once, at import time; the
# lookup functions below read this module-level object on every call.
dataset = load_dataset("princeton-nlp/SWE-bench")

# Callback functions that look up samples by index or by repository name

def get_result(id, name, mk):
    """Dispatch a UI request to the index lookup or the repository filter.

    Args:
        id: Sample index selected in the UI; used only when no repository
            name is given.
        name: Repository name (or substring) to filter by. ``None``, an
            empty string, or whitespace-only text means "no filter".
        mk: Unused by this function; accepted because the interface passes
            the value of a third input component positionally.

    Returns:
        Whatever ``get_sample`` (no filter) or ``get_sample_filter``
        (filter given) returns.
    """
    # Normalise first so that None or surrounding whitespace is not
    # mistaken for a repository filter.
    query = name.strip() if isinstance(name, str) else name
    if query is None or query == "":
        return get_sample(id)
    return get_sample_filter(query)

def get_sample(sample_id):
    """Return the record at position ``sample_id`` of the ``test`` split.

    Args:
        sample_id: Position of the record; any value ``int()`` accepts
            (an int, a float coming from a slider, a numeric string).

    Returns:
        The record obtained by indexing the split, or
        ``{"error": "Invalid sample ID"}`` when ``sample_id`` cannot be
        converted to an integer or does not address a record.
    """
    invalid = {"error": "Invalid sample ID"}

    # Convert before touching the dataset so malformed input is rejected
    # without performing a lookup.
    try:
        index = int(sample_id)
    except (TypeError, ValueError, OverflowError):
        return invalid

    # Only lookup failures are translated into the error payload; anything
    # else (e.g. KeyboardInterrupt, programming errors) propagates instead
    # of being silently swallowed.
    try:
        return dataset["test"][index]
    except (IndexError, KeyError):
        return invalid

def get_sample_filter(repo_name):
    """List the ``test``-split records whose repository contains ``repo_name``.

    Args:
        repo_name: Substring matched case-sensitively against each
            record's ``repo`` field.

    Returns:
        A list with one dict per matching record, each holding the keys
        ``repo``, ``base_commit`` and ``issue_url`` (the list is empty when
        nothing matches). If anything fails while scanning, a single
        ``{"error": ...}`` dict describing the failure is returned instead.
    """
    try:
        matches = []
        for sample in dataset["test"]:
            repo = sample["repo"]
            if repo_name not in repo:
                continue

            # The number is the text after the LAST hyphen: the repository
            # part of an instance id may itself contain hyphens
            # (e.g. "scikit-learn__scikit-learn-10297"), so splitting on
            # the first hyphen would pick up part of the repository name.
            issue_id = sample["instance_id"].rsplit("-", 1)[1]
            matches.append(
                {
                    "repo": repo,
                    "base_commit": sample["base_commit"],
                    "issue_url": f"https://github.com/{repo}/issues/{issue_id}",
                }
            )
        return matches
    except Exception as e:
        # Best-effort UI callback: report the failure as JSON rather than
        # letting the exception escape.
        return {"error": f"Invalid repository name or error occurred: {str(e)}"}


# Create the Gradio interface
# NOTE: the Markdown link is registered as a third input component, which is
# why get_result accepts (and ignores) a third argument.
block = gr.Markdown("[https://huggingface.co/datasets/princeton-nlp/SWE-bench](https://huggingface.co/datasets/princeton-nlp/SWE-bench)")
# Derive the upper bound from the loaded split so every record is reachable,
# instead of hard-coding a size that can drift from the dataset.
slider = gr.Slider(
    label="Select ID",
    minimum=0,
    maximum=max(len(dataset["test"]) - 1, 0),
    step=1,
    value=0,
)
# This textbox feeds get_result's `name` argument, i.e. the repository
# filter, so its label and hint must describe a repository name, not an ID.
input_box = gr.Textbox(
    lines=1,
    label="Repository name",
    placeholder="Optional: repository name to filter by (e.g. django/django)",
)

iface = gr.Interface(
    fn=get_result,
    inputs=[slider, input_box, block],
    outputs=gr.JSON(),
    title="SWE-bench Dataset Viewer",
    description=(
        "Select a sample ID to view the corresponding data from the SWE-bench "
        "dataset, or enter a repository name to list its matching samples."
    ),
)

# Launch the interface
if __name__ == "__main__":
    # share=True additionally requests a public share link from Gradio;
    # drop it to serve on the local machine only.
    iface.launch(share=True)