"""Gradio viewer for the princeton-nlp/SWE-bench dataset (``test`` split).

Two lookup modes are exposed through a single interface:

* by position: pick an index with the slider and get the full sample back;
* by repository: type (part of) a repository name and get a compact list of
  matching instances with their base commit and a GitHub link.
"""

import gradio as gr
from datasets import load_dataset

# Load the dataset from Hugging Face (happens once, at import time).
dataset = load_dataset("princeton-nlp/SWE-bench")


def get_result(id, name, mk):
    """Dispatch a UI request to the ID lookup or the repository filter.

    Args:
        id: Sample index coming from the slider.
        name: Repository filter text; when empty, ``None`` or only
            whitespace, the lookup falls back to ``id``.
        mk: Value of the Markdown component wired in as the third input.
            It is not used here.

    Returns:
        The full sample (ID lookup), a list of summaries (repository filter),
        or a dict with an ``"error"`` key if the lookup failed.
    """
    # Normalize so that None / "   " behave like "no filter given".
    repo_filter = name.strip() if name else ""
    if not repo_filter:
        return get_sample(id)
    return get_sample_filter(repo_filter)


def get_sample(sample_id):
    """Return the sample at position ``sample_id`` of the ``test`` split.

    Args:
        sample_id: Anything ``int()`` accepts (int, float, numeric string).

    Returns:
        The sample, or ``{"error": "Invalid sample ID"}`` when ``sample_id``
        cannot be converted or the lookup fails.
    """
    try:
        return dataset['test'][int(sample_id)]
    except Exception:
        # Best-effort UI callback: report the problem as JSON instead of
        # raising. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate.
        return {"error": "Invalid sample ID"}


def get_sample_filter(repo_name):
    """List ``test`` samples whose ``repo`` field contains ``repo_name``.

    Args:
        repo_name: Substring to look for in each sample's ``"repo"`` value.

    Returns:
        A list of dicts with keys ``"repo"``, ``"base_commit"`` and
        ``"issue_url"`` (one per matching sample), or a dict with an
        ``"error"`` key if anything goes wrong while scanning.
    """
    try:
        result = []
        for sample in dataset['test']:
            repo = sample["repo"]
            if repo_name not in repo:
                continue
            # The number is the part after the LAST hyphen of instance_id.
            # Splitting from the left breaks for repositories whose owner or
            # name contains a hyphen (e.g. "scikit-learn__scikit-learn-10297").
            # NOTE(review): the SWE-bench card describes this number as the PR
            # number; the /issues/ URL is kept as in the original -- confirm.
            issue_id = sample["instance_id"].rsplit("-", 1)[-1]
            result.append({
                "repo": repo,
                "base_commit": sample["base_commit"],
                "issue_url": f"https://github.com/{repo}/issues/{issue_id}",
            })
        return result
    except Exception as e:
        return {"error": f"Invalid repository name or error occurred: {str(e)}"}


# Create the Gradio interface
block = gr.Markdown("[https://huggingface.co/datasets/princeton-nlp/SWE-bench](https://huggingface.co/datasets/princeton-nlp/SWE-bench)")
slider = gr.Slider(
    label="Select ID",
    minimum=0,
    # Derive the upper bound from the loaded split instead of hard-coding it,
    # so every sample stays reachable.
    maximum=len(dataset['test']) - 1,
    step=1,
    value=0,
)
input_box = gr.Textbox(
    lines=1,
    label="Repository filter",
    placeholder="Enter repository name (leave empty to look up by ID)",
)

iface = gr.Interface(
    fn=get_result,
    # The Markdown block is passed as an input so the dataset link is shown
    # next to the controls; get_result ignores its value.
    inputs=[slider, input_box, block],
    outputs=gr.JSON(),
    title="SWE-bench Dataset Viewer",
    description=(
        "Select a sample ID to view the corresponding data from the SWE-bench "
        "dataset, or enter a repository name to list matching instances."
    ),
)

# Launch the interface
if __name__ == "__main__":
    iface.launch(share=True)