"""Gradio app that generates a starter analysis notebook for a Hub dataset.

The user searches for a dataset on the Hugging Face Hub, generates a small
Jupyter notebook that loads it with pandas, and can optionally push that
notebook to the dataset repository using a personal access token.
"""

from urllib.parse import quote

import gradio as gr
import nbformat as nbf
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from huggingface_hub import HfApi

# TODOs:
# - Handle errors
# - Add more commands to the notebook
# - Parametrize the commands
# - How to handle configs and splits?
# - Let user choose the framework
# - Improve logs


def create_notebook_file(cell_commands, notebook_name: str = "generated_notebook.ipynb") -> None:
    """Write a notebook with one code cell per entry of *cell_commands*.

    Args:
        cell_commands: Iterable of source strings; each becomes a code cell.
        notebook_name: Path of the ``.ipynb`` file to create (overwritten if
            it already exists).
    """
    nb = nbf.v4.new_notebook()
    nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]

    # Notebook files are JSON; write them as UTF-8 regardless of the platform
    # default encoding so non-ASCII cell content round-trips.
    with open(notebook_name, "w", encoding="utf-8") as f:
        nbf.write(nb, f)

    print(f"Notebook '{notebook_name}' created successfully.")


def push_notebook(file_path: str, dataset_id: str, token: str):
    """Upload the generated notebook to the dataset repo and return a link.

    Args:
        file_path: Local path of the notebook file to upload.
        dataset_id: Hub dataset repository id (``namespace/name``).
        token: Hugging Face access token used for the upload.

    Returns:
        A visible ``gr.HTML`` component containing a hyperlink to the
        uploaded notebook on the Hub.
    """
    notebook_name = "dataset_analysis.ipynb"
    api = HfApi(token=token)
    api.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=notebook_name,
        repo_id=dataset_id,
        repo_type="dataset",
    )
    print("Notebook uploaded to Huggingface Hub.")

    # Percent-encode the id (keeping the namespace separator) so it cannot
    # break out of the href attribute.
    repo_path = quote(dataset_id, safe="/")
    link = f"https://huggingface.co/datasets/{repo_path}/blob/main/{notebook_name}"
    html = f'<a href="{link}" target="_blank">See notebook</a>'
    return gr.HTML(value=html, visible=True)


def generate_notebook(dataset_id: str):
    """Create a starter notebook for *dataset_id* and reveal the push section.

    Args:
        dataset_id: Hub dataset repository id (``namespace/name``).

    Returns:
        A tuple of (visible ``gr.File`` pointing at the generated notebook,
        an update that makes the auth row visible).

    Raises:
        gr.Error: If no dataset has been selected.
    """
    if not dataset_id:
        raise gr.Error("Please select a dataset before generating a notebook.")

    commands = [
        "!pip install pandas",
        "import pandas as pd",
        f"df = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')",
        "df.head()",
    ]
    notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
    create_notebook_file(commands, notebook_name=notebook_name)
    # gr.update() is used instead of the legacy per-component
    # ``gr.Row.update`` classmethod.
    return gr.File(value=notebook_name, visible=True), gr.update(visible=True)


with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Dataset auto analyst creator 🕵️")
    dataset_name = HuggingfaceHubSearch(
        label="Hub Dataset ID",
        placeholder="Search for dataset id on Huggingface",
        search_type="dataset",
        value="",
    )

    @gr.render(inputs=dataset_name)
    def embed(name):
        """Render the Hub dataset viewer for *name*, or a placeholder."""
        if not name:
            return gr.Markdown("### No dataset provided")
        viewer_url = (
            f"https://huggingface.co/datasets/{quote(name, safe='/')}/embed/viewer"
        )
        html_code = f"""
        <iframe
          src="{viewer_url}"
          frameborder="0"
          width="100%"
          height="350px"
        ></iframe>
        """
        return gr.HTML(value=html_code)

    generate_btn = gr.Button("Generate notebook")
    download_link = gr.File(label="Download notebook", visible=False)

    # Hidden until a notebook has been generated.
    with gr.Row(visible=False) as auth_page:
        with gr.Column():
            auth_title = gr.Markdown(
                "Want to push to hub? Enter your token ([settings](https://huggingface.co/settings/tokens)):"
            )
            token_box = gr.Textbox(
                "", label="token", placeholder="hf_xxx", type="password"
            )
            auth_error = gr.Markdown("", visible=False)

    # NOTE(review): nesting of the two components below was reconstructed from
    # a whitespace-collapsed source; confirm they belong at Blocks level.
    push_btn = gr.Button("Push notebook to hub", visible=False)
    output_lbl = gr.HTML(value="", visible=False)

    generate_btn.click(
        generate_notebook,
        inputs=[dataset_name],
        outputs=[download_link, auth_page],
    )

    def auth(token):
        """Show the push button only while a token is entered."""
        return {
            auth_error: gr.Markdown(value="", visible=False),
            push_btn: gr.Button("Push notebook to hub", visible=bool(token)),
        }

    token_box.change(
        auth,
        inputs=token_box,
        outputs=[auth_error, push_btn],
    )

    push_btn.click(
        push_notebook,
        inputs=[download_link, dataset_name, token_box],
        outputs=output_lbl,
    )

demo.launch()