Spaces:
Sleeping
Sleeping
import gradio as gr | |
from gradio_huggingfacehub_search import HuggingfaceHubSearch | |
import nbformat as nbf | |
from huggingface_hub import HfApi | |
""" | |
TODOs: | |
- Handle errors | |
- Add more commands to the notebook | |
- Parametrize the commands | |
- How to handle configs and splits? | |
- Let user choose the framework | |
- Improve logs | |
""" | |
def create_notebook_file(cell_commands, notebook_name="generated_notebook.ipynb"):
    """Write a Jupyter notebook containing one code cell per command.

    Args:
        cell_commands: Iterable of source strings; each one becomes a code
            cell, in the order given.
        notebook_name: Path of the ``.ipynb`` file to create. An existing
            file at that path is overwritten.
    """
    nb = nbf.v4.new_notebook()
    nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]

    # Open explicitly as UTF-8 instead of relying on the platform default
    # encoding, so non-ASCII text in a cell is written consistently.
    with open(notebook_name, "w", encoding="utf-8") as f:
        nbf.write(nb, f)

    print(f"Notebook '{notebook_name}' created successfully.")
def push_notebook(file_path, dataset_id, token):
    """Upload a notebook file to a dataset repo on the Hub and link to it.

    Args:
        file_path: Local notebook to upload (passed straight through to
            ``HfApi.upload_file`` as ``path_or_fileobj``).
        dataset_id: Target dataset repository id.
        token: Hugging Face access token used to authenticate the upload.

    Returns:
        A visible ``gr.HTML`` component containing a "See notebook" link to
        the uploaded file.
    """
    # The notebook is always stored under this fixed name in the repo,
    # regardless of the local file name.
    target_name = "dataset_analysis.ipynb"

    HfApi(token=token).upload_file(
        path_or_fileobj=file_path,
        path_in_repo=target_name,
        repo_id=dataset_id,
        repo_type="dataset",
    )
    print("Notebook uploaded to Huggingface Hub.")

    notebook_url = (
        f"https://huggingface.co/datasets/{dataset_id}/blob/main/{target_name}"
    )
    anchor = (
        f'<a target="_blank" href="{notebook_url}" '
        'style="color: var(--link-text-color); text-decoration: underline; '
        'text-decoration-style: dotted;">See notebook</a>'
    )
    return gr.HTML(value=anchor, visible=True)
def generate_notebook(dataset_id):
    """Create a starter analysis notebook for ``dataset_id``.

    The notebook installs pandas, loads the dataset's parquet file from the
    Hub and previews the first rows. It is written to the working directory
    as ``<dataset id with '/' replaced by '-'>.ipynb``.

    Args:
        dataset_id: Hub dataset id selected in the search box.

    Returns:
        A 2-tuple of updates: a visible ``gr.File`` pointing at the generated
        notebook, and an update that makes the second output visible.

    Raises:
        gr.Error: If no dataset id was provided.
    """
    if not dataset_id:
        # Without an id the notebook would be named ".ipynb" and point at a
        # non-existent parquet path; surface the problem to the user instead.
        raise gr.Error("Please select a dataset before generating a notebook.")

    # NOTE: the parquet path assumes a single-shard "train" split under
    # data/; configs and splits are not handled yet.
    commands = [
        "!pip install pandas",
        "import pandas as pd",
        f"df = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')",
        "df.head()",
    ]
    notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
    create_notebook_file(commands, notebook_name=notebook_name)

    # gr.update works regardless of whether the per-component ``update()``
    # classmethods are available in the installed Gradio version.
    return gr.File(value=notebook_name, visible=True), gr.update(visible=True)
# UI flow: pick a dataset -> generate a notebook -> optionally push it to the Hub.
with gr.Blocks() as demo:
    # Title emoji restored from mis-decoded UTF-8. The first emoji's final
    # byte was lost, so the robot is a best guess. TODO confirm.
    gr.Markdown("# 🤖 Dataset auto analyst creator 🕵️")
    dataset_name = HuggingfaceHubSearch(
        label="Hub Dataset ID",
        placeholder="Search for dataset id on Huggingface",
        search_type="dataset",
        value="",
    )

    # NOTE(review): nothing in this section registers `embed` with an event
    # or renderer, so as written it is never called. Confirm how it is meant
    # to be triggered (presumably by changes to `dataset_name`).
    def embed(name):
        """Return an embedded dataset viewer for ``name``, or a placeholder."""
        if not name:
            return gr.Markdown("### No dataset provided")
        html_code = f"""
        <iframe
        src="https://huggingface.co/datasets/{name}/embed/viewer/default/train"
        frameborder="0"
        width="100%"
        height="350px"
        ></iframe>
        """
        return gr.HTML(value=html_code)

    generate_btn = gr.Button("Generate notebook")
    download_link = gr.File(label="Download notebook", visible=False)

    # Hidden until a notebook has been generated (see generate_btn.click).
    with gr.Row(visible=False) as auth_page:
        with gr.Column():
            auth_title = gr.Markdown(
                "Want to push to hub? Enter your token ([settings](https://huggingface.co/settings/tokens)):"
            )
            token_box = gr.Textbox(
                "", label="token", placeholder="hf_xxx", type="password"
            )
            auth_error = gr.Markdown("", visible=False)

    push_btn = gr.Button("Push notebook to hub", visible=False)
    output_lbl = gr.HTML(value="", visible=False)

    generate_btn.click(
        generate_notebook,
        inputs=[dataset_name],
        outputs=[download_link, auth_page],
    )

    def auth(token):
        """Show the push button only while the token box is non-empty.

        The token itself is not validated here; the error label is always
        kept cleared and hidden.
        """
        return {
            auth_error: gr.Markdown(value="", visible=False),
            push_btn: gr.Button("Push notebook to hub", visible=bool(token)),
        }

    token_box.change(
        auth,
        inputs=token_box,
        outputs=[auth_error, push_btn],
    )

    # `download_link` supplies the generated notebook file as the first
    # argument of push_notebook.
    push_btn.click(
        push_notebook,
        inputs=[download_link, dataset_name, token_box],
        outputs=output_lbl,
    )

demo.launch()