Spaces:
Sleeping
Sleeping
File size: 3,846 Bytes
421b068 7d529e0 734af25 421b068 7d529e0 421b068 7d529e0 421b068 7d529e0 421b068 7d529e0 6d0709a 7d529e0 6d0709a 7d529e0 6d0709a 7d529e0 6d0709a 421b068 7d529e0 6d0709a 7d529e0 421b068 6d0709a 421b068 7d529e0 421b068 7d529e0 421b068 734af25 6d0709a 734af25 7d529e0 6d0709a 7d529e0 6d0709a 7d529e0 6d0709a 7d529e0 6d0709a 734af25 6d0709a 421b068 6d0709a 7d529e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import gradio as gr
from gradio_huggingfacehub_search import HuggingfaceHubSearch
import nbformat as nbf
from huggingface_hub import HfApi
"""
TODOs:
- Show auth and push button only after notebook creation
- Improve the link to the result notebook
- Handle errors
- Add more commands to the notebook
- Parametrize the commands
- How to handle configs and splits?
- Let user choose the framework
- Improve logs
"""
def create_notebook_file(cell_commands, notebook_name="generated_notebook.ipynb"):
    """Write a Jupyter notebook containing one code cell per command.

    Args:
        cell_commands: Iterable of source strings; each becomes a code cell.
        notebook_name: Destination path for the generated ``.ipynb`` file.
    """
    nb = nbf.v4.new_notebook()
    nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]
    # Notebooks are JSON and must be UTF-8; an explicit encoding avoids
    # corruption on platforms where the default encoding is not UTF-8.
    with open(notebook_name, "w", encoding="utf-8") as f:
        nbf.write(nb, f)
    print(f"Notebook '{notebook_name}' created successfully.")
def push_notebook(file_path, dataset_id, token):
    """Upload a notebook file to a dataset repo and return an HTML link to it.

    Args:
        file_path: Local path of the notebook to upload.
        dataset_id: Target dataset repo id (``namespace/name``).
        token: HF access token with write permission on the repo.

    Returns:
        An HTML anchor pointing at the uploaded notebook on the Hub.
    """
    # Single source of truth for the in-repo filename: the original code
    # uploaded to "dataset_analysis.ipynb" but linked to
    # "dataset_analyst.ipynb", producing a dead "See notebook" link.
    notebook_path_in_repo = "dataset_analysis.ipynb"
    api = HfApi(token=token)
    api.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=notebook_path_in_repo,
        repo_id=dataset_id,
        repo_type="dataset",
    )
    # TODO: Handle permission error
    print("Notebook uploaded to Huggingface Hub.")
    link = (
        f"https://huggingface.co/datasets/{dataset_id}/blob/main/{notebook_path_in_repo}"
    )
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">See notebook</a>'
def generate_notebook(dataset_id):
    """Generate a starter analysis notebook for a Hub dataset.

    The notebook loads the dataset's first train parquet shard into a
    pandas DataFrame and previews it.

    Args:
        dataset_id: Hub dataset repo id (``namespace/name``).

    Returns:
        The local filename of the generated notebook.
    """
    # TODO: Get first config and split? or generate a dataframe per each split maybe?
    commands = [
        "!pip install pandas",
        "import pandas as pd",
        f"df = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')",
        "df.head()",
    ]
    # '/' is not valid in a local filename, so flatten the repo id.
    notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
    create_notebook_file(commands, notebook_name=notebook_name)
    # The original message claimed the notebook was uploaded, but this
    # function only generates it locally; pushing happens in push_notebook.
    print(f"Notebook '{notebook_name}' generated locally.")
    return notebook_name
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Dataset auto analyst creator 🕵️")
    # Dataset picker backed by Hub search.
    dataset_name = HuggingfaceHubSearch(
        label="Hub Dataset ID",
        placeholder="Search for dataset id on Huggingface",
        search_type="dataset",
        value="",
    )

    @gr.render(inputs=dataset_name)
    def embed(name):
        """Embed the dataset viewer iframe for the selected dataset."""
        if not name:
            return gr.Markdown("### No dataset provided")
        html_code = f"""
        <iframe
            src="https://huggingface.co/datasets/{name}/embed/viewer/default/train"
            frameborder="0"
            width="100%"
            height="600px"
        ></iframe>
        """
        return gr.HTML(value=html_code)

    generate_btn = gr.Button("Generate notebook", visible=True)
    download_link = gr.File(label="Download notebook")
    generate_btn.click(
        generate_notebook, inputs=[dataset_name], outputs=[download_link]
    )

    with gr.Row() as auth_page:
        with gr.Column():
            auth_title = gr.Markdown(
                "Enter your token ([settings](https://huggingface.co/settings/tokens)):"
            )
            token_box = gr.Textbox(
                "", label="token", placeholder="hf_xxx", type="password"
            )
            auth_error = gr.Markdown("", visible=False)

    def auth(token):
        """Show the push button only once a (non-empty) token is entered.

        Returns per-component updates keyed by component. BUG FIX: the
        update targeting ``push_btn`` must be a ``gr.Button`` update —
        the original returned ``gr.Row`` updates, which address the
        wrong component type for a Button output.
        """
        if not token:
            return {
                auth_error: gr.Markdown(value="", visible=False),
                push_btn: gr.Button(visible=False),
            }
        return {
            auth_error: gr.Markdown(value="", visible=False),
            push_btn: gr.Button(visible=True),
        }

    # Hidden until a token is provided (toggled by auth above).
    push_btn = gr.Button("Push notebook to hub", visible=False)
    token_box.change(
        auth,
        inputs=token_box,
        outputs=[auth_error, push_btn],
    )
    output_lbl = gr.HTML(value="")
    push_btn.click(
        push_notebook,
        inputs=[download_link, dataset_name, token_box],
        outputs=[output_lbl],
    )
demo.launch()
|