import gradio as gr
from gradio_huggingfacehub_search import HuggingfaceHubSearch
import nbformat as nbf
from huggingface_hub import HfApi
"""
TODOs:
- Handle erros
- Add more commands to the notebook
- Parametrize the commands
- How to handle configs and splits?
- Let user choose the framework
- Improve logs
"""
def create_notebook_file(cell_commands, notebook_name="generated_notebook.ipynb"):
    # Build a notebook with one code cell per command and write it to disk.
    nb = nbf.v4.new_notebook()
    nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]
    with open(notebook_name, "w") as f:
        nbf.write(nb, f)
    print(f"Notebook '{notebook_name}' created successfully.")

def push_notebook(file_path, dataset_id, token):
    # Upload the generated notebook to the dataset repo and return a link to it.
    notebook_name = "dataset_analysis.ipynb"
    api = HfApi(token=token)
    api.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=notebook_name,
        repo_id=dataset_id,
        repo_type="dataset",
    )
    print("Notebook uploaded to Huggingface Hub.")
    link = f"https://huggingface.co/datasets/{dataset_id}/blob/main/{notebook_name}"
    html = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">See notebook</a>'
    return gr.HTML(value=html, visible=True)
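
# A possible shape for the "Handle errors" TODO above (a sketch, not wired in):
# wrapping the upload would surface a bad token or repo id in the UI instead of
# crashing the app. HfHubHTTPError is huggingface_hub's exception for HTTP
# failures when talking to the Hub.
#
# from huggingface_hub.utils import HfHubHTTPError
#
# try:
#     api.upload_file(...)  # same call as above
# except HfHubHTTPError as err:
#     return gr.HTML(value=f"Upload failed: {err}", visible=True)
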
def generate_notebook(dataset_id):
    # The analysis commands are hard-coded for now (see the TODOs above).
    commands = [
        "!pip install pandas",
        "import pandas as pd",
        f"df = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')",
        "df.head()",
    ]
    notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
    create_notebook_file(commands, notebook_name=notebook_name)
    # Reveal the download link and the push-to-hub row.
    return gr.File(value=notebook_name, visible=True), gr.Row(visible=True)
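
# The parquet path above assumes a single default config/split layout. A
# hypothetical way to discover the real layout (for the "configs and splits"
# TODO) is to list the repo's files and keep the parquet ones:
#
# files = HfApi().list_repo_files(dataset_id, repo_type="dataset")
# parquet_files = [f for f in files if f.endswith(".parquet")]
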
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Dataset auto analyst creator 🕵️")
    dataset_name = HuggingfaceHubSearch(
        label="Hub Dataset ID",
        placeholder="Search for dataset id on Huggingface",
        search_type="dataset",
        value="",
    )

    @gr.render(inputs=dataset_name)
    def embed(name):
        # Re-rendered on every change of the search box: embed the dataset viewer.
        if not name:
            return gr.Markdown("### No dataset provided")
        html_code = f"""
        <iframe
          src="https://huggingface.co/datasets/{name}/embed/viewer/default/train"
          frameborder="0"
          width="100%"
          height="350px"
        ></iframe>
        """
        return gr.HTML(value=html_code)

    generate_btn = gr.Button("Generate notebook")
    download_link = gr.File(label="Download notebook", visible=False)
    with gr.Row(visible=False) as auth_page:
        with gr.Column():
            auth_title = gr.Markdown(
                "Want to push to hub? Enter your token ([settings](https://huggingface.co/settings/tokens)):"
            )
            token_box = gr.Textbox(
                "", label="token", placeholder="hf_xxx", type="password"
            )
            auth_error = gr.Markdown("", visible=False)
    push_btn = gr.Button("Push notebook to hub", visible=False)
    output_lbl = gr.HTML(value="", visible=False)

    generate_btn.click(
        generate_notebook,
        inputs=[dataset_name],
        outputs=[download_link, auth_page],
    )

    def auth(token):
        if not token:
            return {
                auth_error: gr.Markdown(value="", visible=False),
                push_btn: gr.Button(visible=False),
            }
        return {
            auth_error: gr.Markdown(value="", visible=False),
            push_btn: gr.Button("Push notebook to hub", visible=True),
        }

    token_box.change(
        auth,
        inputs=token_box,
        outputs=[auth_error, push_btn],
    )
    push_btn.click(
        push_notebook,
        inputs=[download_link, dataset_name, token_box],
        outputs=output_lbl,
    )

demo.launch()
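
# Hypothetical requirements.txt for this Space (names taken from the imports
# above; the gradio pin is an assumption, since @gr.render needs a recent 4.x):
#
# gradio>=4.36
# gradio_huggingfacehub_search
# nbformat
# huggingface_hub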