lysandre's picture
lysandre HF staff
Add "Benchmarking transformers on various AI hardware accelerators"
e8002b4
raw
history blame
4.94 kB
import json
import os
from io import BytesIO
import gradio as gr
from huggingface_hub import upload_file
default_question = """
We're going to use the <a href="https://huggingface.co/datasets/wikitext" target="_blank"><code>wikitext (link)</a></code> dataset with the <code><a href="https://huggingface.co/distilbert-base-cased" target="_blank">distilbert-base-cased (link)</a></code> model checkpoint.
<br/><br/>
Start by loading the <code>wikitext-2-raw-v1</code> version of that dataset, and take the 11th example (index 10) of the <code>train</code> split.<br/>
We'll tokenize this using the appropriate tokenizer, and we'll mask the sixth token (index 5) the sequence.
<br/><br/>
When using the <code>distilbert-base-cased</code> checkpoint to unmask that (sixth token, index 5) token, what is the most probable predicted token (please provide the decoded token, and not the ID)?
<br/>
<br/>
Tips:
<br/>
- You might find the <a href="https://huggingface.co/docs/transformers/index" target="_blank">transformers docs (link)</a> useful.
<br/>
- You might find the <a href="https://huggingface.co/docs/datasets/index" target="_blank">datasets docs (link)</a> useful.
<br/>
- You might also be interested in the <a href="https://huggingface.co/course" target="_blank">Hugging Face course (link)</a>.
"""
# Exercise for the Skops & Scikit-Learn internship: demonstrate the
# cross-version pickle warning when loading a scikit-learn 1.0 model in 1.1.
# Fixed: the question now ends with "?" and reference [5] no longer carries a
# stray dash (consistent with reference [4]).
skops_question = """
1. Create a python environment[1] and install `scikit-learn` version `1.0` in that environment.
<br/>
2. Using that environment, create a `LogisticRegression` model[2] and fit it on the Iris dataset[3].
<br/>
3. Save the trained model using `pickle`[4] or `joblib`[5].
<br/>
4. Create a second environment, and install `scikit-learn` version `1.1` in it.
<br/>
5. Try loading the model you saved in step 3 in this second environment.
<br/>
<br/>
Question:
<br/>
Is there a warning or error you receive while trying to load the model? If yes, what exactly is it?
<br/>
<br/>
References
<br/>
- [1] You can use any tool you want to create the environment. Two of the options are:
<br/>
    - `venv`: https://docs.python.org/3/library/venv.html
<br/>
    - `mamba`: https://github.com/mamba-org/mamba
<br/>
- [2] `LogisticRegression` API guide: https://scikit-learn.org/dev/modules/generated/sklearn.linear_model.LogisticRegression.html
<br/>
- [3] `load_iris` API guide: https://scikit-learn.org/dev/modules/generated/sklearn.datasets.load_iris.html
<br/>
- [4] `pickle`: https://docs.python.org/3/library/pickle.html
<br/>
- [5] `joblib`: https://joblib.readthedocs.io/en/latest/
"""
# Maps each internship name to the question shown to its applicants.
# Every internship uses the default fill-mask exercise except the
# Skops & Scikit-Learn one, which has a dedicated scikit-learn exercise.
# Insertion order matters: it drives the Dropdown choice order below.
internships = {
    name: skops_question if name == "Skops & Scikit-Learn" else default_question
    for name in (
        "Accelerate",
        "Diffusion distillation",
        "Skops & Scikit-Learn",
        "Code Generation",
        "Document AI Democratization",
        "Evaluate",
        "ASR",
        "Efficient video pretraining",
        "Embodied AI",
        "Emergence of scene and text understanding",
        "Everything is multimodal",
        "Everything is vision",
        "Retrieval augmentation as prompting",
        "Social impact evaluations",
        "Toolkit for detecting distribution shift",
        "AI Art Tooling Residency",
        "Gradio as an ecosystem",
        "Benchmarking transformers on various AI hardware accelerators",
    )
}
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Internship introduction
        Please select the internship you would like to apply to and answer the question asked in the Answer box.
        """
    )
    internship_choice = gr.Dropdown(label="Internship", choices=list(internships.keys()))

    # The answer form stays hidden until an internship is selected, so the
    # applicant only sees it once there is a question to answer.
    with gr.Column(visible=False) as details_col:
        summary = gr.HTML(label="Question")
        details = gr.Textbox(label="Answer")
        username = gr.Textbox(label="Hugging Face Username")
        comment = gr.Textbox(label="Any comment?")
        generate_btn = gr.Button("Submit")
        output = gr.Label()

    def filter_species(species):
        """Show the question for the selected internship and reveal the form.

        Returns a pair of updates targeting (summary, details_col): the
        question HTML and a visibility toggle for the answer column.
        """
        # `summary` is a gr.HTML component, so use the generic gr.update
        # instead of gr.Label.update, which is meant for Label components.
        return gr.update(value=internships[species]), gr.update(visible=True)

    internship_choice.change(filter_species, internship_choice, [summary, details_col])

    def on_click(_details, _username, _internship_choice, _comment):
        """Upload the applicant's answer as a JSON file to the dataset repo.

        The file is named after the applicant's username and pushed to the
        private `internships/internships-2023` dataset.
        """
        if not _username:
            # The username doubles as the file path in the repo; refuse
            # submissions without one rather than uploading to an empty path.
            return "Please fill in your Hugging Face username before submitting."
        response = {
            "response": _details,
            "internship": _internship_choice,
            "comment": _comment,
        }
        upload_file(
            path_or_fileobj=BytesIO(json.dumps(response).encode("utf-8")),
            path_in_repo=_username,
            repo_id="internships/internships-2023",
            repo_type="dataset",
            token=os.environ["HF_TOKEN"],  # must be configured as a Space secret
        )
        return f"Submitted: '{_details}' for user '{_username}'"

    generate_btn.click(on_click, inputs=[details, username, internship_choice, comment], outputs=[output])
# Script entry point: start the Gradio server when executed directly
# (a Space runner may instead import `demo` and launch it itself).
if __name__ == "__main__":
    demo.launch()