burtenshaw committed on
Commit c0d911e · 1 Parent(s): 6d5550e

update application with latest logic from parent

Files changed (7)
  1. app.py +107 -84
  2. data/supervised-finetuning.json +338 -0
  3. example.json +0 -82
  4. push_questions.py +33 -0
  5. pyproject.toml +1 -1
  6. requirements.txt +3 -3
  7. uv.lock +9 -9
app.py CHANGED
@@ -6,8 +6,9 @@ import gradio as gr
 from datasets import load_dataset, Dataset
 from huggingface_hub import whoami
 
-
-EXAM_DATASET_ID = os.getenv("EXAM_DATASET_ID") or "agents-course/unit_1_quiz"
+EXAM_DATASET_ID = (
+    os.getenv("EXAM_DATASET_ID") or "nlp-course/supervised-finetuning_quiz"
+)
 EXAM_MAX_QUESTIONS = os.getenv("EXAM_MAX_QUESTIONS") or 10
 EXAM_PASSING_SCORE = os.getenv("EXAM_PASSING_SCORE") or 0.7
 
@@ -24,14 +25,37 @@ if EXAM_MAX_QUESTIONS:
 
 def on_user_logged_in(token: gr.OAuthToken | None):
     """
-    If the user has a valid token, hide the login button and show the Start button.
-    Otherwise, keep the login button visible, hide Start.
+    If the user has a valid token, show Start button.
+    Otherwise, keep the login button visible.
     """
     if token is not None:
-        return gr.update(visible=False), gr.update(visible=False)
+        return [
+            gr.update(visible=False),  # login button visibility
+            gr.update(visible=True),  # start button visibility
+            gr.update(visible=False),  # next button visibility
+            gr.update(visible=False),  # submit button visibility
+            "",  # question text
+            [],  # radio choices (empty list = no choices)
+            "Click 'Start' to begin the quiz",  # status message
+            0,  # question_idx
+            [],  # user_answers
+            "",  # final_markdown content
+            token,  # user token
+        ]
     else:
-        # Not logged in, keep the login visible, hide Start
-        return gr.update(visible=True), gr.update(visible=False)
+        return [
+            gr.update(visible=True),  # login button visibility
+            gr.update(visible=False),  # start button visibility
+            gr.update(visible=False),  # next button visibility
+            gr.update(visible=False),  # submit button visibility
+            "",  # question text
+            [],  # radio choices
+            "",  # status message
+            0,  # question_idx
+            [],  # user_answers
+            "",  # final_markdown content
+            None,  # no token
+        ]
 
 
 def push_results_to_hub(user_answers, token: gr.OAuthToken | None):
@@ -74,36 +98,24 @@ def push_results_to_hub(user_answers, token: gr.OAuthToken | None):
 
 def handle_quiz(question_idx, user_answers, selected_answer, is_start):
     """
-    A single function that handles both 'Start' and 'Next' logic:
-    - If is_start=True, skip storing an answer and show the first question.
-    - Otherwise, store the last answer and move on.
-    - If we've reached the end, display results.
+    Handle quiz state transitions and store answers
     """
-    # Hide the start button once the first question is shown
-    start_btn_update = gr.update(visible=False) if is_start else None
-
-    # If this is the first time (start=True), begin at question_idx=0
-    if is_start:
-        question_idx = 0
-    else:
-        # If not the very first question, store the user's last selection
-        if question_idx < len(quiz_data):
-            current_q = quiz_data[question_idx]
-            correct_reference = current_q["correct_answer"]
-            correct_reference = f"answer_{correct_reference}".lower()
-            is_correct = selected_answer == current_q[correct_reference]
-            user_answers.append(
-                {
-                    "question": current_q["question"],
-                    "selected_answer": selected_answer,
-                    "correct_answer": current_q[correct_reference],
-                    "is_correct": is_correct,
-                    "correct_reference": correct_reference,
-                }
-            )
+    if not is_start and question_idx < len(quiz_data):
+        current_q = quiz_data[question_idx]
+        correct_reference = current_q["correct_answer"]
+        correct_reference = f"answer_{correct_reference}".lower()
+        is_correct = selected_answer == current_q[correct_reference]
+        user_answers.append(
+            {
+                "question": current_q["question"],
+                "selected_answer": selected_answer,
+                "correct_answer": current_q[correct_reference],
+                "is_correct": is_correct,
+                "correct_reference": correct_reference,
+            }
+        )
         question_idx += 1
 
-    # If we've reached the end, show final results
     if question_idx >= len(quiz_data):
        correct_count = sum(1 for answer in user_answers if answer["is_correct"])
        grade = correct_count / len(user_answers)
@@ -112,37 +124,35 @@ def handle_quiz(question_idx, user_answers, selected_answer, is_start):
             f"Your score: {grade:.1%}\n"
             f"Passing score: {float(EXAM_PASSING_SCORE):.1%}\n\n"
         )
-        return (
-            "",  # question_text becomes blank
-            gr.update(choices=[], visible=False),
+        return [
+            "",  # question_text
+            gr.update(choices=[], visible=False),  # hide radio choices
             f"{'✅ Passed!' if grade >= float(EXAM_PASSING_SCORE) else '❌ Did not pass'}",
             question_idx,
             user_answers,
-            start_btn_update,
-            gr.update(value=results_text, visible=True),  # show final_markdown
-        )
-    else:
-        # Otherwise, show the next question
-        q = quiz_data[question_idx]
-        updated_question = f"## Question {question_idx + 1} \n### {q['question']}"
-        return (
-            updated_question,
-            gr.update(
-                choices=[
-                    q["answer_a"],
-                    q["answer_b"],
-                    q["answer_c"],
-                    q["answer_d"],
-                ],
-                value=None,
-                visible=True,
-            ),
-            "Select an answer and click 'Next' to continue.",
-            question_idx,
-            user_answers,
-            start_btn_update,
-            gr.update(visible=False),  # Hide final_markdown for now
-        )
+            gr.update(visible=False),  # start button visibility
+            gr.update(visible=False),  # next button visibility
+            gr.update(visible=True),  # submit button visibility
+            results_text,  # final results text
+        ]
+
+    # Show next question
+    q = quiz_data[question_idx]
+    return [
+        f"## Question {question_idx + 1} \n### {q['question']}",  # question text
+        gr.update(  # properly update radio choices
+            choices=[q["answer_a"], q["answer_b"], q["answer_c"], q["answer_d"]],
+            value=None,
+            visible=True,
+        ),
+        "Select an answer and click 'Next' to continue.",
+        question_idx,
+        user_answers,
+        gr.update(visible=False),  # start button visibility
+        gr.update(visible=True),  # next button visibility
+        gr.update(visible=False),  # submit button visibility
+        "",  # clear final markdown
+    ]
 
 
 def success_message(response):
@@ -152,47 +162,58 @@ def success_message(response):
 
 with gr.Blocks() as demo:
     demo.title = f"Dataset Quiz for {EXAM_DATASET_ID}"
+
     # State variables
     question_idx = gr.State(value=0)
     user_answers = gr.State(value=[])
+    user_token = gr.State(value=None)
 
     with gr.Row(variant="compact"):
         gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")
+
     with gr.Row(variant="compact"):
         gr.Markdown(
             "Log in first, then click 'Start' to begin. Answer each question, click 'Next', and finally click 'Submit' to publish your results to the Hugging Face Hub."
         )
-    # We display question text with Markdown
-    with gr.Row(
-        variant="panel",
-    ):
+
+    with gr.Row(variant="panel"):
         question_text = gr.Markdown("")
         radio_choices = gr.Radio(
-            choices=[], visible=False, label="Your Answer", scale=1.5
+            choices=[], label="Your Answer", scale=1.5, visible=False
         )
 
     with gr.Row(variant="compact"):
         status_text = gr.Markdown("")
+        final_markdown = gr.Markdown("")
 
     with gr.Row(variant="compact"):
-        # Final results after all questions are done
-        final_markdown = gr.Markdown("", visible=False)
-
-    next_btn = gr.Button("Next ⏭️")
-    submit_btn = gr.Button("Submit ✅")
-
-    with gr.Row(variant="compact"):
-        login_btn = gr.LoginButton()
-        # We'll hide the Start button until user logs in
-        start_btn = gr.Button("Start", visible=False)
-
-    # Use click() instead of login()
-    login_btn.click(fn=on_user_logged_in, inputs=None, outputs=[login_btn, start_btn])
+        login_btn = gr.LoginButton(visible=True)
+        start_btn = gr.Button("Start ⏭️", visible=True)
+        next_btn = gr.Button("Next ⏭️", visible=False)
+        submit_btn = gr.Button("Submit ", visible=False)
+
+    # Wire up the event handlers
+    login_btn.click(
+        fn=on_user_logged_in,
+        inputs=None,
+        outputs=[
+            login_btn,
+            start_btn,
+            next_btn,
+            submit_btn,
+            question_text,
+            radio_choices,
+            status_text,
+            question_idx,
+            user_answers,
+            final_markdown,
+            user_token,
+        ],
+    )
 
-    # Click "Start" => show first question, hide Start button
     start_btn.click(
         fn=handle_quiz,
-        inputs=[question_idx, user_answers, radio_choices, gr.State(True)],
+        inputs=[question_idx, user_answers, gr.State(""), gr.State(True)],
         outputs=[
             question_text,
             radio_choices,
@@ -200,11 +221,12 @@ with gr.Blocks() as demo:
             question_idx,
             user_answers,
             start_btn,
+            next_btn,
+            submit_btn,
             final_markdown,
         ],
    )
 
-    # Click "Next" => store selection, move on
     next_btn.click(
         fn=handle_quiz,
         inputs=[question_idx, user_answers, radio_choices, gr.State(False)],
@@ -215,13 +237,14 @@ with gr.Blocks() as demo:
             question_idx,
             user_answers,
             start_btn,
+            next_btn,
+            submit_btn,
             final_markdown,
         ],
     )
 
     submit_btn.click(fn=push_results_to_hub, inputs=[user_answers])
 
-
 if __name__ == "__main__":
     # Note: If testing locally, you'll need to run `huggingface-cli login` or set HF_TOKEN
     # environment variable for the login to work locally.
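To try the updated app locally, a minimal sketch (assumptions: the repository layout above, network access to the quiz dataset, and a token made available as described in the note at the end of app.py; the launch call is standard Gradio API and not part of this commit):

import os

# Assumption: HF_TOKEN is already set, or `huggingface-cli login` was run, per the note in app.py.
os.environ.setdefault("EXAM_DATASET_ID", "nlp-course/supervised-finetuning_quiz")
os.environ.setdefault("EXAM_PASSING_SCORE", "0.7")

from app import demo  # app.py builds `demo` at module level, as shown in the diff above

demo.launch()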
data/supervised-finetuning.json ADDED
@@ -0,0 +1,338 @@
+[
+  {
+    "question": "What is Supervised Fine-Tuning (SFT) in the context of LLMs?",
+    "answer_a": "A technique to make models run faster",
+    "answer_b": "A method to train models on specific tasks using labeled data",
+    "answer_c": "A way to reduce model size",
+    "answer_d": "A process to create new model architectures",
+    "correct_answer": "B"
+  },
+  {
+    "question": "What is LoRA (Low-Rank Adaptation)?",
+    "answer_a": "A new type of transformer architecture",
+    "answer_b": "A method to compress models after training",
+    "answer_c": "An efficient fine-tuning technique that updates a small number of trainable parameters",
+    "answer_d": "A data preprocessing technique",
+    "correct_answer": "C"
+  },
+  {
+    "question": "What is the main advantage of using LoRA for fine-tuning?",
+    "answer_a": "It makes models more accurate than full fine-tuning",
+    "answer_b": "It reduces memory requirements and training costs while maintaining performance",
+    "answer_c": "It allows training without any original model weights",
+    "answer_d": "It automatically improves model inference speed",
+    "correct_answer": "B"
+  },
+  {
+    "question": "In chat templates, what is the purpose of the 'system' message?",
+    "answer_a": "To log system errors",
+    "answer_b": "To define the behavior and role of the assistant",
+    "answer_c": "To store user preferences",
+    "answer_d": "To handle technical configurations",
+    "correct_answer": "B"
+  },
+  {
+    "question": "Which of these is a common format marker in chat templates?",
+    "answer_a": "<|im_start|>",
+    "answer_b": "{BEGIN}",
+    "answer_c": "START_CHAT",
+    "answer_d": "<<START>>",
+    "correct_answer": "A"
+  },
+  {
+    "question": "What is the primary purpose of SFT datasets?",
+    "answer_a": "To create new model architectures",
+    "answer_b": "To train models to follow specific instructions and generate desired outputs",
+    "answer_c": "To test model performance only",
+    "answer_d": "To compress model size",
+    "correct_answer": "B"
+  },
+  {
+    "question": "Which statement about LoRA is TRUE?",
+    "answer_a": "It requires modifying the original model architecture",
+    "answer_b": "It can only be used with small models",
+    "answer_c": "It adds low-rank matrices to existing weights during training",
+    "answer_d": "It permanently changes the base model weights",
+    "correct_answer": "C"
+  },
+  {
+    "question": "What is a key benefit of using standardized chat templates?",
+    "answer_a": "They make models run faster",
+    "answer_b": "They ensure consistent formatting across different model implementations",
+    "answer_c": "They reduce model size",
+    "answer_d": "They eliminate the need for tokenization",
+    "correct_answer": "B"
+  },
+  {
+    "question": "In the context of SFT, what is a 'prompt-completion' pair?",
+    "answer_a": "Two separate models working together",
+    "answer_b": "A training example consisting of an input and its desired output",
+    "answer_c": "A type of model architecture",
+    "answer_d": "A method to compress training data",
+    "correct_answer": "B"
+  },
+  {
+    "question": "Which of these is NOT a typical component of a chat template?",
+    "answer_a": "System message",
+    "answer_b": "User message",
+    "answer_c": "Assistant message",
+    "answer_d": "Database message",
+    "correct_answer": "D"
+  },
+  {
+    "question": "What is the purpose of the SFTTrainer in the TRL library?",
+    "answer_a": "To train models from scratch",
+    "answer_b": "To simplify the process of fine-tuning language models on instruction datasets",
+    "answer_c": "To evaluate model performance",
+    "answer_d": "To compress model weights",
+    "correct_answer": "B"
+  },
+  {
+    "question": "What is a key characteristic of LoRA's training approach?",
+    "answer_a": "It trains all model parameters",
+    "answer_b": "It only works with small models",
+    "answer_c": "It freezes the original model weights and injects trainable rank decomposition matrices",
+    "answer_d": "It requires multiple GPUs",
+    "correct_answer": "C"
+  },
+  {
+    "question": "Which parameter in LoRA determines the size of the rank decomposition matrices?",
+    "answer_a": "lora_alpha",
+    "answer_b": "r",
+    "answer_c": "dropout",
+    "answer_d": "bias",
+    "correct_answer": "B"
+  },
+  {
+    "question": "What is the role of 'target_modules' in LoRA configuration?",
+    "answer_a": "To specify which layers to remove",
+    "answer_b": "To define which layers will be adapted with LoRA",
+    "answer_c": "To set the learning rate for each layer",
+    "answer_d": "To determine model output",
+    "correct_answer": "B"
+  },
+  {
+    "question": "What is the purpose of chat template's 'add_generation_prompt' parameter?",
+    "answer_a": "To end the conversation",
+    "answer_b": "To add a prompt for the model to continue generating",
+    "answer_c": "To change the system message",
+    "answer_d": "To modify user input",
+    "correct_answer": "B"
+  },
+  {
+    "question": "In SFT training, what is gradient checkpointing used for?",
+    "answer_a": "To save training progress",
+    "answer_b": "To reduce memory usage during training",
+    "answer_c": "To increase model accuracy",
+    "answer_d": "To speed up training",
+    "correct_answer": "B"
+  },
+  {
+    "question": "What is the purpose of the 'lora_alpha' parameter in LoRA?",
+    "answer_a": "To set the learning rate",
+    "answer_b": "To scale the LoRA weights during inference",
+    "answer_c": "To determine batch size",
+    "answer_d": "To control model size",
+    "correct_answer": "B"
+  },
+  {
+    "question": "Which of these is a benefit of using the SFTTrainer?",
+    "answer_a": "It automatically handles padding and truncation of inputs",
+    "answer_b": "It creates new model architectures",
+    "answer_c": "It performs unsupervised learning",
+    "answer_d": "It generates training data",
+    "correct_answer": "A"
+  },
+  {
+    "question": "What is the purpose of 'formatting_func' in SFTTrainer?",
+    "answer_a": "To format the output text",
+    "answer_b": "To preprocess and structure the training data",
+    "answer_c": "To modify model architecture",
+    "answer_d": "To handle error messages",
+    "correct_answer": "B"
+  },
+  {
+    "question": "Which of these is TRUE about LoRA training?",
+    "answer_a": "It requires more memory than full fine-tuning",
+    "answer_b": "It can only be used with specific model architectures",
+    "answer_c": "It allows efficient adaptation while keeping original weights frozen",
+    "answer_d": "It always produces better results than full fine-tuning",
+    "correct_answer": "C"
+  },
+  {
+    "question": "What is the purpose of 'max_seq_length' in SFTTrainer?",
+    "answer_a": "To limit the model's vocabulary size",
+    "answer_b": "To set the maximum length of input sequences",
+    "answer_c": "To determine the batch size",
+    "answer_d": "To control the learning rate",
+    "correct_answer": "B"
+  },
+  {
+    "question": "In chat templates, what is the purpose of conversation history?",
+    "answer_a": "To store user preferences",
+    "answer_b": "To maintain context across multiple turns of dialogue",
+    "answer_c": "To track error messages",
+    "answer_d": "To count tokens",
+    "correct_answer": "B"
+  },
+  {
+    "question": "What is a key advantage of using BitsAndBytes for SFT?",
+    "answer_a": "It makes training faster",
+    "answer_b": "It reduces memory usage through quantization",
+    "answer_c": "It improves model accuracy",
+    "answer_d": "It simplifies the code",
+    "correct_answer": "B"
+  },
+  {
+    "question": "Which of these is NOT a typical parameter in LoRA configuration?",
+    "answer_a": "r",
+    "answer_b": "lora_alpha",
+    "answer_c": "model_size",
+    "answer_d": "target_modules",
+    "correct_answer": "C"
+  },
+  {
+    "question": "What is the purpose of 'warmup_ratio' in training arguments?",
+    "answer_a": "To set the final learning rate",
+    "answer_b": "To determine the portion of training used for learning rate warmup",
+    "answer_c": "To control model temperature",
+    "answer_d": "To set the batch size",
+    "correct_answer": "B"
+  },
+  {
+    "question": "Which statement about SFT datasets is TRUE?",
+    "answer_a": "They must always be in JSON format",
+    "answer_b": "They typically contain input-output pairs for training",
+    "answer_c": "They can only contain single-turn conversations",
+    "answer_d": "They must include system prompts",
+    "correct_answer": "B"
+  },
+  {
+    "question": "What is the role of 'gradient_accumulation_steps' in training?",
+    "answer_a": "To speed up training",
+    "answer_b": "To simulate larger batch sizes with limited memory",
+    "answer_c": "To reduce model size",
+    "answer_d": "To improve accuracy",
+    "correct_answer": "B"
+  },
+  {
+    "question": "Which of these is a common use case for LoRA?",
+    "answer_a": "Creating new model architectures",
+    "answer_b": "Adapting large models to specific tasks efficiently",
+    "answer_c": "Reducing model inference time",
+    "answer_d": "Generating training data",
+    "correct_answer": "B"
+  },
+  {
+    "question": "What is the purpose of 'save_total_limit' in training arguments?",
+    "answer_a": "To limit the model's vocabulary",
+    "answer_b": "To control how many checkpoints are saved during training",
+    "answer_c": "To set the maximum sequence length",
+    "answer_d": "To limit training time",
+    "correct_answer": "B"
+  },
+  {
+    "question": "Which optimization technique is commonly used with LoRA?",
+    "answer_a": "SGD",
+    "answer_b": "AdamW",
+    "answer_c": "RMSprop",
+    "answer_d": "Momentum",
+    "correct_answer": "B"
+  },
+  {
+    "question": "What is the most significant difference between full fine-tuning and LoRA?",
+    "answer_a": "LoRA updates a subset of model weights while full fine-tuning updates all weights",
+    "answer_b": "LoRA adds new parameters while keeping original weights frozen",
+    "answer_c": "LoRA modifies attention layers while full fine-tuning modifies feed-forward layers",
+    "answer_d": "LoRA trains faster but requires more memory than full fine-tuning",
+    "correct_answer": "B"
+  },
+  {
+    "question": "When implementing chat templates, which approach is most likely to maintain model performance?",
+    "answer_a": "Using the exact template format from the model's training data",
+    "answer_b": "Using a simplified template with just role and content",
+    "answer_c": "Using a standardized template across all models",
+    "answer_d": "Using a template with additional control tokens",
+    "correct_answer": "A"
+  },
+  {
+    "question": "What is the key technical innovation of LoRA's rank decomposition approach?",
+    "answer_a": "It reduces model parameters through matrix factorization",
+    "answer_b": "It decomposes weight updates into low-rank matrices while preserving model capacity",
+    "answer_c": "It compresses the model weights using SVD decomposition",
+    "answer_d": "It optimizes attention mechanisms through rank reduction",
+    "correct_answer": "B"
+  },
+  {
+    "question": "How does the 'r' parameter in LoRA affect the training process?",
+    "answer_a": "Higher r increases model capacity but requires more memory",
+    "answer_b": "Lower r reduces training time but may impact performance",
+    "answer_c": "Higher r improves convergence but increases computation",
+    "answer_d": "Lower r decreases memory usage but may limit expressiveness",
+    "correct_answer": "D"
+  },
+  {
+    "question": "What is the primary consideration when choosing target_modules for LoRA?",
+    "answer_a": "Selecting layers that most influence task-specific behavior",
+    "answer_b": "Targeting modules with the most parameters",
+    "answer_c": "Choosing layers closest to the model output",
+    "answer_d": "Selecting modules with the least impact on inference speed",
+    "correct_answer": "A"
+  },
+  {
+    "question": "How does gradient checkpointing affect the training process in SFT?",
+    "answer_a": "Trades computation time for reduced memory usage",
+    "answer_b": "Reduces memory by storing fewer activation gradients",
+    "answer_c": "Improves training stability through gradient accumulation",
+    "answer_d": "Optimizes memory by recomputing forward passes",
+    "correct_answer": "A"
+  },
+  {
+    "question": "What role does lora_alpha play in the training dynamics?",
+    "answer_a": "Controls the learning rate scaling of LoRA updates",
+    "answer_b": "Scales the contribution of LoRA weights during inference",
+    "answer_c": "Determines the initialization range of LoRA matrices",
+    "answer_d": "Adjusts the gradient flow through LoRA layers",
+    "correct_answer": "B"
+  },
+  {
+    "question": "Which aspect of SFT datasets most influences training effectiveness?",
+    "answer_a": "The diversity of instruction-output pairs",
+    "answer_b": "The total number of training examples",
+    "answer_c": "The complexity of individual instructions",
+    "answer_d": "The length of output sequences",
+    "correct_answer": "A"
+  },
+  {
+    "question": "How does warmup_ratio impact the training dynamics?",
+    "answer_a": "Prevents early overfitting by gradually increasing learning rate",
+    "answer_b": "Stabilizes initial training by ramping up learning rate",
+    "answer_c": "Reduces gradient variance in early training steps",
+    "answer_d": "Improves model convergence through learning rate scheduling",
+    "correct_answer": "B"
+  },
+  {
+    "question": "What is the primary challenge addressed by gradient_accumulation_steps?",
+    "answer_a": "Memory constraints limiting batch size",
+    "answer_b": "Training instability with large learning rates",
+    "answer_c": "Slow convergence with small batches",
+    "answer_d": "Gradient vanishing in deep networks",
+    "correct_answer": "A"
+  },
+  {
+    "question": "How does BitsAndBytes quantization affect SFT training?",
+    "answer_a": "Reduces precision while maintaining training stability",
+    "answer_b": "Compresses weights with minimal performance impact",
+    "answer_c": "Optimizes memory usage through dynamic quantization",
+    "answer_d": "Balances precision and memory requirements",
+    "correct_answer": "D"
+  },
+  {
+    "question": "What distinguishes an effective chat template implementation?",
+    "answer_a": "Minimal special token usage with clear role separation",
+    "answer_b": "Consistent formatting with explicit turn boundaries",
+    "answer_c": "Efficient token usage while maintaining context",
+    "answer_d": "Flexible role definition with standardized markers",
+    "correct_answer": "C"
+  }
+]
example.json DELETED
@@ -1,82 +0,0 @@
-[
-  {
-    "question": "Which of the following best describes a Large Language Model (LLM)?",
-    "answer_a": "A model specializing in language recognition",
-    "answer_b": "A massive neural network that understands and generates human language",
-    "answer_c": "A model exclusively used for language data tasks like summarization or classification",
-    "answer_d": "A rule-based chatbot used for conversations",
-    "correct_answer": "B"
-  },
-  {
-    "question": "LLMs are typically:",
-    "answer_a": "Pre-trained on small, curated datasets",
-    "answer_b": "Trained on large text corpora to capture linguistic patterns",
-    "answer_c": "Trained purely on translation tasks",
-    "answer_d": "Designed to function solely with GPU resources",
-    "correct_answer": "B"
-  },
-  {
-    "question": "Which of the following is a common architecture for LLMs?",
-    "answer_a": "Convolutional Neural Networks (CNNs)",
-    "answer_b": "Transformer",
-    "answer_c": "Recurrent Neural Networks (RNNs) with LSTM",
-    "answer_d": "Support Vector Machines",
-    "correct_answer": "B"
-  },
-  {
-    "question": "What does it mean when we say LLMs are \"autoregressive\"?",
-    "answer_a": "They regress to the mean to reduce variance",
-    "answer_b": "They generate text by predicting the next token based on previous tokens",
-    "answer_c": "They can only handle labeled data",
-    "answer_d": "They can output text only after the entire input is known at once",
-    "correct_answer": "B"
-  },
-  {
-    "question": "Which of these is NOT a common use of LLMs?",
-    "answer_a": "Summarizing content",
-    "answer_b": "Generating code",
-    "answer_c": "Playing strategy games like chess or Go",
-    "answer_d": "Conversational AI",
-    "correct_answer": "C"
-  },
-  {
-    "question": "Which of the following best describes a \"special token\"?",
-    "answer_a": "A token that makes the model forget all context",
-    "answer_b": "A model signature required for API calls",
-    "answer_c": "A token that helps segment or structure the conversation in the model",
-    "answer_d": "A token that always represents the end of text",
-    "correct_answer": "C"
-  },
-  {
-    "question": "What is the primary goal of a \"chat template\"?",
-    "answer_a": "To force the model into a single-turn conversation",
-    "answer_b": "To structure interactions and define roles in a conversation",
-    "answer_c": "To replace the need for system messages",
-    "answer_d": "To store prompts into the model's weights permanently",
-    "correct_answer": "B"
-  },
-  {
-    "question": "How do tokenizers handle text for modern NLP models?",
-    "answer_a": "By splitting text into individual words only",
-    "answer_b": "By breaking words into subword units and assigning numerical IDs",
-    "answer_c": "By storing text directly without transformation",
-    "answer_d": "By removing all punctuation automatically",
-    "correct_answer": "B"
-  },
-  {
-    "question": "Which role in a conversation sets the overall behavior for a model?",
-    "answer_a": "user",
-    "answer_b": "system",
-    "answer_c": "assistant",
-    "answer_d": "developer",
-    "correct_answer": "B"
-  },
-  {
-    "question": "Which statement is TRUE about tool usage in chat templates?",
-    "answer_a": "Tools cannot be used within the conversation context.",
-    "answer_b": "Tools are used only for logging messages.",
-    "answer_c": "Tools allow the assistant to offload tasks like web search or calculations.",
-    "answer_d": "Tools are unsupported in all modern LLMs.",
-    "correct_answer": "C"
-  }
-]
push_questions.py ADDED
@@ -0,0 +1,33 @@
+import json
+from pathlib import Path
+from datasets import Dataset
+from huggingface_hub import HfApi
+
+
+ORG_NAME = "nlp-course"
+
+
+def main():
+    """Push quiz questions to the Hugging Face Hub"""
+
+    for file in Path("data").glob("*.json"):
+        print(f"Processing {file}")
+
+        with open(file, "r") as f:
+            quiz_data = json.load(f)
+
+        repo_id = f"{ORG_NAME}/{file.stem}_quiz"
+
+        dataset = Dataset.from_list(quiz_data)
+
+        print(f"Pushing {repo_id} to the Hugging Face Hub")
+
+        dataset.push_to_hub(
+            repo_id,
+            private=True,
+            commit_message=f"Update quiz questions for {file.stem}",
+        )
+
+
+if __name__ == "__main__":
+    main()
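Before running push_questions.py, it can help to sanity-check the quiz files it will upload. A small illustrative sketch, not part of the commit (assumption: every file under data/ uses the same fields as data/supervised-finetuning.json above):

import json
from pathlib import Path

# Fields that app.py's handle_quiz reads from each question row.
REQUIRED_KEYS = {"question", "answer_a", "answer_b", "answer_c", "answer_d", "correct_answer"}

for file in Path("data").glob("*.json"):
    rows = json.loads(file.read_text())
    bad = [i for i, row in enumerate(rows) if not REQUIRED_KEYS <= row.keys()]
    print(f"{file.stem}: {len(rows)} questions, {len(bad)} rows missing fields")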
pyproject.toml CHANGED
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
     "datasets>=3.2.0",
-    "gradio[oauth]>=5.13.1",
+    "gradio[oauth]==5.15.0",
     "huggingface-hub>=0.27.1",
     "ipykernel>=6.29.5",
 ]
requirements.txt CHANGED
@@ -28,12 +28,12 @@ ffmpy==0.5.0
 filelock==3.17.0
 frozenlist==1.5.0
 fsspec==2024.9.0
-gradio==5.13.1
-gradio-client==1.6.0
+gradio==5.15.0
+gradio-client==1.7.0
 h11==0.14.0
 httpcore==1.0.7
 httpx==0.28.1
-huggingface-hub==0.27.1
+huggingface-hub==0.28.1
 idna==3.10
 ipykernel==6.29.5
 ipython==8.31.0
uv.lock CHANGED
@@ -541,7 +541,7 @@ http = [
 
 [[package]]
 name = "gradio"
-version = "5.13.1"
+version = "5.15.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "aiofiles" },
@@ -574,7 +574,7 @@ dependencies = [
     { name = "uvicorn", marker = "sys_platform != 'emscripten'" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a0/4e/ac75bede5dc13ae8dfc79432f7578c30f2821265789918941457e4ccb940/gradio-5.13.1-py3-none-any.whl", hash = "sha256:ea5a5f2b1e0cd883211c1a8cc47d52343126e2c313762d26572bfcfb248da299", size = 57634647 },
+    { url = "https://files.pythonhosted.org/packages/a4/28/6a52bac8b13aca4f76baef03d5f840cc1f7486e879bc1ee19df51b7590d2/gradio-5.15.0-py3-none-any.whl", hash = "sha256:b0d72bf1e70c4a08283066c510d03bbdb9e378c1f806dd948e333c75bb22b3f1", size = 57766589 },
 ]
 
 [package.optional-dependencies]
@@ -585,7 +585,7 @@ oauth = [
 
 [[package]]
 name = "gradio-client"
-version = "1.6.0"
+version = "1.7.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "fsspec" },
@@ -595,9 +595,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "websockets" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/1c/d4/96a11493f6aea9d96ba5302b2e73725d07cee49c9d555190f4631372e028/gradio_client-1.6.0.tar.gz", hash = "sha256:1c6fae52181d483c010cfbc4e4df8520da33ab4365ab412acabc798d7022ad98", size = 319910 }
+sdist = { url = "https://files.pythonhosted.org/packages/c5/78/e5a4a2b0f4d1ba01ec4169e181a3134fc65b6360d40817070892c3557000/gradio_client-1.7.0.tar.gz", hash = "sha256:87f6ade197951f38bac0431b2a436a8ebb2f33b2ceba2ef8e1e5bef8d8b238e4", size = 320039 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/9b/41/dc3358708c4e461bec82e9bbf5e7b1821b1fa45c5f078f3baeb3e8686f57/gradio_client-1.6.0-py3-none-any.whl", hash = "sha256:172175510a0cc92928f5d376e95e93f94d1558e4a360969fcc0dfc4c9e313872", size = 321777 },
+    { url = "https://files.pythonhosted.org/packages/f3/c1/def2bd93b8beab342c443bf5ac47f85e48b78eca010bbff51d6978472a3f/gradio_client-1.7.0-py3-none-any.whl", hash = "sha256:b403570c67f121ebbbc19ac1f0afa2ab1bab085ce60d96eb190832fe871aa946", size = 321900 },
 ]
 
 [[package]]
@@ -639,7 +639,7 @@ wheels = [
 
 [[package]]
 name = "huggingface-hub"
-version = "0.27.1"
+version = "0.28.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "filelock" },
@@ -650,9 +650,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/e1/d2/d6976de7542792fc077b498d64af64882b6d8bb40679284ec0bff77d5929/huggingface_hub-0.27.1.tar.gz", hash = "sha256:c004463ca870283909d715d20f066ebd6968c2207dae9393fdffb3c1d4d8f98b", size = 379407 }
+sdist = { url = "https://files.pythonhosted.org/packages/e7/ce/a734204aaae6c35a22f9956ebcd8d8708ae5b842e15d6f42bd6f49e634a4/huggingface_hub-0.28.1.tar.gz", hash = "sha256:893471090c98e3b6efbdfdacafe4052b20b84d59866fb6f54c33d9af18c303ae", size = 387074 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/6c/3f/50f6b25fafdcfb1c089187a328c95081abf882309afd86f4053951507cd1/huggingface_hub-0.27.1-py3-none-any.whl", hash = "sha256:1c5155ca7d60b60c2e2fc38cbb3ffb7f7c3adf48f824015b219af9061771daec", size = 450658 },
+    { url = "https://files.pythonhosted.org/packages/ea/da/6c2bea5327b640920267d3bf2c9fc114cfbd0a5de234d81cda80cc9e33c8/huggingface_hub-0.28.1-py3-none-any.whl", hash = "sha256:aa6b9a3ffdae939b72c464dbb0d7f99f56e649b55c3d52406f49e0a5a620c0a7", size = 464068 },
 ]
 
 [[package]]
@@ -1518,7 +1518,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "datasets", specifier = ">=3.2.0" },
-    { name = "gradio", extras = ["oauth"], specifier = ">=5.13.1" },
+    { name = "gradio", extras = ["oauth"], specifier = "==5.15.0" },
     { name = "huggingface-hub", specifier = ">=0.27.1" },
     { name = "ipykernel", specifier = ">=6.29.5" },
 ]