File size: 33,113 Bytes

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "gpuClass": "standard",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "c1f06c162a994fe39bc1c72dcd732eb5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_0972d5d3a6c94e6aa5da01ac427bc98a",
              "IPY_MODEL_ad7adfc018ca4ebbbf582ea6e370dafe",
              "IPY_MODEL_5ed735ca184b45158e432a280e6c6b5c"
            ],
            "layout": "IPY_MODEL_8d9e6b2e8e3147118c319ba4788795c5"
          }
        },
        "0972d5d3a6c94e6aa5da01ac427bc98a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_826c9c8d73d448b182343775d0004feb",
            "placeholder": "",
            "style": "IPY_MODEL_cb6b93777f914372bb582e331faaae17",
            "value": "Loading checkpoint shards: 100%"
          }
        },
        "ad7adfc018ca4ebbbf582ea6e370dafe": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_71cfafa9755245de98399af9ea8a1cce",
            "max": 3,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_f01bf5b1b1b0433388823e3d3e2f7608",
            "value": 3
          }
        },
        "5ed735ca184b45158e432a280e6c6b5c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_839b0090d16949f8ab5ca3f550759432",
            "placeholder": "",
            "style": "IPY_MODEL_9e7bcd41202041eb91035eb005e2341f",
            "value": " 3/3 [00:26&lt;00:00,  8.65s/it]"
          }
        },
        "8d9e6b2e8e3147118c319ba4788795c5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "826c9c8d73d448b182343775d0004feb": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cb6b93777f914372bb582e331faaae17": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "71cfafa9755245de98399af9ea8a1cce": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f01bf5b1b1b0433388823e3d3e2f7608": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "839b0090d16949f8ab5ca3f550759432": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9e7bcd41202041eb91035eb005e2341f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# `transformers` meets `bitsandbytes` for democratzing Large Language Models (LLMs) through 4bit quantization - **Fork by [crumb](https://hf.co/crumbly) for GPT2-linear-XL**\n",
        "\n",
        "<center>\n",
        "<img src=\"https://github.com/huggingface/blog/blob/main/assets/96_hf_bitsandbytes_integration/Thumbnail_blue.png?raw=true\" alt=\"drawing\" width=\"700\" class=\"center\"/>\n",
        "</center>\n",
        "\n",
        "Welcome to this notebook that goes through the recent `bitsandbytes` integration that includes the work that introduces no performance degradation 4bit quantization techniques, for democratizing LLMs inference and training.\n",
        "\n",
        "In this notebook, we will learn together how to load a large model in 4bit ~~(`gpt-neo-x-20b`)~~ (`gpt2-xl`) and train it using Google Colab and PEFT library from Hugging Face 🤗.\n",
        "\n",
        "[In the general usage notebook](https://colab.research.google.com/drive/1ge2F1QSK8Q7h0hn3YKuBCOAS0bK8E0wf?usp=sharing), you can learn how to propely load a model in 4bit with all its variants.\n",
        "\n",
        "If you liked the previous work for integrating [*LLM.int8*](https://arxiv.org/abs/2208.07339), you can have a look at the [introduction blogpost](https://huggingface.co/blog/hf-bitsandbytes-integration) to lean more about that quantization method.\n"
      ],
      "metadata": {
        "id": "XIyP_0r6zuVc"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "FuXIFTFapAMI",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "e9be514a-cf54-49d1-f359-6851312f4e65"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
          ]
        }
      ],
      "source": [
        "!pip install -q -U bitsandbytes\n",
        "!pip install -q -U git+https://github.com/huggingface/transformers.git\n",
        "!pip install -q -U git+https://github.com/huggingface/peft.git\n",
        "!pip install -q -U git+https://github.com/huggingface/accelerate.git\n",
        "!pip install -q datasets\n",
        "!pip install -q wandb"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "First let's load the model we are going to use - GPT2-XL"
      ],
      "metadata": {
        "id": "MJ-5idQwzvg-"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import torch\n",
        "from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig\n",
        "\n",
        "# we'll use the bf16 version because it takes up 1/2 the space\n",
        "# and is quicker to download\n",
        "model_id = \"crumbly/gpt2-linear-xl-sharded-bf16\"\n",
        "bnb_config = BitsAndBytesConfig(\n",
        "    load_in_4bit=True,\n",
        "    bnb_4bit_use_double_quant=True,\n",
        "    bnb_4bit_quant_type=\"nf4\",\n",
        "    bnb_4bit_compute_dtype=torch.bfloat16\n",
        ")\n",
        "\n",
        "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
        "model = AutoModelForCausalLM.from_pretrained(model_id, device_map={\"\":0}, quantization_config=bnb_config, trust_remote_code=True)"
      ],
      "metadata": {
        "id": "E0Nl5mWL0k2T",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 153,
          "referenced_widgets": [
            "c1f06c162a994fe39bc1c72dcd732eb5",
            "0972d5d3a6c94e6aa5da01ac427bc98a",
            "ad7adfc018ca4ebbbf582ea6e370dafe",
            "5ed735ca184b45158e432a280e6c6b5c",
            "8d9e6b2e8e3147118c319ba4788795c5",
            "826c9c8d73d448b182343775d0004feb",
            "cb6b93777f914372bb582e331faaae17",
            "71cfafa9755245de98399af9ea8a1cce",
            "f01bf5b1b1b0433388823e3d3e2f7608",
            "839b0090d16949f8ab5ca3f550759432",
            "9e7bcd41202041eb91035eb005e2341f"
          ]
        },
        "outputId": "a550a9bf-0715-4be5-d03a-f5c348d0031b"
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "A new version of the following files was downloaded from https://huggingface.co/crumbly/gpt2-linear-xl:\n",
            "- configuration_gpt2l.py\n",
            ". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n",
            "A new version of the following files was downloaded from https://huggingface.co/crumbly/gpt2-linear-xl:\n",
            "- modeling_gpt2l.py\n",
            ". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "c1f06c162a994fe39bc1c72dcd732eb5"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# generate just to verify that the model works and was loaded correctly\n",
        "inputs = {k:v.cuda() for k,v in tokenizer(\"Once upon a time,\", return_tensors='pt').items()}\n",
        "outputs = model.generate(**inputs, max_new_tokens=32, temperature=0.7, do_sample=True)\n",
        "tokenizer.decode(outputs[0])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 53
        },
        "id": "DLZpPRoM9eb5",
        "outputId": "3f75d30d-e097-4b24-cfce-d94e32352a7d"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'Once upon a time, it was said that the best way to predict the future was to take the actions of past generations and predict the future. Unfortunately, this is no longer true.'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 3
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# this isn't supported yet with the GPT2 model we use, but for other models:\n",
        "# uncomment these lines and run them\n",
        "# from peft import prepare_model_for_kbit_training\n",
        "# model.gradient_checkpointing_enable()\n",
        "# model = prepare_model_for_kbit_training(model)"
      ],
      "metadata": {
        "id": "a9EUEDAl0ss3"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def print_trainable_parameters(model):\n",
        "    \"\"\"\n",
        "    Prints the number of trainable parameters in the model.\n",
        "    \"\"\"\n",
        "    trainable_params = 0\n",
        "    all_param = 0\n",
        "    for _, param in model.named_parameters():\n",
        "        all_param += param.numel()\n",
        "        if param.requires_grad:\n",
        "            trainable_params += param.numel()\n",
        "    print(\n",
        "        f\"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}\"\n",
        "    )"
      ],
      "metadata": {
        "id": "gkIcwsSU01EB"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from peft import LoraConfig, get_peft_model\n",
        "\n",
        "config = LoraConfig(\n",
        "    # ReLoRA uses r=128 by default in their code, but r=1 will even work to a degree\n",
        "    r=8,\n",
        "    lora_alpha=32,\n",
        "    # c_attn is our qkv\n",
        "    target_modules=[\"c_attn\"],\n",
        "    lora_dropout=0.05,\n",
        "    bias=\"none\",\n",
        "    task_type=\"CAUSAL_LM\"\n",
        ")\n",
        "\n",
        "model = get_peft_model(model, config)\n",
        "print_trainable_parameters(model)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Ybeyl20n3dYH",
        "outputId": "c49d63f7-675f-4a81-fcf0-b6d7a0e3b688"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "trainable params: 2457600 || all params: 822788800 || trainable%: 0.2986914746530337\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Let's load a dataset, Open Orca, to fine tune our model on instruction sets. We'll use new lines to delimit between the system prompt, question, and response for simplicity."
      ],
      "metadata": {
        "id": "FCc64bfnmd3j"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from datasets import load_dataset\n",
        "\n",
        "# we'll use streaming=True so we stream examples over the internet\n",
        "# rather than downloading the entire dataset to process\n",
        "data = load_dataset(\"Open-Orca/OpenOrca\", streaming=True)\n",
        "\n",
        "def strip(batch):\n",
        "    # to remove trailing spaces or newlines from our prompts\n",
        "    return [\n",
        "        i.strip() for i in list(batch)\n",
        "    ]\n",
        "\n",
        "def process(batch):\n",
        "    systems = [i for i in strip(batch['system_prompt'])]\n",
        "    questions = [i for i in strip(batch['question'])]\n",
        "    responses = [i for i in strip(batch['response'])]\n",
        "    prompts = zip(systems, questions, responses)\n",
        "    prompts = [\"\\n\".join(i) for i in prompts]\n",
        "    prompts = strip(prompts)\n",
        "    return prompts\n",
        "\n",
        "# we'll also set the max length to something lower than normal, so we don't go out-of-memory.\n",
        "tokenizer.model_max_length = 768\n",
        "data = data.map(lambda samples: tokenizer(process(samples), truncation=True), batched=True)"
      ],
      "metadata": {
        "id": "s6f4z8EYmcJ6"
      },
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Run the cell below to run the training! For the sake of the demo, we just ran it for few steps just to showcase how to use this integration with existing tools on the HF ecosystem."
      ],
      "metadata": {
        "id": "_0MOtwf3zdZp"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import transformers\n",
        "\n",
        "# needed for gpt-neo-x tokenizer\n",
        "tokenizer.pad_token = tokenizer.eos_token\n",
        "\n",
        "trainer = transformers.Trainer(\n",
        "    model=model,\n",
        "    train_dataset=data[\"train\"],\n",
        "    args=transformers.TrainingArguments(\n",
        "        # your 'effective batch size' is the product of these two numbers\n",
        "        per_device_train_batch_size=1,\n",
        "        gradient_accumulation_steps=8,\n",
        "\n",
        "        # you can count the examples you're going to train on by\n",
        "        # multiplying max_steps by your effective batch size\n",
        "        # here we'll train on 512 examples, for example\n",
        "        max_steps=64,\n",
        "        warmup_steps=16,\n",
        "\n",
        "        learning_rate=2e-4,\n",
        "        fp16=True,\n",
        "        logging_steps=4,\n",
        "        output_dir=\"outputs\",\n",
        "        optim=\"paged_adamw_8bit\",\n",
        "\n",
        "        # if you want to log the loss graph to your wandb, change \"none\" to \"wandb\"\n",
        "        report_to=\"none\"\n",
        "    ),\n",
        "    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\n",
        ")\n",
        "model.config.use_cache = False  # silence the warnings. Please re-enable for inference!\n",
        "trainer.train()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 629
        },
        "id": "jq0nX33BmfaC",
        "outputId": "910a63f5-d181-40da-f24b-c6a968fa8415"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "    <div>\n",
              "      \n",
              "      <progress value='64' max='64' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
              "      [64/64 04:43, Epoch 1/9223372036854775807]\n",
              "    </div>\n",
              "    <table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              " <tr style=\"text-align: left;\">\n",
              "      <th>Step</th>\n",
              "      <th>Training Loss</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <td>4</td>\n",
              "      <td>2.849300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>8</td>\n",
              "      <td>2.507900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>12</td>\n",
              "      <td>2.744300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>16</td>\n",
              "      <td>2.537700</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>20</td>\n",
              "      <td>2.808800</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>24</td>\n",
              "      <td>2.619400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>28</td>\n",
              "      <td>2.521000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>32</td>\n",
              "      <td>2.543500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>36</td>\n",
              "      <td>2.439600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>40</td>\n",
              "      <td>2.369900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>44</td>\n",
              "      <td>2.448100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>48</td>\n",
              "      <td>2.389500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>52</td>\n",
              "      <td>2.331100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>56</td>\n",
              "      <td>2.366500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>60</td>\n",
              "      <td>2.401100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>64</td>\n",
              "      <td>2.153900</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table><p>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "TrainOutput(global_step=64, training_loss=2.5019835233688354, metrics={'train_runtime': 303.3326, 'train_samples_per_second': 1.688, 'train_steps_per_second': 0.211, 'total_flos': 802220553600000.0, 'train_loss': 2.5019835233688354, 'epoch': 1.0})"
            ]
          },
          "metadata": {},
          "execution_count": 8
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "inputs = {k:v.cuda() for k,v in tokenizer(\"\"\"\n",
        "You are an AI assistant. You will be given a question. You must generate a short and factual answer.\n",
        "What is the capital city of France?\n",
        "\"\"\", return_tensors='pt').items()}\n",
        "outputs = model.generate(**inputs, max_new_tokens=16, temperature=0.5, do_sample=True)\n",
        "print(tokenizer.decode(outputs[0]), \"...\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "wr6bfZ0wyk3c",
        "outputId": "ca4ede1b-7456-43ea-ce52-961c1383dff8"
      },
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "You are an AI assistant. You will be given a question. You must generate a short and factual answer.\n",
            "What is the capital city of France?\n",
            "\n",
            "\n",
            "Paris\n",
            "\n",
            "Paris is the capital of France. The city is located ...\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "To save your adapters, you can either use\n",
        "\n",
        "```python\n",
        "model.save_pretrained(\"local_folder\")\n",
        "```\n",
        "\n",
        "or push them to the hub with\n",
        "\n",
        "```python\n",
        "model.push_to_hub(\"myusername/my_repo\")\n",
        "```\n",
        "\n",
        "If you would like to merge the adapters into your model, you'll have to load the base model again without quantization, and merge them like this.\n",
        "\n",
        "```python\n",
        "from peft import PeftModel\n",
        "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
        "\n",
        "model = AutoModelForCausalLM.from_pretrained(\"crumbly/gpt2-linear-xl-sharded-bf16\")\n",
        "model = PeftModel.from_pretrained(model, \"myusername/my_repo\")\n",
        "model = model.merge_and_unload()\n",
        "```\n",
        "\n",
        "You can then push that to the hub or save it to a local folder like before, but including all of the weights."
      ],
      "metadata": {
        "id": "NsGnWFe8mr0p"
      }
    }
  ]
}