{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "gpuClass": "standard", "widgets": { "application/vnd.jupyter.widget-state+json": { "c1f06c162a994fe39bc1c72dcd732eb5": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_0972d5d3a6c94e6aa5da01ac427bc98a", "IPY_MODEL_ad7adfc018ca4ebbbf582ea6e370dafe", "IPY_MODEL_5ed735ca184b45158e432a280e6c6b5c" ], "layout": "IPY_MODEL_8d9e6b2e8e3147118c319ba4788795c5" } }, "0972d5d3a6c94e6aa5da01ac427bc98a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_826c9c8d73d448b182343775d0004feb", "placeholder": "", "style": "IPY_MODEL_cb6b93777f914372bb582e331faaae17", "value": "Loading checkpoint shards: 100%" } }, "ad7adfc018ca4ebbbf582ea6e370dafe": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": 
"ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_71cfafa9755245de98399af9ea8a1cce", "max": 3, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_f01bf5b1b1b0433388823e3d3e2f7608", "value": 3 } }, "5ed735ca184b45158e432a280e6c6b5c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_839b0090d16949f8ab5ca3f550759432", "placeholder": "", "style": "IPY_MODEL_9e7bcd41202041eb91035eb005e2341f", "value": " 3/3 [00:26<00:00, 8.65s/it]" } }, "8d9e6b2e8e3147118c319ba4788795c5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, 
"visibility": null, "width": null } }, "826c9c8d73d448b182343775d0004feb": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cb6b93777f914372bb582e331faaae17": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "71cfafa9755245de98399af9ea8a1cce": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", 
"align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f01bf5b1b1b0433388823e3d3e2f7608": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "839b0090d16949f8ab5ca3f550759432": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, 
"justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9e7bcd41202041eb91035eb005e2341f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "source": [ "# `transformers` meets `bitsandbytes` for democratizing Large Language Models (LLMs) through 4bit quantization - **Fork by [crumb](https://hf.co/crumbly) for GPT2-linear-XL**\n", "\n", "
Step | \n", "Training Loss | \n", "
---|---|
4 | \n", "2.849300 | \n", "
8 | \n", "2.507900 | \n", "
12 | \n", "2.744300 | \n", "
16 | \n", "2.537700 | \n", "
20 | \n", "2.808800 | \n", "
24 | \n", "2.619400 | \n", "
28 | \n", "2.521000 | \n", "
32 | \n", "2.543500 | \n", "
36 | \n", "2.439600 | \n", "
40 | \n", "2.369900 | \n", "
44 | \n", "2.448100 | \n", "
48 | \n", "2.389500 | \n", "
52 | \n", "2.331100 | \n", "
56 | \n", "2.366500 | \n", "
60 | \n", "2.401100 | \n", "
64 | \n", "2.153900 | \n", "
" ] }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [ "TrainOutput(global_step=64, training_loss=2.5019835233688354, metrics={'train_runtime': 303.3326, 'train_samples_per_second': 1.688, 'train_steps_per_second': 0.211, 'total_flos': 802220553600000.0, 'train_loss': 2.5019835233688354, 'epoch': 1.0})" ] }, "metadata": {}, "execution_count": 8 } ] }, { "cell_type": "code", "source": [ "inputs = {k:v.cuda() for k,v in tokenizer(\"\"\"\n", "You are an AI assistant. You will be given a question. You must generate a short and factual answer.\n", "What is the capital city of France?\n", "\"\"\", return_tensors='pt').items()}\n", "outputs = model.generate(**inputs, max_new_tokens=16, temperature=0.5, do_sample=True)\n", "print(tokenizer.decode(outputs[0]), \"...\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wr6bfZ0wyk3c", "outputId": "ca4ede1b-7456-43ea-ce52-961c1383dff8" }, "execution_count": 16, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "\n", "You are an AI assistant. You will be given a question. You must generate a short and factual answer.\n", "What is the capital city of France?\n", "\n", "\n", "Paris\n", "\n", "Paris is the capital of France. 
The city is located ...\n" ] } ] }, { "cell_type": "markdown", "source": [ "To save your adapters, you can either use\n", "\n", "```python\n", "model.save_pretrained(\"local_folder\")\n", "```\n", "\n", "or push them to the hub with\n", "\n", "```python\n", "model.push_to_hub(\"myusername/my_repo\")\n", "```\n", "\n", "If you would like to merge the adapters into your model, you'll have to load the base model again without quantization, and merge them like this:\n", "\n", "```python\n", "from peft import PeftModel\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "\n", "model = AutoModelForCausalLM.from_pretrained(\"crumbly/gpt2-linear-xl-sharded-bf16\")\n", "model = PeftModel.from_pretrained(model, \"myusername/my_repo\")\n", "model = model.merge_and_unload()\n", "```\n", "\n", "You can then push the merged model to the hub or save it to a local folder as before; this time the saved checkpoint includes all of the model weights, not just the adapters." ], "metadata": { "id": "NsGnWFe8mr0p" } } ] }