{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "A100",
      "machine_shape": "hm"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "ee9c321418ce4322a0d6b28a3f2ca6a1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_e3e7f4f191ab43b581e38d2b79dc3e89",
              "IPY_MODEL_d4d0156eae3c41fe9f1f7ffd1cdd8c35",
              "IPY_MODEL_8cf941ff295e449589a2d9a454cfefba"
            ],
            "layout": "IPY_MODEL_a592ff42cd3346bca504eedba0f3955b"
          }
        },
        "e3e7f4f191ab43b581e38d2b79dc3e89": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_417708e3b88b4764a97ad0d6ef78e38b",
            "placeholder": "​",
            "style": "IPY_MODEL_ef7b5cca37be4c0285aa863783edd83d",
            "value": "README.md: 100%"
          }
        },
        "d4d0156eae3c41fe9f1f7ffd1cdd8c35": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_66dc16b4997f46439bed98ad8d0e8732",
            "max": 2872,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_cbd8f4782051447085d2c4c84b8185fb",
            "value": 2872
          }
        },
        "8cf941ff295e449589a2d9a454cfefba": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f20f1c3e78e84e46b9f7022c791775e4",
            "placeholder": "​",
            "style": "IPY_MODEL_d87fdc84c0eb4243804b5e6c86f7eb78",
            "value": " 2.87k/2.87k [00:00<00:00, 172kB/s]"
          }
        },
        "a592ff42cd3346bca504eedba0f3955b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "417708e3b88b4764a97ad0d6ef78e38b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ef7b5cca37be4c0285aa863783edd83d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "66dc16b4997f46439bed98ad8d0e8732": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cbd8f4782051447085d2c4c84b8185fb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "f20f1c3e78e84e46b9f7022c791775e4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d87fdc84c0eb4243804b5e6c86f7eb78": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "5ccea58adc994a8082d77a0fd3dd5175": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_2bfa1a551787407397a7d14faffaa471",
              "IPY_MODEL_f3bec46279cd4f99a7e7693aca5a463a",
              "IPY_MODEL_f275a7c407fa4b699039ed7bd9a7cdec"
            ],
            "layout": "IPY_MODEL_7e25a8d5567f474cb450095ed1b409fa"
          }
        },
        "2bfa1a551787407397a7d14faffaa471": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f4c4efcc5f10400caac3695865596340",
            "placeholder": "​",
            "style": "IPY_MODEL_65c6ebc06606451bb347f7291f4af0cb",
            "value": "train-00000-of-00001.parquet: 100%"
          }
        },
        "f3bec46279cd4f99a7e7693aca5a463a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_751e56f58fef4cfa8dbc032790667c4f",
            "max": 13578023,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_ea9b7f2ae3cc4d819d1eedd09b498f4d",
            "value": 13578023
          }
        },
        "f275a7c407fa4b699039ed7bd9a7cdec": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e631c93fc21348528300a7a1013d39f5",
            "placeholder": "​",
            "style": "IPY_MODEL_e3caeb64cd5e4cb08265ebcb7040b340",
            "value": " 13.6M/13.6M [00:00<00:00, 35.5MB/s]"
          }
        },
        "7e25a8d5567f474cb450095ed1b409fa": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f4c4efcc5f10400caac3695865596340": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "65c6ebc06606451bb347f7291f4af0cb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "751e56f58fef4cfa8dbc032790667c4f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ea9b7f2ae3cc4d819d1eedd09b498f4d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "e631c93fc21348528300a7a1013d39f5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e3caeb64cd5e4cb08265ebcb7040b340": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "029202e5e41d404385ac4a3a36989700": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_9069aafbe84f47a6860ba99ff9de8fcd",
              "IPY_MODEL_41e04d59393245c898bae1bd80c824fe",
              "IPY_MODEL_acc44cac406a4f51ad13b82ad141f9ab"
            ],
            "layout": "IPY_MODEL_254f3a26f42e4ce193c7ea8283eb77af"
          }
        },
        "9069aafbe84f47a6860ba99ff9de8fcd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_50566b0a37d64188beea5ea55edf60bc",
            "placeholder": "​",
            "style": "IPY_MODEL_bbbd5baf90614ccaae15010e1982bd96",
            "value": "Generating train split: 100%"
          }
        },
        "41e04d59393245c898bae1bd80c824fe": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_6ddb7eaee2b7405e988582e5f6edc4d0",
            "max": 99545,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_5a1097b3c10846e29c9b838e9b3d41e7",
            "value": 99545
          }
        },
        "acc44cac406a4f51ad13b82ad141f9ab": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8b7e0c70f2e349fbae1146ee73014ebc",
            "placeholder": "​",
            "style": "IPY_MODEL_ed9ff4cd49b646288e6d3d74efcec647",
            "value": " 99545/99545 [00:00<00:00, 354750.16 examples/s]"
          }
        },
        "254f3a26f42e4ce193c7ea8283eb77af": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "50566b0a37d64188beea5ea55edf60bc": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "bbbd5baf90614ccaae15010e1982bd96": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "6ddb7eaee2b7405e988582e5f6edc4d0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5a1097b3c10846e29c9b838e9b3d41e7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "8b7e0c70f2e349fbae1146ee73014ebc": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ed9ff4cd49b646288e6d3d74efcec647": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "880e3efa6bcc49cd98207c04c476e918": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_76ebb3ae1d354a139207ba45b0161206",
              "IPY_MODEL_8415ed49d34846efbb7d84341fe931c5",
              "IPY_MODEL_749f0a339075455f97b18ae42163457f"
            ],
            "layout": "IPY_MODEL_4f65674d3ae44bd7aa89873233d9c421"
          }
        },
        "76ebb3ae1d354a139207ba45b0161206": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c454e4d7594543ee971fd29f32c02e0b",
            "placeholder": "​",
            "style": "IPY_MODEL_f5a82328e6e842eeb772c171ccdb57ab",
            "value": "Map: 100%"
          }
        },
        "8415ed49d34846efbb7d84341fe931c5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f7cfa914ad6841559b1eb9b203474000",
            "max": 99545,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_6c09c2b4125d4eadad1a446095209d7f",
            "value": 99545
          }
        },
        "749f0a339075455f97b18ae42163457f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b5d8b7c444214f2ab616b12eec614716",
            "placeholder": "​",
            "style": "IPY_MODEL_ea8e7d69c63f491ca615cddb77c0dde2",
            "value": " 99545/99545 [00:16<00:00, 7233.81 examples/s]"
          }
        },
        "4f65674d3ae44bd7aa89873233d9c421": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c454e4d7594543ee971fd29f32c02e0b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f5a82328e6e842eeb772c171ccdb57ab": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f7cfa914ad6841559b1eb9b203474000": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6c09c2b4125d4eadad1a446095209d7f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "b5d8b7c444214f2ab616b12eec614716": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ea8e7d69c63f491ca615cddb77c0dde2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "696d4ebbf97c44e9a6cdff707b87e953": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_b7abe11e0fb041d49137296e1d4bf43e",
              "IPY_MODEL_83d1e890914f4206b53e7c8fd3eafdec",
              "IPY_MODEL_97258ff7b0874640bcb47e1d1beb73ad"
            ],
            "layout": "IPY_MODEL_ba56149214a7449ab4023150af34380e"
          }
        },
        "b7abe11e0fb041d49137296e1d4bf43e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e7b683ab266740d2b97981ffd12e0f47",
            "placeholder": "​",
            "style": "IPY_MODEL_34884b30a6e24ef2b7992e39fbd487df",
            "value": "tokenizer_config.json: 100%"
          }
        },
        "83d1e890914f4206b53e7c8fd3eafdec": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_6d8d6f3c10e54a99926a77759aed7980",
            "max": 373,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_2aaa0e920af74c91b1497d4e2f0d7c7f",
            "value": 373
          }
        },
        "97258ff7b0874640bcb47e1d1beb73ad": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_33b46ebefd7a4e19b20c9113a6306e33",
            "placeholder": "​",
            "style": "IPY_MODEL_46ffdf824b8e496bb4c739375d6a4255",
            "value": " 373/373 [00:00<00:00, 29.3kB/s]"
          }
        },
        "ba56149214a7449ab4023150af34380e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e7b683ab266740d2b97981ffd12e0f47": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "34884b30a6e24ef2b7992e39fbd487df": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "6d8d6f3c10e54a99926a77759aed7980": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2aaa0e920af74c91b1497d4e2f0d7c7f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "33b46ebefd7a4e19b20c9113a6306e33": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "46ffdf824b8e496bb4c739375d6a4255": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "bddefea747c1479f94c77a3dd33d9e24": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_e1c7251d48a24f68a7990588c43bb431",
              "IPY_MODEL_4abaea6640c0425986e4ef171c6657cc",
              "IPY_MODEL_24c89f9d90be49ce8ebd052ab3377f6e"
            ],
            "layout": "IPY_MODEL_b933dfc3cb8e442d97be2ed145189923"
          }
        },
        "e1c7251d48a24f68a7990588c43bb431": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_3d17626e1daf485e913ba3d8e9904763",
            "placeholder": "​",
            "style": "IPY_MODEL_072e21b6c92e4c7fb79e3deb3ea26002",
            "value": "vocab.txt: 100%"
          }
        },
        "4abaea6640c0425986e4ef171c6657cc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f09e1e08239740299aa68312c3c41d22",
            "max": 251003,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_b1387e3381de4968938d371692b8e464",
            "value": 251003
          }
        },
        "24c89f9d90be49ce8ebd052ab3377f6e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_90ebf3029fd447ce9ca0dc82a120c7d5",
            "placeholder": "​",
            "style": "IPY_MODEL_ce0ccd2032d24fb8b94e4b3c047569d4",
            "value": " 251k/251k [00:00<00:00, 7.94MB/s]"
          }
        },
        "b933dfc3cb8e442d97be2ed145189923": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3d17626e1daf485e913ba3d8e9904763": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "072e21b6c92e4c7fb79e3deb3ea26002": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f09e1e08239740299aa68312c3c41d22": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b1387e3381de4968938d371692b8e464": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "90ebf3029fd447ce9ca0dc82a120c7d5": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ce0ccd2032d24fb8b94e4b3c047569d4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "1b2bd87619f045d5b0a7e346181f2e1e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_5c9ae0652af44b92be325b553edba0d7",
              "IPY_MODEL_629332724bc04a28b67848977545ebc9",
              "IPY_MODEL_9761b542588f48b6bee3f472534af6c4"
            ],
            "layout": "IPY_MODEL_5ef9e949110d45e984cb5e079c434743"
          }
        },
        "5c9ae0652af44b92be325b553edba0d7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0a84cbe234c44ee0a5ee0841760ab58c",
            "placeholder": "​",
            "style": "IPY_MODEL_6b378450c5444334847af9ea1710b5ec",
            "value": "tokenizer.json: 100%"
          }
        },
        "629332724bc04a28b67848977545ebc9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_549ca07ce27a41f1ba0565a7035e1295",
            "max": 497438,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_2e6cba86589a49449d63482bde3aef7d",
            "value": 497438
          }
        },
        "9761b542588f48b6bee3f472534af6c4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_4ad95f20bbad48a7b9386f83946cdd70",
            "placeholder": "​",
            "style": "IPY_MODEL_9e1ffe13e96b424abea09ab9b52b7f8a",
            "value": " 497k/497k [00:00<00:00, 16.5MB/s]"
          }
        },
        "5ef9e949110d45e984cb5e079c434743": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0a84cbe234c44ee0a5ee0841760ab58c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6b378450c5444334847af9ea1710b5ec": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "549ca07ce27a41f1ba0565a7035e1295": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2e6cba86589a49449d63482bde3aef7d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "4ad95f20bbad48a7b9386f83946cdd70": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9e1ffe13e96b424abea09ab9b52b7f8a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "6707a1f508b14ec38bbe5a79f8577806": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_e9401712d1854627bb1ae2d7b3d9ecd2",
              "IPY_MODEL_9b1ebf0e78ad4b52b0401245d611a8bc",
              "IPY_MODEL_a3593c2e810943ffa55e63596d749733"
            ],
            "layout": "IPY_MODEL_ac343eb8c4c14a4c85c2b4f14ecfce7a"
          }
        },
        "e9401712d1854627bb1ae2d7b3d9ecd2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_543afb09b2384517888421a011a88586",
            "placeholder": "​",
            "style": "IPY_MODEL_ef6775804f044e5d8c654d42cf74bd08",
            "value": "special_tokens_map.json: 100%"
          }
        },
        "9b1ebf0e78ad4b52b0401245d611a8bc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7b43d651119e436c9d40f290f9eedf68",
            "max": 112,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_10dd21983d9c42b38374028b115e63e1",
            "value": 112
          }
        },
        "a3593c2e810943ffa55e63596d749733": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_4107503fdb674b1c8c60685fb5fbe1db",
            "placeholder": "​",
            "style": "IPY_MODEL_cb2ee95977c94e138024ca77bd05e794",
            "value": " 112/112 [00:00<00:00, 9.93kB/s]"
          }
        },
        "ac343eb8c4c14a4c85c2b4f14ecfce7a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "543afb09b2384517888421a011a88586": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ef6775804f044e5d8c654d42cf74bd08": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "7b43d651119e436c9d40f290f9eedf68": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "10dd21983d9c42b38374028b115e63e1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "4107503fdb674b1c8c60685fb5fbe1db": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cb2ee95977c94e138024ca77bd05e794": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "d0e50ec0c96741d48e76063bb183b8fa": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_8085b5ba4ec34e638ba014fbb37e21d4",
              "IPY_MODEL_a4a3807c55e8411cbc6c44c15180787b",
              "IPY_MODEL_9a9197e5815e472eadda980fce33b8cf"
            ],
            "layout": "IPY_MODEL_9cdb83209b154af39455ac4009bb5593"
          }
        },
        "8085b5ba4ec34e638ba014fbb37e21d4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_d76e0ec95e354f8591f4dfc72f4039f9",
            "placeholder": "​",
            "style": "IPY_MODEL_7cf4e5265dbb4092bfd7b8b7aedbd25f",
            "value": "Map: 100%"
          }
        },
        "a4a3807c55e8411cbc6c44c15180787b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_bd3c0665a69c4b6aa723fdf30454d337",
            "max": 99545,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_9149f0d1909f4dac95d3696fb8d61529",
            "value": 99545
          }
        },
        "9a9197e5815e472eadda980fce33b8cf": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e2e9e694e8dc436aba563d05de7e786b",
            "placeholder": "​",
            "style": "IPY_MODEL_9e65b1fdbdbd4e91b1c32a6fa52c7561",
            "value": " 99545/99545 [00:53<00:00, 1716.94 examples/s]"
          }
        },
        "9cdb83209b154af39455ac4009bb5593": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d76e0ec95e354f8591f4dfc72f4039f9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7cf4e5265dbb4092bfd7b8b7aedbd25f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "bd3c0665a69c4b6aa723fdf30454d337": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9149f0d1909f4dac95d3696fb8d61529": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "e2e9e694e8dc436aba563d05de7e786b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9e65b1fdbdbd4e91b1c32a6fa52c7561": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "!pip install transformers datasets seqeval huggingface_hub\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "5v8KnAaD-z9t",
        "outputId": "ec89bbe3-e698-4e6f-eb27-ea15e3d2a549"
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.44.2)\n",
            "Collecting datasets\n",
            "  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)\n",
            "Collecting seqeval\n",
            "  Downloading seqeval-1.2.2.tar.gz (43 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.10/dist-packages (0.24.7)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.16.1)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.26.4)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.1)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.2)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.9.11)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.32.3)\n",
            "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.5)\n",
            "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.6)\n",
            "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (17.0.0)\n",
            "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n",
            "  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.2.2)\n",
            "Collecting xxhash (from datasets)\n",
            "  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
            "Collecting multiprocess<0.70.17 (from datasets)\n",
            "  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n",
            "Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)\n",
            "  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.10.10)\n",
            "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.5.2)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (4.12.2)\n",
            "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.3)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.2.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.5.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n",
            "Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.17.0)\n",
            "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.10)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.2.3)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.8.30)\n",
            "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.13.1)\n",
            "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.2)\n",
            "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.5.0)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n",
            "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0)\n",
            "Downloading datasets-3.1.0-py3-none-any.whl (480 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m11.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m16.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hBuilding wheels for collected packages: seqeval\n",
            "  Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=c1030f22e743c2a4b8d3cb548cb8e8f138f24b58417a9f47bbc72df908c59d18\n",
            "  Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa\n",
            "Successfully built seqeval\n",
            "Installing collected packages: xxhash, fsspec, dill, multiprocess, seqeval, datasets\n",
            "  Attempting uninstall: fsspec\n",
            "    Found existing installation: fsspec 2024.10.0\n",
            "    Uninstalling fsspec-2024.10.0:\n",
            "      Successfully uninstalled fsspec-2024.10.0\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mSuccessfully installed datasets-3.1.0 dill-0.3.8 fsspec-2024.9.0 multiprocess-0.70.16 seqeval-1.2.2 xxhash-3.5.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Standard library imports\n",
        "import os                 # Provides functions for interacting with the operating system\n",
        "import warnings           # Used to handle or suppress warnings\n",
        "import numpy as np        # Essential for numerical operations and array manipulation\n",
        "import torch              # PyTorch library for tensor computations and model handling\n",
        "import ast                # Used for safe evaluation of strings to Python objects (e.g., parsing tokens)\n",
        "\n",
        "# Hugging Face and Transformers imports\n",
        "from datasets import load_dataset                     # Loads datasets for model training and evaluation\n",
        "from transformers import (\n",
        "    AutoTokenizer,                                   # Initializes a tokenizer from a pre-trained model\n",
        "    DataCollatorForTokenClassification,              # Handles padding and formatting of token classification data\n",
        "    TrainingArguments,                               # Defines training parameters like batch size and learning rate\n",
        "    Trainer,                                         # High-level API for managing training and evaluation\n",
        "    AutoModelForTokenClassification,                 # Loads a pre-trained model for token classification tasks\n",
        "    get_linear_schedule_with_warmup,                 # Learning rate scheduler for gradual warm-up and linear decay\n",
        "    EarlyStoppingCallback                           # Callback to stop training if validation performance plateaus\n",
        ")\n",
        "\n",
        "# Hugging Face Hub\n",
        "from huggingface_hub import login                   # Allows logging in to Hugging Face Hub to upload models\n",
        "\n",
        "# seqeval metrics for NER evaluation\n",
        "from seqeval.metrics import precision_score, recall_score, f1_score, classification_report\n",
        "# Provides precision, recall, F1-score, and classification report for evaluating NER model performance\n"
      ],
      "metadata": {
        "id": "amREIFSH-z7r"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Log in to Hugging Face Hub\n",
        "login(token=\"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\")\n"
      ],
      "metadata": {
        "id": "K7adlboI-z4p",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "71221522-e2cb-446f-91d9-d6f5b3a5ef08"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
            "Token is valid (permission: fineGrained).\n",
            "Your token has been saved to /root/.cache/huggingface/token\n",
            "Login successful\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Disable WandB (Weights & Biases) logging to avoid unwanted log outputs during training\n",
        "os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
        "\n",
        "# Suppress warning messages to keep output clean, especially during training and evaluation\n",
        "warnings.filterwarnings(\"ignore\")\n"
      ],
      "metadata": {
        "id": "Qccgsjfs-zzA"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Load the Azerbaijani NER dataset from Hugging Face\n",
        "dataset = load_dataset(\"LocalDoc/azerbaijani-ner-dataset\")\n",
        "print(dataset)  # Display dataset structure (e.g., train/validation splits)\n",
        "\n",
        "# Preprocessing function to format tokens and NER tags correctly\n",
        "def preprocess_example(example):\n",
        "    try:\n",
        "        # Convert string of tokens to a list and parse NER tags to integers\n",
        "        example[\"tokens\"] = ast.literal_eval(example[\"tokens\"])\n",
        "        example[\"ner_tags\"] = list(map(int, ast.literal_eval(example[\"ner_tags\"])))\n",
        "    except (ValueError, SyntaxError) as e:\n",
        "        # Skip and log malformed examples, ensuring error resilience\n",
        "        print(f\"Skipping malformed example: {example['index']} due to error: {e}\")\n",
        "        example[\"tokens\"] = []\n",
        "        example[\"ner_tags\"] = []\n",
        "    return example\n",
        "\n",
        "# Apply preprocessing to each dataset entry, ensuring consistent formatting\n",
        "dataset = dataset.map(preprocess_example)\n"
      ],
      "metadata": {
        "id": "fQ6ttUM8-zwM",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 604,
          "referenced_widgets": [
            "ee9c321418ce4322a0d6b28a3f2ca6a1",
            "e3e7f4f191ab43b581e38d2b79dc3e89",
            "d4d0156eae3c41fe9f1f7ffd1cdd8c35",
            "8cf941ff295e449589a2d9a454cfefba",
            "a592ff42cd3346bca504eedba0f3955b",
            "417708e3b88b4764a97ad0d6ef78e38b",
            "ef7b5cca37be4c0285aa863783edd83d",
            "66dc16b4997f46439bed98ad8d0e8732",
            "cbd8f4782051447085d2c4c84b8185fb",
            "f20f1c3e78e84e46b9f7022c791775e4",
            "d87fdc84c0eb4243804b5e6c86f7eb78",
            "5ccea58adc994a8082d77a0fd3dd5175",
            "2bfa1a551787407397a7d14faffaa471",
            "f3bec46279cd4f99a7e7693aca5a463a",
            "f275a7c407fa4b699039ed7bd9a7cdec",
            "7e25a8d5567f474cb450095ed1b409fa",
            "f4c4efcc5f10400caac3695865596340",
            "65c6ebc06606451bb347f7291f4af0cb",
            "751e56f58fef4cfa8dbc032790667c4f",
            "ea9b7f2ae3cc4d819d1eedd09b498f4d",
            "e631c93fc21348528300a7a1013d39f5",
            "e3caeb64cd5e4cb08265ebcb7040b340",
            "029202e5e41d404385ac4a3a36989700",
            "9069aafbe84f47a6860ba99ff9de8fcd",
            "41e04d59393245c898bae1bd80c824fe",
            "acc44cac406a4f51ad13b82ad141f9ab",
            "254f3a26f42e4ce193c7ea8283eb77af",
            "50566b0a37d64188beea5ea55edf60bc",
            "bbbd5baf90614ccaae15010e1982bd96",
            "6ddb7eaee2b7405e988582e5f6edc4d0",
            "5a1097b3c10846e29c9b838e9b3d41e7",
            "8b7e0c70f2e349fbae1146ee73014ebc",
            "ed9ff4cd49b646288e6d3d74efcec647",
            "880e3efa6bcc49cd98207c04c476e918",
            "76ebb3ae1d354a139207ba45b0161206",
            "8415ed49d34846efbb7d84341fe931c5",
            "749f0a339075455f97b18ae42163457f",
            "4f65674d3ae44bd7aa89873233d9c421",
            "c454e4d7594543ee971fd29f32c02e0b",
            "f5a82328e6e842eeb772c171ccdb57ab",
            "f7cfa914ad6841559b1eb9b203474000",
            "6c09c2b4125d4eadad1a446095209d7f",
            "b5d8b7c444214f2ab616b12eec614716",
            "ea8e7d69c63f491ca615cddb77c0dde2"
          ]
        },
        "outputId": "130ce4ea-2cda-44d1-8514-bf5cab1be096"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "README.md:   0%|          | 0.00/2.87k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "ee9c321418ce4322a0d6b28a3f2ca6a1"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "train-00000-of-00001.parquet:   0%|          | 0.00/13.6M [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "5ccea58adc994a8082d77a0fd3dd5175"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Generating train split:   0%|          | 0/99545 [00:00<?, ? examples/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "029202e5e41d404385ac4a3a36989700"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "DatasetDict({\n",
            "    train: Dataset({\n",
            "        features: ['index', 'tokens', 'ner_tags'],\n",
            "        num_rows: 99545\n",
            "    })\n",
            "})\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Map:   0%|          | 0/99545 [00:00<?, ? examples/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "880e3efa6bcc49cd98207c04c476e918"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Skipping malformed example: 7171f30e-fa1e-49ec-975e-16c88c9b95e9 due to error: malformed node or string: None\n",
            "Skipping malformed example: 91dfd97b-2997-4080-8054-00cadec14dfc due to error: malformed node or string: None\n",
            "Skipping malformed example: cfb8beb4-ae7a-4185-9a54-08b0e85d03d3 due to error: malformed node or string: None\n",
            "Skipping malformed example: 5f0a2991-38b3-435b-9059-a05382e89a62 due to error: malformed node or string: None\n",
            "Skipping malformed example: 9d705fde-ce09-4bef-9f4a-9ad1fa452cc9 due to error: malformed node or string: None\n",
            "Skipping malformed example: 182457fb-c648-4fca-a207-af5a00072d4a due to error: malformed node or string: None\n",
            "Skipping malformed example: d9205ccd-c692-4cf1-8310-181de8f4cdc8 due to error: malformed node or string: None\n",
            "Skipping malformed example: dac55265-38cd-4c4b-9e56-a48a77e108d4 due to error: malformed node or string: None\n",
            "Skipping malformed example: f3d38b45-0035-45ab-b0aa-79ae7c63ba7a due to error: malformed node or string: None\n",
            "Skipping malformed example: 5ed32762-bf5b-4db4-9dbd-07cd5c0541dc due to error: malformed node or string: None\n",
            "Skipping malformed example: 426fc958-8c6b-41d8-acfe-2082a6be6ada due to error: malformed node or string: None\n",
            "Skipping malformed example: 4b5aa52d-cd5e-43ee-ac4f-7a8da00860e1 due to error: malformed node or string: None\n",
            "Skipping malformed example: 53b1ce49-1f71-4770-a344-bf1d804fefd4 due to error: malformed node or string: None\n",
            "Skipping malformed example: 03e9e957-da8f-45dc-84d0-e556bfd023b3 due to error: malformed node or string: None\n",
            "Skipping malformed example: b7e12634-f7be-42cb-8e76-837af2f2d877 due to error: malformed node or string: None\n",
            "Skipping malformed example: 0c77b0ac-b1cf-4730-ae3d-d7c59221f181 due to error: malformed node or string: None\n",
            "Skipping malformed example: b4623202-dfcb-4fa8-9d28-5af818111de2 due to error: malformed node or string: None\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Load the tokenizer of the akdeniz27/bert-base-turkish-cased-ner checkpoint.\n",
        "# Multilingual alternative (xlm-roberta-large), currently disabled:\n",
        "# tokenizer = AutoTokenizer.from_pretrained(\"xlm-roberta-large\")\n",
        "\n",
        "tokenizer = AutoTokenizer.from_pretrained(\"akdeniz27/bert-base-turkish-cased-ner\")\n",
        "\n",
        "\n",
        "def tokenize_and_align_labels(example):\n",
        "    \"\"\"Tokenize one pre-split sentence and align its NER tags with the resulting tokens.\n",
        "\n",
        "    The first sub-word token of every word receives that word's tag. All other\n",
        "    positions get -100 (ignored in loss): tokens without a word id (special and\n",
        "    padding tokens), continuation sub-words, and words that have no entry in\n",
        "    example[\"ner_tags\"].\n",
        "\n",
        "    Returns the tokenizer output with an added \"labels\" list.\n",
        "    \"\"\"\n",
        "    encoding = tokenizer(\n",
        "        example[\"tokens\"],         # the sentence as a list of words\n",
        "        is_split_into_words=True,  # input is already split into words\n",
        "        truncation=True,           # cut sequences longer than max_length\n",
        "        max_length=128,\n",
        "        padding=\"max_length\",      # pad every sequence up to max_length\n",
        "    )\n",
        "\n",
        "    ner_tags = example[\"ner_tags\"]\n",
        "    aligned_labels = []\n",
        "    last_word_id = None\n",
        "    for word_id in encoding.word_ids():\n",
        "        # Only the first token of a word is a candidate for a real label\n",
        "        starts_new_word = word_id is not None and word_id != last_word_id\n",
        "        if starts_new_word and word_id < len(ner_tags):\n",
        "            aligned_labels.append(ner_tags[word_id])\n",
        "        else:\n",
        "            aligned_labels.append(-100)\n",
        "        last_word_id = word_id\n",
        "\n",
        "    encoding[\"labels\"] = aligned_labels\n",
        "    return encoding\n",
        "\n",
        "\n",
        "# Run the tokenization/alignment over the dataset, one example per call\n",
        "tokenized_datasets = dataset.map(tokenize_and_align_labels, batched=False)\n"
      ],
      "metadata": {
        "id": "-24SJijT-zth",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 177,
          "referenced_widgets": [
            "696d4ebbf97c44e9a6cdff707b87e953",
            "b7abe11e0fb041d49137296e1d4bf43e",
            "83d1e890914f4206b53e7c8fd3eafdec",
            "97258ff7b0874640bcb47e1d1beb73ad",
            "ba56149214a7449ab4023150af34380e",
            "e7b683ab266740d2b97981ffd12e0f47",
            "34884b30a6e24ef2b7992e39fbd487df",
            "6d8d6f3c10e54a99926a77759aed7980",
            "2aaa0e920af74c91b1497d4e2f0d7c7f",
            "33b46ebefd7a4e19b20c9113a6306e33",
            "46ffdf824b8e496bb4c739375d6a4255",
            "bddefea747c1479f94c77a3dd33d9e24",
            "e1c7251d48a24f68a7990588c43bb431",
            "4abaea6640c0425986e4ef171c6657cc",
            "24c89f9d90be49ce8ebd052ab3377f6e",
            "b933dfc3cb8e442d97be2ed145189923",
            "3d17626e1daf485e913ba3d8e9904763",
            "072e21b6c92e4c7fb79e3deb3ea26002",
            "f09e1e08239740299aa68312c3c41d22",
            "b1387e3381de4968938d371692b8e464",
            "90ebf3029fd447ce9ca0dc82a120c7d5",
            "ce0ccd2032d24fb8b94e4b3c047569d4",
            "1b2bd87619f045d5b0a7e346181f2e1e",
            "5c9ae0652af44b92be325b553edba0d7",
            "629332724bc04a28b67848977545ebc9",
            "9761b542588f48b6bee3f472534af6c4",
            "5ef9e949110d45e984cb5e079c434743",
            "0a84cbe234c44ee0a5ee0841760ab58c",
            "6b378450c5444334847af9ea1710b5ec",
            "549ca07ce27a41f1ba0565a7035e1295",
            "2e6cba86589a49449d63482bde3aef7d",
            "4ad95f20bbad48a7b9386f83946cdd70",
            "9e1ffe13e96b424abea09ab9b52b7f8a",
            "6707a1f508b14ec38bbe5a79f8577806",
            "e9401712d1854627bb1ae2d7b3d9ecd2",
            "9b1ebf0e78ad4b52b0401245d611a8bc",
            "a3593c2e810943ffa55e63596d749733",
            "ac343eb8c4c14a4c85c2b4f14ecfce7a",
            "543afb09b2384517888421a011a88586",
            "ef6775804f044e5d8c654d42cf74bd08",
            "7b43d651119e436c9d40f290f9eedf68",
            "10dd21983d9c42b38374028b115e63e1",
            "4107503fdb674b1c8c60685fb5fbe1db",
            "cb2ee95977c94e138024ca77bd05e794",
            "d0e50ec0c96741d48e76063bb183b8fa",
            "8085b5ba4ec34e638ba014fbb37e21d4",
            "a4a3807c55e8411cbc6c44c15180787b",
            "9a9197e5815e472eadda980fce33b8cf",
            "9cdb83209b154af39455ac4009bb5593",
            "d76e0ec95e354f8591f4dfc72f4039f9",
            "7cf4e5265dbb4092bfd7b8b7aedbd25f",
            "bd3c0665a69c4b6aa723fdf30454d337",
            "9149f0d1909f4dac95d3696fb8d61529",
            "e2e9e694e8dc436aba563d05de7e786b",
            "9e65b1fdbdbd4e91b1c32a6fa52c7561"
          ]
        },
        "outputId": "3cea5198-82bc-4d69-e886-9a3bbe6f1c87"
      },
      "execution_count": 7,
      "outputs": [
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "696d4ebbf97c44e9a6cdff707b87e953",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "tokenizer_config.json:   0%|          | 0.00/373 [00:00<?, ?B/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "bddefea747c1479f94c77a3dd33d9e24",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "vocab.txt:   0%|          | 0.00/251k [00:00<?, ?B/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "1b2bd87619f045d5b0a7e346181f2e1e",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "tokenizer.json:   0%|          | 0.00/497k [00:00<?, ?B/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "6707a1f508b14ec38bbe5a79f8577806",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "d0e50ec0c96741d48e76063bb183b8fa",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "Map:   0%|          | 0/99545 [00:00<?, ? examples/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Create a 90-10 split of the dataset for training and validation.\n",
        "# A fixed seed keeps the split (and therefore the reported metrics) reproducible across runs.\n",
        "# NOTE: this rebinds tokenized_datasets, so re-running this cell on its own would split the\n",
        "# 90% training split again; re-run the tokenization cell first.\n",
        "tokenized_datasets = tokenized_datasets[\"train\"].train_test_split(test_size=0.1, seed=42)\n",
        "print(tokenized_datasets)  # Output structure of split datasets"
      ],
      "metadata": {
        "id": "DA7mW2it-zoo",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "a6ad737b-1cb1-487e-ec8c-71fbd1195a35"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "DatasetDict({\n",
            "    train: Dataset({\n",
            "        features: ['index', 'tokens', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],\n",
            "        num_rows: 89590\n",
            "    })\n",
            "    test: Dataset({\n",
            "        features: ['index', 'tokens', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],\n",
            "        num_rows: 9955\n",
            "    })\n",
            "})\n"
          ]
        }
      ]
    },
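    {
      "cell_type": "code",
      "source": [
        "# Define a list of entity labels for NER tagging with B- (beginning) and I- (inside) markers.\n",
        "# NOTE(review): a label's position in this list must equal the integer id stored in the dataset's\n",
        "# ner_tags. The evaluation reports only ever show the 12 entity types at indices 1-24 -\n",
        "# TODO confirm the dataset really uses this B-/I- ordering rather than one id per entity type.\n",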
        "label_list = [\n",
        "    \"O\",                  # Outside of a named entity\n",
        "    \"B-PERSON\", \"I-PERSON\",         # Person name (e.g., \"John\" in \"John Doe\")\n",
        "    \"B-LOCATION\", \"I-LOCATION\",     # Geographical location (e.g., \"Paris\")\n",
        "    \"B-ORGANISATION\", \"I-ORGANISATION\", # Organization name (e.g., \"UNICEF\")\n",
        "    \"B-DATE\", \"I-DATE\",             # Date entity (e.g., \"2024-11-05\")\n",
        "    \"B-TIME\", \"I-TIME\",             # Time (e.g., \"12:00 PM\")\n",
        "    \"B-MONEY\", \"I-MONEY\",           # Monetary values (e.g., \"$20\")\n",
        "    \"B-PERCENTAGE\", \"I-PERCENTAGE\", # Percentage values (e.g., \"20%\")\n",
        "    \"B-FACILITY\", \"I-FACILITY\",     # Physical facilities (e.g., \"Airport\")\n",
        "    \"B-PRODUCT\", \"I-PRODUCT\",       # Product names (e.g., \"iPhone\")\n",
        "    \"B-EVENT\", \"I-EVENT\",           # Named events (e.g., \"Olympics\")\n",
        "    \"B-ART\", \"I-ART\",               # Works of art (e.g., \"Mona Lisa\")\n",
        "    \"B-LAW\", \"I-LAW\",               # Laws and legal documents (e.g., \"Article 50\")\n",
        "    \"B-LANGUAGE\", \"I-LANGUAGE\",     # Languages (e.g., \"Azerbaijani\")\n",
        "    \"B-GPE\", \"I-GPE\",               # Geopolitical entities (e.g., \"Europe\")\n",
        "    \"B-NORP\", \"I-NORP\",             # Nationalities, religious groups, political groups\n",
        "    \"B-ORDINAL\", \"I-ORDINAL\",       # Ordinal indicators (e.g., \"first\", \"second\")\n",
        "    \"B-CARDINAL\", \"I-CARDINAL\",     # Cardinal numbers (e.g., \"three\")\n",
        "    \"B-DISEASE\", \"I-DISEASE\",       # Diseases (e.g., \"COVID-19\")\n",
        "    \"B-CONTACT\", \"I-CONTACT\",       # Contact info (e.g., email or phone number)\n",
        "    \"B-ADAGE\", \"I-ADAGE\",           # Common sayings or adages\n",
        "    \"B-QUANTITY\", \"I-QUANTITY\",     # Quantities (e.g., \"5 km\")\n",
        "    \"B-MISCELLANEOUS\", \"I-MISCELLANEOUS\", # Miscellaneous entities not fitting other categories\n",
        "    \"B-POSITION\", \"I-POSITION\",     # Job titles or positions (e.g., \"CEO\")\n",
        "    \"B-PROJECT\", \"I-PROJECT\"        # Project names (e.g., \"Project Apollo\")\n",
        "]"
      ],
      "metadata": {
        "id": "-lVHfKEE-zmm"
      },
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Initialize a data collator to handle padding and formatting for token classification\n",
        "data_collator = DataCollatorForTokenClassification(tokenizer)\n",
        "\n",
        "# Load a pre-trained model for token classification, adapted for NER tasks\n",
        "# model = AutoModelForTokenClassification.from_pretrained(\n",
        "#     \"xlm-roberta-large\",               # Base model (multilingual XLM-RoBERTa) for NER\n",
        "#     num_labels=len(label_list)        # Set the number of output labels to match NER categories\n",
        "# )\n",
        "\n",
        "model = AutoModelForTokenClassification.from_pretrained(\n",
        "    \"akdeniz27/bert-base-turkish-cased-ner\",\n",
        "    num_labels=len(label_list),  # Ensure this matches the number of labels for your NER task\n",
        "    ignore_mismatched_sizes=True  # Allow loading despite mismatched classifier layer size\n",
        ")\n"
      ],
      "metadata": {
        "id": "jUfWCaen-zjr",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "780fc977-4885-4a21-d3ab-d392f75b316c"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Some weights of BertForTokenClassification were not initialized from the model checkpoint at akdeniz27/bert-base-turkish-cased-ner and are newly initialized because the shapes did not match:\n",
            "- classifier.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([49]) in the model instantiated\n",
            "- classifier.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([49, 768]) in the model instantiated\n",
            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def compute_metrics(p):\n",
        "    \"\"\"Convert a (predictions, labels) pair into precision, recall and F1.\n",
        "\n",
        "    Positions whose label is -100 are dropped before scoring. A classification\n",
        "    report is printed as a side effect on every call.\n",
        "    \"\"\"\n",
        "    logits, label_ids = p\n",
        "\n",
        "    # The highest-scoring class along axis 2 is the predicted label index\n",
        "    predicted_ids = np.argmax(logits, axis=2)\n",
        "\n",
        "    true_labels = []\n",
        "    true_predictions = []\n",
        "    for predicted_row, gold_row in zip(predicted_ids, label_ids):\n",
        "        # Keep only the positions that carry a real label (anything but -100)\n",
        "        kept = [(pred, gold) for pred, gold in zip(predicted_row, gold_row) if gold != -100]\n",
        "        true_labels.append([label_list[gold] for _, gold in kept])\n",
        "        true_predictions.append([label_list[pred] for pred, _ in kept])\n",
        "\n",
        "    # Detailed per-category report, printed once per evaluation\n",
        "    print(classification_report(true_labels, true_predictions))\n",
        "\n",
        "    # Precision: share of predicted entities that are correct (penalises false positives).\n",
        "    precision = precision_score(true_labels, true_predictions)\n",
        "    # Recall: share of true entities that were found (penalises false negatives).\n",
        "    recall = recall_score(true_labels, true_predictions)\n",
        "    # F1: harmonic mean of precision and recall, a single balanced score.\n",
        "    f1 = f1_score(true_labels, true_predictions)\n",
        "\n",
        "    return {\n",
        "        \"precision\": precision,\n",
        "        \"recall\": recall,\n",
        "        \"f1\": f1,\n",
        "    }"
      ],
      "metadata": {
        "id": "9b7EajE_-zhS"
      },
      "execution_count": 12,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Set up training arguments for model training, defining essential training configurations\n",
        "training_args = TrainingArguments(\n",
        "    output_dir=\"./results\",               # Directory to save model checkpoints and final outputs\n",
        "    evaluation_strategy=\"epoch\",          # Evaluate model on the validation set at the end of each epoch\n",
        "    save_strategy=\"epoch\",                # Save model checkpoints at the end of each epoch\n",
        "    learning_rate=2e-5,                   # Set a low learning rate to ensure stable training for fine-tuning\n",
        "    per_device_train_batch_size=128,       # Number of examples per batch during training, balancing speed and memory\n",
        "    per_device_eval_batch_size=128,        # Number of examples per batch during evaluation\n",
        "    num_train_epochs=10,                   # Number of full training passes over the dataset\n",
        "    weight_decay=0.005,                    # Regularization term to prevent overfitting by penalizing large weights\n",
        "    fp16=True,                            # Use 16-bit floating point for faster and memory-efficient training\n",
        "    logging_dir='./logs',                 # Directory to store training logs\n",
        "    save_total_limit=2,                   # Keep only the 2 latest model checkpoints to save storage space\n",
        "    load_best_model_at_end=True,          # Load the best model based on metrics at the end of training\n",
        "    metric_for_best_model=\"f1\",           # Use F1-score to determine the best model checkpoint\n",
        "    report_to=\"none\"                      # Disable reporting to external services (useful in local runs)\n",
        ")\n"
      ],
      "metadata": {
        "id": "PmJTMpp6-zew"
      },
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Initialize the Trainer class to manage the training loop with all necessary components\n",
        "trainer = Trainer(\n",
        "    model=model,                         # The pre-trained model to be fine-tuned\n",
        "    args=training_args,                  # Training configuration parameters defined in TrainingArguments\n",
        "    train_dataset=tokenized_datasets[\"train\"],  # Tokenized training dataset\n",
        "    eval_dataset=tokenized_datasets[\"test\"],    # Tokenized validation dataset\n",
        "    tokenizer=tokenizer,                 # Tokenizer used for processing input text\n",
        "    data_collator=data_collator,         # Data collator for padding and batching during training\n",
        "    compute_metrics=compute_metrics,     # Function to calculate evaluation metrics like precision, recall, F1\n",
        "    callbacks=[EarlyStoppingCallback(early_stopping_patience=5)] # Stop training early if validation metrics don't improve for 5 consecutive evaluations\n",
        ")\n"
      ],
      "metadata": {
        "id": "WqoF7QJy-zb2"
      },
      "execution_count": 16,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Begin the training process and capture the training metrics\n",
        "training_metrics = trainer.train()\n",
        "\n",
        "# Evaluate the model on the validation set after training\n",
        "eval_results = trainer.evaluate()\n",
        "\n",
        "# Print evaluation results, including precision, recall, and F1-score\n",
        "print(eval_results)\n"
      ],
      "metadata": {
        "id": "QveYYwvA-zUR",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "fcbe7627-f653-44a9-d288-bcf4fad16bdc"
      },
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "    <div>\n",
              "      \n",
              "      <progress value='6300' max='7000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
              "      [6300/7000 17:10 < 01:54, 6.11 it/s, Epoch 9/10]\n",
              "    </div>\n",
              "    <table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              " <tr style=\"text-align: left;\">\n",
              "      <th>Epoch</th>\n",
              "      <th>Training Loss</th>\n",
              "      <th>Validation Loss</th>\n",
              "      <th>Precision</th>\n",
              "      <th>Recall</th>\n",
              "      <th>F1</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <td>1</td>\n",
              "      <td>0.433100</td>\n",
              "      <td>0.306711</td>\n",
              "      <td>0.739000</td>\n",
              "      <td>0.693282</td>\n",
              "      <td>0.715412</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>2</td>\n",
              "      <td>0.292700</td>\n",
              "      <td>0.275796</td>\n",
              "      <td>0.781565</td>\n",
              "      <td>0.688937</td>\n",
              "      <td>0.732334</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>3</td>\n",
              "      <td>0.250600</td>\n",
              "      <td>0.275115</td>\n",
              "      <td>0.758261</td>\n",
              "      <td>0.709425</td>\n",
              "      <td>0.733031</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>4</td>\n",
              "      <td>0.233700</td>\n",
              "      <td>0.273087</td>\n",
              "      <td>0.756184</td>\n",
              "      <td>0.716277</td>\n",
              "      <td>0.735689</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>5</td>\n",
              "      <td>0.214800</td>\n",
              "      <td>0.278477</td>\n",
              "      <td>0.756051</td>\n",
              "      <td>0.710996</td>\n",
              "      <td>0.732832</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>6</td>\n",
              "      <td>0.199200</td>\n",
              "      <td>0.286102</td>\n",
              "      <td>0.755068</td>\n",
              "      <td>0.717012</td>\n",
              "      <td>0.735548</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>7</td>\n",
              "      <td>0.192800</td>\n",
              "      <td>0.297157</td>\n",
              "      <td>0.742326</td>\n",
              "      <td>0.725802</td>\n",
              "      <td>0.733971</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>8</td>\n",
              "      <td>0.178900</td>\n",
              "      <td>0.304510</td>\n",
              "      <td>0.743206</td>\n",
              "      <td>0.723930</td>\n",
              "      <td>0.733442</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>9</td>\n",
              "      <td>0.171700</td>\n",
              "      <td>0.313845</td>\n",
              "      <td>0.743145</td>\n",
              "      <td>0.725535</td>\n",
              "      <td>0.734234</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table><p>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "              precision    recall  f1-score   support\n",
            "\n",
            "         ART       0.64      0.11      0.19      1988\n",
            "        DATE       0.46      0.45      0.46       844\n",
            "       EVENT       0.81      0.26      0.40        84\n",
            "    FACILITY       0.68      0.68      0.68      1146\n",
            "         LAW       0.58      0.54      0.56      1103\n",
            "    LOCATION       0.73      0.78      0.75      8806\n",
            "       MONEY       0.55      0.47      0.51       532\n",
            "ORGANISATION       0.62      0.61      0.62       527\n",
            "  PERCENTAGE       0.78      0.80      0.79      3679\n",
            "      PERSON       0.82      0.83      0.82      6924\n",
            "     PRODUCT       0.80      0.78      0.79      2653\n",
            "        TIME       0.58      0.35      0.44      1634\n",
            "\n",
            "   micro avg       0.74      0.69      0.72     29920\n",
            "   macro avg       0.67      0.56      0.58     29920\n",
            "weighted avg       0.73      0.69      0.70     29920\n",
            "\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "         ART       0.55      0.13      0.21      1988\n",
            "        DATE       0.57      0.40      0.47       844\n",
            "       EVENT       0.88      0.33      0.48        84\n",
            "    FACILITY       0.75      0.65      0.70      1146\n",
            "         LAW       0.60      0.56      0.58      1103\n",
            "    LOCATION       0.79      0.76      0.77      8806\n",
            "       MONEY       0.60      0.54      0.57       532\n",
            "ORGANISATION       0.67      0.64      0.65       527\n",
            "  PERCENTAGE       0.78      0.81      0.80      3679\n",
            "      PERSON       0.87      0.81      0.84      6924\n",
            "     PRODUCT       0.81      0.80      0.81      2653\n",
            "        TIME       0.63      0.36      0.46      1634\n",
            "\n",
            "   micro avg       0.78      0.69      0.73     29920\n",
            "   macro avg       0.71      0.57      0.61     29920\n",
            "weighted avg       0.76      0.69      0.71     29920\n",
            "\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "         ART       0.48      0.14      0.22      1988\n",
            "        DATE       0.54      0.44      0.48       844\n",
            "       EVENT       0.88      0.35      0.50        84\n",
            "    FACILITY       0.72      0.68      0.70      1146\n",
            "         LAW       0.60      0.59      0.60      1103\n",
            "    LOCATION       0.75      0.79      0.77      8806\n",
            "       MONEY       0.60      0.54      0.57       532\n",
            "ORGANISATION       0.63      0.67      0.65       527\n",
            "  PERCENTAGE       0.77      0.83      0.80      3679\n",
            "      PERSON       0.88      0.81      0.84      6924\n",
            "     PRODUCT       0.82      0.81      0.81      2653\n",
            "        TIME       0.57      0.44      0.50      1634\n",
            "\n",
            "   micro avg       0.76      0.71      0.73     29920\n",
            "   macro avg       0.69      0.59      0.62     29920\n",
            "weighted avg       0.74      0.71      0.72     29920\n",
            "\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "         ART       0.49      0.14      0.21      1988\n",
            "        DATE       0.49      0.48      0.49       844\n",
            "       EVENT       0.88      0.36      0.51        84\n",
            "    FACILITY       0.72      0.68      0.70      1146\n",
            "         LAW       0.57      0.64      0.60      1103\n",
            "    LOCATION       0.77      0.79      0.78      8806\n",
            "       MONEY       0.62      0.57      0.59       532\n",
            "ORGANISATION       0.64      0.65      0.64       527\n",
            "  PERCENTAGE       0.77      0.83      0.80      3679\n",
            "      PERSON       0.87      0.81      0.84      6924\n",
            "     PRODUCT       0.82      0.80      0.81      2653\n",
            "        TIME       0.55      0.50      0.52      1634\n",
            "\n",
            "   micro avg       0.76      0.72      0.74     29920\n",
            "   macro avg       0.68      0.60      0.62     29920\n",
            "weighted avg       0.74      0.72      0.72     29920\n",
            "\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "         ART       0.35      0.18      0.23      1988\n",
            "        DATE       0.53      0.43      0.48       844\n",
            "       EVENT       0.82      0.39      0.53        84\n",
            "    FACILITY       0.71      0.67      0.69      1146\n",
            "         LAW       0.61      0.59      0.60      1103\n",
            "    LOCATION       0.78      0.78      0.78      8806\n",
            "       MONEY       0.60      0.57      0.58       532\n",
            "ORGANISATION       0.61      0.67      0.64       527\n",
            "  PERCENTAGE       0.78      0.81      0.80      3679\n",
            "      PERSON       0.86      0.83      0.84      6924\n",
            "     PRODUCT       0.84      0.77      0.81      2653\n",
            "        TIME       0.57      0.48      0.52      1634\n",
            "\n",
            "   micro avg       0.76      0.71      0.73     29920\n",
            "   macro avg       0.67      0.60      0.63     29920\n",
            "weighted avg       0.74      0.71      0.72     29920\n",
            "\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "         ART       0.36      0.16      0.22      1988\n",
            "        DATE       0.50      0.48      0.49       844\n",
            "       EVENT       0.82      0.38      0.52        84\n",
            "    FACILITY       0.72      0.68      0.70      1146\n",
            "         LAW       0.63      0.59      0.61      1103\n",
            "    LOCATION       0.77      0.79      0.78      8806\n",
            "       MONEY       0.59      0.60      0.60       532\n",
            "ORGANISATION       0.67      0.71      0.69       527\n",
            "  PERCENTAGE       0.77      0.84      0.80      3679\n",
            "      PERSON       0.87      0.81      0.84      6924\n",
            "     PRODUCT       0.84      0.80      0.82      2653\n",
            "        TIME       0.57      0.50      0.53      1634\n",
            "\n",
            "   micro avg       0.76      0.72      0.74     29920\n",
            "   macro avg       0.68      0.61      0.63     29920\n",
            "weighted avg       0.74      0.72      0.73     29920\n",
            "\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "         ART       0.32      0.18      0.23      1988\n",
            "        DATE       0.50      0.49      0.49       844\n",
            "       EVENT       0.85      0.39      0.54        84\n",
            "    FACILITY       0.73      0.68      0.70      1146\n",
            "         LAW       0.60      0.62      0.61      1103\n",
            "    LOCATION       0.77      0.79      0.78      8806\n",
            "       MONEY       0.58      0.61      0.59       532\n",
            "ORGANISATION       0.64      0.69      0.66       527\n",
            "  PERCENTAGE       0.78      0.83      0.80      3679\n",
            "      PERSON       0.85      0.83      0.84      6924\n",
            "     PRODUCT       0.81      0.80      0.81      2653\n",
            "        TIME       0.56      0.52      0.54      1634\n",
            "\n",
            "   micro avg       0.74      0.73      0.73     29920\n",
            "   macro avg       0.66      0.62      0.63     29920\n",
            "weighted avg       0.73      0.73      0.73     29920\n",
            "\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "         ART       0.29      0.20      0.24      1988\n",
            "        DATE       0.51      0.46      0.49       844\n",
            "       EVENT       0.85      0.40      0.55        84\n",
            "    FACILITY       0.72      0.68      0.70      1146\n",
            "         LAW       0.61      0.62      0.61      1103\n",
            "    LOCATION       0.77      0.79      0.78      8806\n",
            "       MONEY       0.61      0.60      0.60       532\n",
            "ORGANISATION       0.66      0.70      0.68       527\n",
            "  PERCENTAGE       0.78      0.83      0.80      3679\n",
            "      PERSON       0.85      0.83      0.84      6924\n",
            "     PRODUCT       0.83      0.80      0.81      2653\n",
            "        TIME       0.57      0.51      0.54      1634\n",
            "\n",
            "   micro avg       0.74      0.72      0.73     29920\n",
            "   macro avg       0.67      0.62      0.64     29920\n",
            "weighted avg       0.73      0.72      0.73     29920\n",
            "\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "         ART       0.31      0.18      0.23      1988\n",
            "        DATE       0.49      0.47      0.48       844\n",
            "       EVENT       0.83      0.40      0.54        84\n",
            "    FACILITY       0.72      0.68      0.70      1146\n",
            "         LAW       0.61      0.63      0.62      1103\n",
            "    LOCATION       0.77      0.79      0.78      8806\n",
            "       MONEY       0.60      0.60      0.60       532\n",
            "ORGANISATION       0.66      0.70      0.68       527\n",
            "  PERCENTAGE       0.78      0.82      0.80      3679\n",
            "      PERSON       0.85      0.83      0.84      6924\n",
            "     PRODUCT       0.81      0.81      0.81      2653\n",
            "        TIME       0.55      0.53      0.54      1634\n",
            "\n",
            "   micro avg       0.74      0.73      0.73     29920\n",
            "   macro avg       0.67      0.62      0.64     29920\n",
            "weighted avg       0.73      0.73      0.73     29920\n",
            "\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "    <div>\n",
              "      \n",
              "      <progress value='78' max='78' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
              "      [78/78 00:05]\n",
              "    </div>\n",
              "    "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "              precision    recall  f1-score   support\n",
            "\n",
            "         ART       0.49      0.14      0.21      1988\n",
            "        DATE       0.49      0.48      0.49       844\n",
            "       EVENT       0.88      0.36      0.51        84\n",
            "    FACILITY       0.72      0.68      0.70      1146\n",
            "         LAW       0.57      0.64      0.60      1103\n",
            "    LOCATION       0.77      0.79      0.78      8806\n",
            "       MONEY       0.62      0.57      0.59       532\n",
            "ORGANISATION       0.64      0.65      0.64       527\n",
            "  PERCENTAGE       0.77      0.83      0.80      3679\n",
            "      PERSON       0.87      0.81      0.84      6924\n",
            "     PRODUCT       0.82      0.80      0.81      2653\n",
            "        TIME       0.55      0.50      0.52      1634\n",
            "\n",
            "   micro avg       0.76      0.72      0.74     29920\n",
            "   macro avg       0.68      0.60      0.62     29920\n",
            "weighted avg       0.74      0.72      0.72     29920\n",
            "\n",
            "{'eval_loss': 0.27308720350265503, 'eval_precision': 0.7561836209025793, 'eval_recall': 0.7162767379679145, 'eval_f1': 0.7356893977103037, 'eval_runtime': 10.7521, 'eval_samples_per_second': 925.866, 'eval_steps_per_second': 7.254, 'epoch': 9.0}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Output folder shared by the trained model and its tokenizer\n",
        "save_directory = \"./Azeri-Turkish-BERT-NER\"\n",
        "\n",
        "# Persist the model first, then the tokenizer into the same folder, so both\n",
        "# can later be reloaded together via from_pretrained(save_directory).\n",
        "model.save_pretrained(save_directory)\n",
        "\n",
        "# Kept as the final expression: its return value (the written tokenizer\n",
        "# file paths) is what the cell displays.\n",
        "tokenizer.save_pretrained(save_directory)\n"
      ],
      "metadata": {
        "id": "7yEFe2_n-zPG",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "be0d5e0f-ff43-4be6-984e-fecbc8c0bdf4"
      },
      "execution_count": 18,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "('./Azeri-Turkish-BERT-NER/tokenizer_config.json',\n",
              " './Azeri-Turkish-BERT-NER/special_tokens_map.json',\n",
              " './Azeri-Turkish-BERT-NER/vocab.txt',\n",
              " './Azeri-Turkish-BERT-NER/added_tokens.json',\n",
              " './Azeri-Turkish-BERT-NER/tokenizer.json')"
            ]
          },
          "metadata": {},
          "execution_count": 18
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from transformers import pipeline\n",
        "\n",
        "# Reload tokenizer and model from the directory they were saved to,\n",
        "# so inference runs on the persisted artifacts.\n",
        "tokenizer = AutoTokenizer.from_pretrained(save_directory)\n",
        "model = AutoModelForTokenClassification.from_pretrained(save_directory)\n",
        "\n",
        "# Device index handed to the pipeline: 0 when CUDA is available, otherwise -1.\n",
        "if torch.cuda.is_available():\n",
        "    device = 0\n",
        "else:\n",
        "    device = -1\n",
        "\n",
        "# Build the NER pipeline; downstream code reads each result's \"entity_group\".\n",
        "nlp_ner = pipeline(\n",
        "    \"ner\",\n",
        "    model=model,\n",
        "    tokenizer=tokenizer,\n",
        "    aggregation_strategy=\"simple\",\n",
        "    device=device,\n",
        ")\n"
      ],
      "metadata": {
        "id": "zkECg3v9-zNQ"
      },
      "execution_count": 19,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Keys have the form \"LABEL_<i>\" (i = position in label_list) and are matched\n",
        "# against the pipeline's \"entity_group\" values; the non-entity tag \"O\" is left out.\n",
        "label_mapping = {f\"LABEL_{i}\": label for i, label in enumerate(label_list) if label != \"O\"}\n",
        "\n",
        "def evaluate_model(test_texts, true_labels):\n",
        "    \"\"\"Score ``nlp_ner`` predictions against reference tags and print the metrics.\n",
        "\n",
        "    For each text, the pipeline's entity groups are translated through\n",
        "    ``label_mapping`` (groups without a mapping are dropped) and the resulting\n",
        "    list is forced to the length of the reference list: surplus predictions\n",
        "    are cut off and missing ones are filled with \"O\". A warning is printed\n",
        "    whenever such an adjustment is made.\n",
        "\n",
        "    Args:\n",
        "        test_texts: Iterable of input strings, one per sample.\n",
        "        true_labels: Indexable, sized collection holding one list of reference\n",
        "            tags per sample, in the same order as ``test_texts``.\n",
        "\n",
        "    Returns:\n",
        "        None. Precision, recall, F1 and the classification report are printed;\n",
        "        an error message is printed instead when the number of texts does not\n",
        "        match the number of reference lists.\n",
        "\n",
        "    Raises:\n",
        "        IndexError: If ``test_texts`` yields more samples than ``true_labels`` holds.\n",
        "    \"\"\"\n",
        "    predictions = []\n",
        "    for i, text in enumerate(test_texts):\n",
        "        expected = len(true_labels[i])\n",
        "        # Membership is checked in the filter, so a plain lookup is enough here.\n",
        "        pred_labels = [\n",
        "            label_mapping[entity[\"entity_group\"]]\n",
        "            for entity in nlp_ner(text)\n",
        "            if entity[\"entity_group\"] in label_mapping\n",
        "        ]\n",
        "        if len(pred_labels) != expected:\n",
        "            print(f\"Warning: Inconsistent number of entities in sample {i+1}. Adjusting predicted entities.\")\n",
        "            # Truncate surplus predictions and pad missing ones with \"O\"; without the\n",
        "            # padding a single under-predicting sample made the whole evaluation bail out.\n",
        "            # (A negative multiplier yields an empty list, so truncation adds no padding.)\n",
        "            pred_labels = pred_labels[:expected] + [\"O\"] * (expected - len(pred_labels))\n",
        "        predictions.append(pred_labels)\n",
        "\n",
        "    # Every per-sample list now has the reference length, so only the number of\n",
        "    # samples can still disagree (fewer texts than reference lists).\n",
        "    if len(predictions) != len(true_labels):\n",
        "        print(\"Error: Could not align all samples correctly for evaluation.\")\n",
        "        return\n",
        "\n",
        "    precision = precision_score(true_labels, predictions)\n",
        "    recall = recall_score(true_labels, predictions)\n",
        "    f1 = f1_score(true_labels, predictions)\n",
        "    print(\"Precision:\", precision)\n",
        "    print(\"Recall:\", recall)\n",
        "    print(\"F1-Score:\", f1)\n",
        "    print(classification_report(true_labels, predictions))\n"
      ],
      "metadata": {
        "id": "SOFqXU-M_bxO"
      },
      "execution_count": 20,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Smoke test on a single hand-written sentence.\n",
        "test_texts = [\n",
        "    \"Shahla Khuduyeva və Pasha Sığorta şirkəti haqqında məlumat.\",\n",
        "]\n",
        "# One reference tag per expected entity; evaluate_model compares this list\n",
        "# with the mapped entity groups returned by the pipeline.\n",
        "true_labels = [\n",
        "    [\"B-PERSON\", \"B-ORGANISATION\"],\n",
        "]\n",
        "evaluate_model(test_texts=test_texts, true_labels=true_labels)\n"
      ],
      "metadata": {
        "id": "WRCB-_66_buE",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "b80e4507-f1c0-4dc3-f252-83fca43c6a11"
      },
      "execution_count": 21,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Warning: Inconsistent number of entities in sample 1. Adjusting predicted entities.\n",
            "Precision: 0.5\n",
            "Recall: 0.5\n",
            "F1-Score: 0.5\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "ORGANISATION       0.00      0.00      0.00         1\n",
            "      PERSON       0.50      1.00      0.67         1\n",
            "\n",
            "   micro avg       0.50      0.50      0.50         2\n",
            "   macro avg       0.25      0.50      0.33         2\n",
            "weighted avg       0.25      0.50      0.33         2\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "x53zS3Vv_brU"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "5Uoebirj_boo"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "RKounG2l_bl5"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}