{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "A100", "machine_shape": "hm" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "ee9c321418ce4322a0d6b28a3f2ca6a1": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_e3e7f4f191ab43b581e38d2b79dc3e89", "IPY_MODEL_d4d0156eae3c41fe9f1f7ffd1cdd8c35", "IPY_MODEL_8cf941ff295e449589a2d9a454cfefba" ], "layout": "IPY_MODEL_a592ff42cd3346bca504eedba0f3955b" } }, "e3e7f4f191ab43b581e38d2b79dc3e89": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_417708e3b88b4764a97ad0d6ef78e38b", "placeholder": "", "style": "IPY_MODEL_ef7b5cca37be4c0285aa863783edd83d", "value": "README.md: 100%" } }, "d4d0156eae3c41fe9f1f7ffd1cdd8c35": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", 
"bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_66dc16b4997f46439bed98ad8d0e8732", "max": 2872, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_cbd8f4782051447085d2c4c84b8185fb", "value": 2872 } }, "8cf941ff295e449589a2d9a454cfefba": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f20f1c3e78e84e46b9f7022c791775e4", "placeholder": "", "style": "IPY_MODEL_d87fdc84c0eb4243804b5e6c86f7eb78", "value": " 2.87k/2.87k [00:00<00:00, 172kB/s]" } }, "a592ff42cd3346bca504eedba0f3955b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, 
"visibility": null, "width": null } }, "417708e3b88b4764a97ad0d6ef78e38b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ef7b5cca37be4c0285aa863783edd83d": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "66dc16b4997f46439bed98ad8d0e8732": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", 
"align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cbd8f4782051447085d2c4c84b8185fb": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "f20f1c3e78e84e46b9f7022c791775e4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, 
"justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d87fdc84c0eb4243804b5e6c86f7eb78": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5ccea58adc994a8082d77a0fd3dd5175": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_2bfa1a551787407397a7d14faffaa471", "IPY_MODEL_f3bec46279cd4f99a7e7693aca5a463a", "IPY_MODEL_f275a7c407fa4b699039ed7bd9a7cdec" ], "layout": "IPY_MODEL_7e25a8d5567f474cb450095ed1b409fa" } }, "2bfa1a551787407397a7d14faffaa471": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f4c4efcc5f10400caac3695865596340", "placeholder": "", "style": 
"IPY_MODEL_65c6ebc06606451bb347f7291f4af0cb", "value": "train-00000-of-00001.parquet: 100%" } }, "f3bec46279cd4f99a7e7693aca5a463a": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_751e56f58fef4cfa8dbc032790667c4f", "max": 13578023, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_ea9b7f2ae3cc4d819d1eedd09b498f4d", "value": 13578023 } }, "f275a7c407fa4b699039ed7bd9a7cdec": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e631c93fc21348528300a7a1013d39f5", "placeholder": "", "style": "IPY_MODEL_e3caeb64cd5e4cb08265ebcb7040b340", "value": " 13.6M/13.6M [00:00<00:00, 35.5MB/s]" } }, "7e25a8d5567f474cb450095ed1b409fa": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, 
"grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f4c4efcc5f10400caac3695865596340": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "65c6ebc06606451bb347f7291f4af0cb": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "751e56f58fef4cfa8dbc032790667c4f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ea9b7f2ae3cc4d819d1eedd09b498f4d": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "e631c93fc21348528300a7a1013d39f5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e3caeb64cd5e4cb08265ebcb7040b340": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "029202e5e41d404385ac4a3a36989700": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_9069aafbe84f47a6860ba99ff9de8fcd", "IPY_MODEL_41e04d59393245c898bae1bd80c824fe", "IPY_MODEL_acc44cac406a4f51ad13b82ad141f9ab" ], "layout": "IPY_MODEL_254f3a26f42e4ce193c7ea8283eb77af" } }, "9069aafbe84f47a6860ba99ff9de8fcd": { 
"model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_50566b0a37d64188beea5ea55edf60bc", "placeholder": "", "style": "IPY_MODEL_bbbd5baf90614ccaae15010e1982bd96", "value": "Generating train split: 100%" } }, "41e04d59393245c898bae1bd80c824fe": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6ddb7eaee2b7405e988582e5f6edc4d0", "max": 99545, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_5a1097b3c10846e29c9b838e9b3d41e7", "value": 99545 } }, "acc44cac406a4f51ad13b82ad141f9ab": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8b7e0c70f2e349fbae1146ee73014ebc", "placeholder": "", "style": "IPY_MODEL_ed9ff4cd49b646288e6d3d74efcec647", "value": " 99545/99545 [00:00<00:00, 354750.16 examples/s]" } }, "254f3a26f42e4ce193c7ea8283eb77af": { "model_module": 
"@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "50566b0a37d64188beea5ea55edf60bc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": 
null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bbbd5baf90614ccaae15010e1982bd96": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6ddb7eaee2b7405e988582e5f6edc4d0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5a1097b3c10846e29c9b838e9b3d41e7": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "8b7e0c70f2e349fbae1146ee73014ebc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ed9ff4cd49b646288e6d3d74efcec647": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "880e3efa6bcc49cd98207c04c476e918": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": 
{ "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_76ebb3ae1d354a139207ba45b0161206", "IPY_MODEL_8415ed49d34846efbb7d84341fe931c5", "IPY_MODEL_749f0a339075455f97b18ae42163457f" ], "layout": "IPY_MODEL_4f65674d3ae44bd7aa89873233d9c421" } }, "76ebb3ae1d354a139207ba45b0161206": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c454e4d7594543ee971fd29f32c02e0b", "placeholder": "", "style": "IPY_MODEL_f5a82328e6e842eeb772c171ccdb57ab", "value": "Map: 100%" } }, "8415ed49d34846efbb7d84341fe931c5": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f7cfa914ad6841559b1eb9b203474000", "max": 99545, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_6c09c2b4125d4eadad1a446095209d7f", "value": 99545 } }, "749f0a339075455f97b18ae42163457f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b5d8b7c444214f2ab616b12eec614716", "placeholder": "", "style": "IPY_MODEL_ea8e7d69c63f491ca615cddb77c0dde2", "value": " 99545/99545 [00:16<00:00, 7233.81 examples/s]" } }, "4f65674d3ae44bd7aa89873233d9c421": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c454e4d7594543ee971fd29f32c02e0b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": 
null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f5a82328e6e842eeb772c171ccdb57ab": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f7cfa914ad6841559b1eb9b203474000": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6c09c2b4125d4eadad1a446095209d7f": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b5d8b7c444214f2ab616b12eec614716": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ea8e7d69c63f491ca615cddb77c0dde2": { "model_module": 
"@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "696d4ebbf97c44e9a6cdff707b87e953": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_b7abe11e0fb041d49137296e1d4bf43e", "IPY_MODEL_83d1e890914f4206b53e7c8fd3eafdec", "IPY_MODEL_97258ff7b0874640bcb47e1d1beb73ad" ], "layout": "IPY_MODEL_ba56149214a7449ab4023150af34380e" } }, "b7abe11e0fb041d49137296e1d4bf43e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e7b683ab266740d2b97981ffd12e0f47", "placeholder": "", "style": "IPY_MODEL_34884b30a6e24ef2b7992e39fbd487df", "value": "tokenizer_config.json: 100%" } }, "83d1e890914f4206b53e7c8fd3eafdec": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6d8d6f3c10e54a99926a77759aed7980", "max": 373, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_2aaa0e920af74c91b1497d4e2f0d7c7f", "value": 373 } }, "97258ff7b0874640bcb47e1d1beb73ad": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_33b46ebefd7a4e19b20c9113a6306e33", "placeholder": "", "style": "IPY_MODEL_46ffdf824b8e496bb4c739375d6a4255", "value": " 373/373 [00:00<00:00, 29.3kB/s]" } }, "ba56149214a7449ab4023150af34380e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, 
"overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e7b683ab266740d2b97981ffd12e0f47": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "34884b30a6e24ef2b7992e39fbd487df": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6d8d6f3c10e54a99926a77759aed7980": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2aaa0e920af74c91b1497d4e2f0d7c7f": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "33b46ebefd7a4e19b20c9113a6306e33": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": 
null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "46ffdf824b8e496bb4c739375d6a4255": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "bddefea747c1479f94c77a3dd33d9e24": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_e1c7251d48a24f68a7990588c43bb431", "IPY_MODEL_4abaea6640c0425986e4ef171c6657cc", "IPY_MODEL_24c89f9d90be49ce8ebd052ab3377f6e" ], "layout": "IPY_MODEL_b933dfc3cb8e442d97be2ed145189923" } }, "e1c7251d48a24f68a7990588c43bb431": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": 
"IPY_MODEL_3d17626e1daf485e913ba3d8e9904763", "placeholder": "", "style": "IPY_MODEL_072e21b6c92e4c7fb79e3deb3ea26002", "value": "vocab.txt: 100%" } }, "4abaea6640c0425986e4ef171c6657cc": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f09e1e08239740299aa68312c3c41d22", "max": 251003, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_b1387e3381de4968938d371692b8e464", "value": 251003 } }, "24c89f9d90be49ce8ebd052ab3377f6e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_90ebf3029fd447ce9ca0dc82a120c7d5", "placeholder": "", "style": "IPY_MODEL_ce0ccd2032d24fb8b94e4b3c047569d4", "value": " 251k/251k [00:00<00:00, 7.94MB/s]" } }, "b933dfc3cb8e442d97be2ed145189923": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": 
null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3d17626e1daf485e913ba3d8e9904763": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "072e21b6c92e4c7fb79e3deb3ea26002": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": 
"1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f09e1e08239740299aa68312c3c41d22": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b1387e3381de4968938d371692b8e464": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "90ebf3029fd447ce9ca0dc82a120c7d5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", 
"_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ce0ccd2032d24fb8b94e4b3c047569d4": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1b2bd87619f045d5b0a7e346181f2e1e": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_5c9ae0652af44b92be325b553edba0d7", "IPY_MODEL_629332724bc04a28b67848977545ebc9", "IPY_MODEL_9761b542588f48b6bee3f472534af6c4" ], "layout": 
"IPY_MODEL_5ef9e949110d45e984cb5e079c434743" } }, "5c9ae0652af44b92be325b553edba0d7": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0a84cbe234c44ee0a5ee0841760ab58c", "placeholder": "", "style": "IPY_MODEL_6b378450c5444334847af9ea1710b5ec", "value": "tokenizer.json: 100%" } }, "629332724bc04a28b67848977545ebc9": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_549ca07ce27a41f1ba0565a7035e1295", "max": 497438, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_2e6cba86589a49449d63482bde3aef7d", "value": 497438 } }, "9761b542588f48b6bee3f472534af6c4": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4ad95f20bbad48a7b9386f83946cdd70", "placeholder": "", "style": "IPY_MODEL_9e1ffe13e96b424abea09ab9b52b7f8a", "value": " 497k/497k [00:00<00:00, 16.5MB/s]" } }, 
"5ef9e949110d45e984cb5e079c434743": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0a84cbe234c44ee0a5ee0841760ab58c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, 
"max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6b378450c5444334847af9ea1710b5ec": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "549ca07ce27a41f1ba0565a7035e1295": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2e6cba86589a49449d63482bde3aef7d": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": 
"1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "4ad95f20bbad48a7b9386f83946cdd70": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9e1ffe13e96b424abea09ab9b52b7f8a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6707a1f508b14ec38bbe5a79f8577806": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", 
"model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_e9401712d1854627bb1ae2d7b3d9ecd2", "IPY_MODEL_9b1ebf0e78ad4b52b0401245d611a8bc", "IPY_MODEL_a3593c2e810943ffa55e63596d749733" ], "layout": "IPY_MODEL_ac343eb8c4c14a4c85c2b4f14ecfce7a" } }, "e9401712d1854627bb1ae2d7b3d9ecd2": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_543afb09b2384517888421a011a88586", "placeholder": "", "style": "IPY_MODEL_ef6775804f044e5d8c654d42cf74bd08", "value": "special_tokens_map.json: 100%" } }, "9b1ebf0e78ad4b52b0401245d611a8bc": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7b43d651119e436c9d40f290f9eedf68", "max": 112, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_10dd21983d9c42b38374028b115e63e1", "value": 112 } }, "a3593c2e810943ffa55e63596d749733": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { 
"_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4107503fdb674b1c8c60685fb5fbe1db", "placeholder": "", "style": "IPY_MODEL_cb2ee95977c94e138024ca77bd05e794", "value": " 112/112 [00:00<00:00, 9.93kB/s]" } }, "ac343eb8c4c14a4c85c2b4f14ecfce7a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "543afb09b2384517888421a011a88586": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": 
"LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ef6775804f044e5d8c654d42cf74bd08": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7b43d651119e436c9d40f290f9eedf68": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, 
"justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "10dd21983d9c42b38374028b115e63e1": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "4107503fdb674b1c8c60685fb5fbe1db": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cb2ee95977c94e138024ca77bd05e794": { 
"model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "d0e50ec0c96741d48e76063bb183b8fa": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_8085b5ba4ec34e638ba014fbb37e21d4", "IPY_MODEL_a4a3807c55e8411cbc6c44c15180787b", "IPY_MODEL_9a9197e5815e472eadda980fce33b8cf" ], "layout": "IPY_MODEL_9cdb83209b154af39455ac4009bb5593" } }, "8085b5ba4ec34e638ba014fbb37e21d4": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d76e0ec95e354f8591f4dfc72f4039f9", "placeholder": "", "style": "IPY_MODEL_7cf4e5265dbb4092bfd7b8b7aedbd25f", "value": "Map: 100%" } }, "a4a3807c55e8411cbc6c44c15180787b": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_bd3c0665a69c4b6aa723fdf30454d337", "max": 99545, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_9149f0d1909f4dac95d3696fb8d61529", "value": 99545 } }, "9a9197e5815e472eadda980fce33b8cf": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e2e9e694e8dc436aba563d05de7e786b", "placeholder": "", "style": "IPY_MODEL_9e65b1fdbdbd4e91b1c32a6fa52c7561", "value": " 99545/99545 [00:53<00:00, 1716.94 examples/s]" } }, "9cdb83209b154af39455ac4009bb5593": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, 
"overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d76e0ec95e354f8591f4dfc72f4039f9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7cf4e5265dbb4092bfd7b8b7aedbd25f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "bd3c0665a69c4b6aa723fdf30454d337": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, 
"_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9149f0d1909f4dac95d3696fb8d61529": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "e2e9e694e8dc436aba563d05de7e786b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, 
"grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9e65b1fdbdbd4e91b1c32a6fa52c7561": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "source": [ "!pip install transformers datasets seqeval huggingface_hub\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5v8KnAaD-z9t", "outputId": "ec89bbe3-e698-4e6f-eb27-ea15e3d2a549" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.44.2)\n", "Collecting datasets\n", " Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)\n", "Collecting seqeval\n", " Downloading seqeval-1.2.2.tar.gz (43 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.10/dist-packages (0.24.7)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.16.1)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.26.4)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.2)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.9.11)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.32.3)\n", "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.5)\n", "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.6)\n", "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (17.0.0)\n", "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n", " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.2.2)\n", "Collecting xxhash (from datasets)\n", " Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", "Collecting multiprocess<0.70.17 (from datasets)\n", " Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n", "Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)\n", " Downloading 
fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.10.10)\n", "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.5.2)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (4.12.2)\n", "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.3)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.2.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.5.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n", "Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.17.0)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.2.3)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.8.30)\n", "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from 
scikit-learn>=0.21.3->seqeval) (1.13.1)\n", "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.2)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.5.0)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n", "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0)\n", "Downloading datasets-3.1.0-py3-none-any.whl (480 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m11.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading 
xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m16.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hBuilding wheels for collected packages: seqeval\n", " Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=c1030f22e743c2a4b8d3cb548cb8e8f138f24b58417a9f47bbc72df908c59d18\n", " Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa\n", "Successfully built seqeval\n", "Installing collected packages: xxhash, fsspec, dill, multiprocess, seqeval, datasets\n", " Attempting uninstall: fsspec\n", " Found existing installation: fsspec 2024.10.0\n", " Uninstalling fsspec-2024.10.0:\n", " Successfully uninstalled fsspec-2024.10.0\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
# %% Imports
# Standard library
import ast  # Safe evaluation of strings to Python objects (used to parse tokens/tags)
import os  # Access to environment variables
import warnings  # Used to handle or suppress warnings

# Numerical / deep-learning stack
import numpy as np  # Numerical operations and array manipulation
import torch  # PyTorch library for tensor computations and model handling

# Hugging Face datasets and Transformers
from datasets import load_dataset  # Loads datasets for model training and evaluation
from transformers import (
    AutoTokenizer,  # Initializes a tokenizer from a pre-trained model
    DataCollatorForTokenClassification,  # Handles padding and formatting of token classification data
    TrainingArguments,  # Defines training parameters like batch size and learning rate
    Trainer,  # High-level API for managing training and evaluation
    AutoModelForTokenClassification,  # Loads a pre-trained model for token classification tasks
    get_linear_schedule_with_warmup,  # Learning rate scheduler for gradual warm-up and linear decay
    EarlyStoppingCallback,  # Callback to stop training if validation performance plateaus
)

# Hugging Face Hub
from huggingface_hub import login  # Allows logging in to Hugging Face Hub to upload models

# seqeval metrics for NER evaluation (precision, recall, F1 and a per-entity report)
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report


# %% Log in to Hugging Face Hub
# SECURITY: an access token must never be hard-coded in a notebook. The token
# that was previously embedded in this cell is exposed to anyone who can read
# the file and should be revoked at https://huggingface.co/settings/tokens.
# The token is now taken from the HF_TOKEN environment variable; when it is
# unset, login() receives None and falls back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN"))


# %% Runtime configuration
# Disable WandB (Weights & Biases) logging to avoid unwanted log outputs during training
os.environ["WANDB_DISABLED"] = "true"

# Suppress warning messages to keep output clean, especially during training and evaluation
warnings.filterwarnings("ignore")


# %% Load and parse the dataset
# Load the Azerbaijani NER dataset from Hugging Face
dataset = load_dataset("LocalDoc/azerbaijani-ner-dataset")
print(dataset)  # Display dataset structure (e.g., train/validation splits)


def preprocess_example(example):
    """Parse the string-encoded ``tokens`` and ``ner_tags`` fields of one row.

    Both fields are parsed with ``ast.literal_eval``; every tag is then
    converted to ``int``.

    Args:
        example: A dataset row (dict-like) with ``index``, ``tokens`` and
            ``ner_tags`` keys.

    Returns:
        The same ``example`` object with ``tokens`` and ``ner_tags`` replaced
        by lists. When either field cannot be parsed, a message is printed and
        both fields are set to empty lists so the row can be filtered out
        afterwards.
    """
    try:
        tokens = ast.literal_eval(example["tokens"])
        ner_tags = list(map(int, ast.literal_eval(example["ner_tags"])))
    except (ValueError, SyntaxError, TypeError) as e:
        # TypeError covers non-numeric entries such as None inside the tag
        # list, which would otherwise abort the whole map() call.
        print(f"Skipping malformed example: {example['index']} due to error: {e}")
        tokens, ner_tags = [], []

    example["tokens"] = tokens
    example["ner_tags"] = ner_tags
    return example


# Apply preprocessing to each dataset entry, ensuring consistent formatting
dataset = dataset.map(preprocess_example)

# Actually drop the rows reported as skipped above: they carry no tokens and
# would otherwise become training examples made only of ignored labels.
dataset = dataset.filter(lambda example: len(example["tokens"]) > 0)
[ { "output_type": "display_data", "data": { "text/plain": [ "README.md: 0%| | 0.00/2.87k [00:00<?, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "ee9c321418ce4322a0d6b28a3f2ca6a1" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "train-00000-of-00001.parquet: 0%| | 0.00/13.6M [00:00<?, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "5ccea58adc994a8082d77a0fd3dd5175" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "Generating train split: 0%| | 0/99545 [00:00<?, ? examples/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "029202e5e41d404385ac4a3a36989700" } }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "DatasetDict({\n", " train: Dataset({\n", " features: ['index', 'tokens', 'ner_tags'],\n", " num_rows: 99545\n", " })\n", "})\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Map: 0%| | 0/99545 [00:00<?, ? 
examples/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "880e3efa6bcc49cd98207c04c476e918" } }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Skipping malformed example: 7171f30e-fa1e-49ec-975e-16c88c9b95e9 due to error: malformed node or string: None\n", "Skipping malformed example: 91dfd97b-2997-4080-8054-00cadec14dfc due to error: malformed node or string: None\n", "Skipping malformed example: cfb8beb4-ae7a-4185-9a54-08b0e85d03d3 due to error: malformed node or string: None\n", "Skipping malformed example: 5f0a2991-38b3-435b-9059-a05382e89a62 due to error: malformed node or string: None\n", "Skipping malformed example: 9d705fde-ce09-4bef-9f4a-9ad1fa452cc9 due to error: malformed node or string: None\n", "Skipping malformed example: 182457fb-c648-4fca-a207-af5a00072d4a due to error: malformed node or string: None\n", "Skipping malformed example: d9205ccd-c692-4cf1-8310-181de8f4cdc8 due to error: malformed node or string: None\n", "Skipping malformed example: dac55265-38cd-4c4b-9e56-a48a77e108d4 due to error: malformed node or string: None\n", "Skipping malformed example: f3d38b45-0035-45ab-b0aa-79ae7c63ba7a due to error: malformed node or string: None\n", "Skipping malformed example: 5ed32762-bf5b-4db4-9dbd-07cd5c0541dc due to error: malformed node or string: None\n", "Skipping malformed example: 426fc958-8c6b-41d8-acfe-2082a6be6ada due to error: malformed node or string: None\n", "Skipping malformed example: 4b5aa52d-cd5e-43ee-ac4f-7a8da00860e1 due to error: malformed node or string: None\n", "Skipping malformed example: 53b1ce49-1f71-4770-a344-bf1d804fefd4 due to error: malformed node or string: None\n", "Skipping malformed example: 03e9e957-da8f-45dc-84d0-e556bfd023b3 due to error: malformed node or string: None\n", "Skipping malformed example: b7e12634-f7be-42cb-8e76-837af2f2d877 due to error: malformed node or string: None\n", "Skipping malformed example: 
# %% Tokenizer and label alignment
# Maximum number of sub-word tokens kept per sentence (longer inputs are
# truncated, shorter ones are padded up to this length).
MAX_SEQ_LENGTH = 128

# Tokenizer of the Turkish cased BERT NER checkpoint that is fine-tuned below.
# (An earlier revision of this notebook used "xlm-roberta-large" here.)
tokenizer = AutoTokenizer.from_pretrained("akdeniz27/bert-base-turkish-cased-ner")


def _align_labels(word_ids, ner_tags):
    """Map word-level NER tags onto sub-word tokens.

    Args:
        word_ids: One entry per produced token; ``None`` for tokens that do not
            belong to any input word, otherwise the index of the source word.
        ner_tags: Word-level integer tags of the sentence.

    Returns:
        A list with one label per token. The first token of every word gets
        that word's tag; every other position gets -100 (the label value this
        notebook filters out in ``compute_metrics`` and that is ignored in the
        loss). Positions whose word index is outside ``ner_tags`` also get -100.
    """
    aligned = []
    previous_word_idx = None
    for word_idx in word_ids:
        is_first_token_of_word = word_idx is not None and word_idx != previous_word_idx
        if is_first_token_of_word and word_idx < len(ner_tags):
            aligned.append(ner_tags[word_idx])
        else:
            # Special/padding tokens, continuation sub-words, or a word
            # without a matching tag.
            aligned.append(-100)
        previous_word_idx = word_idx
    return aligned


def tokenize_and_align_labels(example, max_length=MAX_SEQ_LENGTH):
    """Tokenize one pre-split sentence and attach token-level labels.

    Args:
        example: A dataset row with ``tokens`` (list of words) and
            ``ner_tags`` (list of integer tags).
        max_length: Length every sequence is truncated/padded to. Defaults to
            ``MAX_SEQ_LENGTH`` (128), the value used originally.

    Returns:
        The tokenizer output with an extra ``labels`` entry aligned to the
        produced tokens.
    """
    tokenized_inputs = tokenizer(
        example["tokens"],  # List of words (tokens) in the sentence
        truncation=True,  # Truncate sentences longer than max_length
        is_split_into_words=True,  # Input is already a list of words
        padding="max_length",  # Pad every sequence to max_length
        max_length=max_length,
    )
    tokenized_inputs["labels"] = _align_labels(
        tokenized_inputs.word_ids(), example["ner_tags"]
    )
    return tokenized_inputs


# Apply tokenization and label alignment function to the dataset
tokenized_datasets = dataset.map(tokenize_and_align_labels, batched=False)
"9a9197e5815e472eadda980fce33b8cf", "9cdb83209b154af39455ac4009bb5593", "d76e0ec95e354f8591f4dfc72f4039f9", "7cf4e5265dbb4092bfd7b8b7aedbd25f", "bd3c0665a69c4b6aa723fdf30454d337", "9149f0d1909f4dac95d3696fb8d61529", "e2e9e694e8dc436aba563d05de7e786b", "9e65b1fdbdbd4e91b1c32a6fa52c7561" ] }, "outputId": "3cea5198-82bc-4d69-e886-9a3bbe6f1c87" }, "execution_count": 7, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "696d4ebbf97c44e9a6cdff707b87e953", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/373 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bddefea747c1479f94c77a3dd33d9e24", "version_major": 2, "version_minor": 0 }, "text/plain": [ "vocab.txt: 0%| | 0.00/251k [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1b2bd87619f045d5b0a7e346181f2e1e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.json: 0%| | 0.00/497k [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6707a1f508b14ec38bbe5a79f8577806", "version_major": 2, "version_minor": 0 }, "text/plain": [ "special_tokens_map.json: 0%| | 0.00/112 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d0e50ec0c96741d48e76063bb183b8fa", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/99545 [00:00<?, ? 
# %% Train/validation split
# Fixed seed so the 90-10 split (and therefore every reported metric) is
# reproducible across kernel restarts.
SPLIT_SEED = 42

# Guard against re-running this cell: once the "test" split exists, splitting
# again would silently carve another 10% out of the already reduced train set.
if "test" not in tokenized_datasets:
    tokenized_datasets = tokenized_datasets["train"].train_test_split(
        test_size=0.1, seed=SPLIT_SEED
    )
print(tokenized_datasets)  # Output structure of split datasets


# %% Label inventory
# Entity labels for NER tagging with B- (beginning) and I- (inside) markers.
# The position of a label in this list is the integer class id used by the model.
# NOTE(review): the dataset's integer `ner_tags` are used directly as indices
# into this list — confirm against the dataset card that its tag ids follow
# this interleaved B-/I- layout.
label_list = [
    "O",  # Outside of a named entity
    "B-PERSON", "I-PERSON",  # Person name (e.g., "John" in "John Doe")
    "B-LOCATION", "I-LOCATION",  # Geographical location (e.g., "Paris")
    "B-ORGANISATION", "I-ORGANISATION",  # Organization name (e.g., "UNICEF")
    "B-DATE", "I-DATE",  # Date entity (e.g., "2024-11-05")
    "B-TIME", "I-TIME",  # Time (e.g., "12:00 PM")
    "B-MONEY", "I-MONEY",  # Monetary values (e.g., "$20")
    "B-PERCENTAGE", "I-PERCENTAGE",  # Percentage values (e.g., "20%")
    "B-FACILITY", "I-FACILITY",  # Physical facilities (e.g., "Airport")
    "B-PRODUCT", "I-PRODUCT",  # Product names (e.g., "iPhone")
    "B-EVENT", "I-EVENT",  # Named events (e.g., "Olympics")
    "B-ART", "I-ART",  # Works of art (e.g., "Mona Lisa")
    "B-LAW", "I-LAW",  # Laws and legal documents (e.g., "Article 50")
    "B-LANGUAGE", "I-LANGUAGE",  # Languages (e.g., "Azerbaijani")
    "B-GPE", "I-GPE",  # Geopolitical entities (e.g., "Europe")
    "B-NORP", "I-NORP",  # Nationalities, religious groups, political groups
    "B-ORDINAL", "I-ORDINAL",  # Ordinal indicators (e.g., "first", "second")
    "B-CARDINAL", "I-CARDINAL",  # Cardinal numbers (e.g., "three")
    "B-DISEASE", "I-DISEASE",  # Diseases (e.g., "COVID-19")
    "B-CONTACT", "I-CONTACT",  # Contact info (e.g., email or phone number)
    "B-ADAGE", "I-ADAGE",  # Common sayings or adages
    "B-QUANTITY", "I-QUANTITY",  # Quantities (e.g., "5 km")
    "B-MISCELLANEOUS", "I-MISCELLANEOUS",  # Miscellaneous entities not fitting other categories
    "B-POSITION", "I-POSITION",  # Job titles or positions (e.g., "CEO")
    "B-PROJECT", "I-PROJECT",  # Project names (e.g., "Project Apollo")
]

# Id <-> name mappings stored in the model config, so saved/pushed checkpoints
# report entity names instead of generic "LABEL_<n>" placeholders.
id2label = dict(enumerate(label_list))
label2id = {label: idx for idx, label in id2label.items()}


# %% Data collator and model
# Initialize a data collator to handle padding and formatting for token classification
data_collator = DataCollatorForTokenClassification(tokenizer)

# Load the pre-trained Turkish BERT NER checkpoint and replace its
# classification head with one sized for this label set.
# (An earlier revision of this notebook used "xlm-roberta-large" here.)
model = AutoModelForTokenClassification.from_pretrained(
    "akdeniz27/bert-base-turkish-cased-ner",
    num_labels=len(label_list),  # Must match the number of labels for this NER task
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,  # Allow loading despite mismatched classifier layer size
)
"execution_count": 11, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Some weights of BertForTokenClassification were not initialized from the model checkpoint at akdeniz27/bert-base-turkish-cased-ner and are newly initialized because the shapes did not match:\n", "- classifier.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([49]) in the model instantiated\n", "- classifier.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([49, 768]) in the model instantiated\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ] }, { "cell_type": "code", "source": [ "# Define a function to compute evaluation metrics for the model's predictions\n", "def compute_metrics(p):\n", " predictions, labels = p # Unpack predictions and true labels from the input\n", "\n", " # Convert logits to predicted label indices by taking the argmax along the last axis\n", " predictions = np.argmax(predictions, axis=2)\n", "\n", " # Filter out special padding labels (-100) and convert indices to label names\n", " true_labels = [[label_list[l] for l in label if l != -100] for label in labels]\n", " true_predictions = [\n", " [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n", " for prediction, label in zip(predictions, labels)\n", " ]\n", "\n", " # Print a detailed classification report for each label category\n", " print(classification_report(true_labels, true_predictions))\n", "\n", " # Calculate and return key evaluation metrics\n", " return {\n", " # Precision measures the accuracy of predicted positive instances\n", " # Important in NER to ensure entity predictions are correct and reduce false positives.\n", " \"precision\": precision_score(true_labels, true_predictions),\n", "\n", " # Recall measures the model's ability to capture all relevant entities\n", " # Essential in NER to ensure the model captures all entities, reducing false 
negatives.\n", " \"recall\": recall_score(true_labels, true_predictions),\n", "\n", " # F1-score is the harmonic mean of precision and recall, balancing both metrics\n", " # Useful in NER for providing an overall performance measure, especially when precision and recall are both important.\n", " \"f1\": f1_score(true_labels, true_predictions),\n", " }" ], "metadata": { "id": "9b7EajE_-zhS" }, "execution_count": 12, "outputs": [] }, { "cell_type": "code", "source": [ "# Set up training arguments for model training, defining essential training configurations\n", "training_args = TrainingArguments(\n", " output_dir=\"./results\", # Directory to save model checkpoints and final outputs\n", " evaluation_strategy=\"epoch\", # Evaluate model on the validation set at the end of each epoch\n", " save_strategy=\"epoch\", # Save model checkpoints at the end of each epoch\n", " learning_rate=2e-5, # Set a low learning rate to ensure stable training for fine-tuning\n", " per_device_train_batch_size=128, # Number of examples per batch during training, balancing speed and memory\n", " per_device_eval_batch_size=128, # Number of examples per batch during evaluation\n", " num_train_epochs=10, # Number of full training passes over the dataset\n", " weight_decay=0.005, # Regularization term to prevent overfitting by penalizing large weights\n", " fp16=True, # Use 16-bit floating point for faster and memory-efficient training\n", " logging_dir='./logs', # Directory to store training logs\n", " save_total_limit=2, # Keep only the 2 latest model checkpoints to save storage space\n", " load_best_model_at_end=True, # Load the best model based on metrics at the end of training\n", " metric_for_best_model=\"f1\", # Use F1-score to determine the best model checkpoint\n", " report_to=\"none\" # Disable reporting to external services (useful in local runs)\n", ")\n" ], "metadata": { "id": "PmJTMpp6-zew" }, "execution_count": 15, "outputs": [] }, { "cell_type": "code", "source": [ "# Initialize 
# Early stopping: give up once the monitored metric has failed to improve
# for 5 consecutive evaluations.
early_stopping = EarlyStoppingCallback(early_stopping_patience=5)

# Wire the model, data, and metric function into a Trainer, which runs the
# training loop.
trainer = Trainer(
    model=model,                                # Model to be fine-tuned
    args=training_args,                         # Configuration built with TrainingArguments
    train_dataset=tokenized_datasets["train"],  # Training portion of the split
    eval_dataset=tokenized_datasets["test"],    # Held-out portion used for validation
    tokenizer=tokenizer,                        # Tokenizer used for processing input text
    data_collator=data_collator,                # Pads and batches examples
    compute_metrics=compute_metrics,            # Produces precision / recall / F1 at each evaluation
    callbacks=[early_stopping],
)

# Run training and keep the object returned by train()
training_metrics = trainer.train()

# Score the model on the validation set once training has finished
eval_results = trainer.evaluate()

# Show the evaluation results, including precision, recall, and F1-score
print(eval_results)
<th>Recall</th>\n", " <th>F1</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <td>1</td>\n", " <td>0.433100</td>\n", " <td>0.306711</td>\n", " <td>0.739000</td>\n", " <td>0.693282</td>\n", " <td>0.715412</td>\n", " </tr>\n", " <tr>\n", " <td>2</td>\n", " <td>0.292700</td>\n", " <td>0.275796</td>\n", " <td>0.781565</td>\n", " <td>0.688937</td>\n", " <td>0.732334</td>\n", " </tr>\n", " <tr>\n", " <td>3</td>\n", " <td>0.250600</td>\n", " <td>0.275115</td>\n", " <td>0.758261</td>\n", " <td>0.709425</td>\n", " <td>0.733031</td>\n", " </tr>\n", " <tr>\n", " <td>4</td>\n", " <td>0.233700</td>\n", " <td>0.273087</td>\n", " <td>0.756184</td>\n", " <td>0.716277</td>\n", " <td>0.735689</td>\n", " </tr>\n", " <tr>\n", " <td>5</td>\n", " <td>0.214800</td>\n", " <td>0.278477</td>\n", " <td>0.756051</td>\n", " <td>0.710996</td>\n", " <td>0.732832</td>\n", " </tr>\n", " <tr>\n", " <td>6</td>\n", " <td>0.199200</td>\n", " <td>0.286102</td>\n", " <td>0.755068</td>\n", " <td>0.717012</td>\n", " <td>0.735548</td>\n", " </tr>\n", " <tr>\n", " <td>7</td>\n", " <td>0.192800</td>\n", " <td>0.297157</td>\n", " <td>0.742326</td>\n", " <td>0.725802</td>\n", " <td>0.733971</td>\n", " </tr>\n", " <tr>\n", " <td>8</td>\n", " <td>0.178900</td>\n", " <td>0.304510</td>\n", " <td>0.743206</td>\n", " <td>0.723930</td>\n", " <td>0.733442</td>\n", " </tr>\n", " <tr>\n", " <td>9</td>\n", " <td>0.171700</td>\n", " <td>0.313845</td>\n", " <td>0.743145</td>\n", " <td>0.725535</td>\n", " <td>0.734234</td>\n", " </tr>\n", " </tbody>\n", "</table><p>" ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ " precision recall f1-score support\n", "\n", " ART 0.64 0.11 0.19 1988\n", " DATE 0.46 0.45 0.46 844\n", " EVENT 0.81 0.26 0.40 84\n", " FACILITY 0.68 0.68 0.68 1146\n", " LAW 0.58 0.54 0.56 1103\n", " LOCATION 0.73 0.78 0.75 8806\n", " MONEY 0.55 0.47 0.51 532\n", "ORGANISATION 0.62 0.61 0.62 527\n", " PERCENTAGE 0.78 0.80 0.79 3679\n", " PERSON 0.82 0.83 0.82 6924\n", 
" PRODUCT 0.80 0.78 0.79 2653\n", " TIME 0.58 0.35 0.44 1634\n", "\n", " micro avg 0.74 0.69 0.72 29920\n", " macro avg 0.67 0.56 0.58 29920\n", "weighted avg 0.73 0.69 0.70 29920\n", "\n", " precision recall f1-score support\n", "\n", " ART 0.55 0.13 0.21 1988\n", " DATE 0.57 0.40 0.47 844\n", " EVENT 0.88 0.33 0.48 84\n", " FACILITY 0.75 0.65 0.70 1146\n", " LAW 0.60 0.56 0.58 1103\n", " LOCATION 0.79 0.76 0.77 8806\n", " MONEY 0.60 0.54 0.57 532\n", "ORGANISATION 0.67 0.64 0.65 527\n", " PERCENTAGE 0.78 0.81 0.80 3679\n", " PERSON 0.87 0.81 0.84 6924\n", " PRODUCT 0.81 0.80 0.81 2653\n", " TIME 0.63 0.36 0.46 1634\n", "\n", " micro avg 0.78 0.69 0.73 29920\n", " macro avg 0.71 0.57 0.61 29920\n", "weighted avg 0.76 0.69 0.71 29920\n", "\n", " precision recall f1-score support\n", "\n", " ART 0.48 0.14 0.22 1988\n", " DATE 0.54 0.44 0.48 844\n", " EVENT 0.88 0.35 0.50 84\n", " FACILITY 0.72 0.68 0.70 1146\n", " LAW 0.60 0.59 0.60 1103\n", " LOCATION 0.75 0.79 0.77 8806\n", " MONEY 0.60 0.54 0.57 532\n", "ORGANISATION 0.63 0.67 0.65 527\n", " PERCENTAGE 0.77 0.83 0.80 3679\n", " PERSON 0.88 0.81 0.84 6924\n", " PRODUCT 0.82 0.81 0.81 2653\n", " TIME 0.57 0.44 0.50 1634\n", "\n", " micro avg 0.76 0.71 0.73 29920\n", " macro avg 0.69 0.59 0.62 29920\n", "weighted avg 0.74 0.71 0.72 29920\n", "\n", " precision recall f1-score support\n", "\n", " ART 0.49 0.14 0.21 1988\n", " DATE 0.49 0.48 0.49 844\n", " EVENT 0.88 0.36 0.51 84\n", " FACILITY 0.72 0.68 0.70 1146\n", " LAW 0.57 0.64 0.60 1103\n", " LOCATION 0.77 0.79 0.78 8806\n", " MONEY 0.62 0.57 0.59 532\n", "ORGANISATION 0.64 0.65 0.64 527\n", " PERCENTAGE 0.77 0.83 0.80 3679\n", " PERSON 0.87 0.81 0.84 6924\n", " PRODUCT 0.82 0.80 0.81 2653\n", " TIME 0.55 0.50 0.52 1634\n", "\n", " micro avg 0.76 0.72 0.74 29920\n", " macro avg 0.68 0.60 0.62 29920\n", "weighted avg 0.74 0.72 0.72 29920\n", "\n", " precision recall f1-score support\n", "\n", " ART 0.35 0.18 0.23 1988\n", " DATE 0.53 0.43 0.48 844\n", " EVENT 
0.82 0.39 0.53 84\n", " FACILITY 0.71 0.67 0.69 1146\n", " LAW 0.61 0.59 0.60 1103\n", " LOCATION 0.78 0.78 0.78 8806\n", " MONEY 0.60 0.57 0.58 532\n", "ORGANISATION 0.61 0.67 0.64 527\n", " PERCENTAGE 0.78 0.81 0.80 3679\n", " PERSON 0.86 0.83 0.84 6924\n", " PRODUCT 0.84 0.77 0.81 2653\n", " TIME 0.57 0.48 0.52 1634\n", "\n", " micro avg 0.76 0.71 0.73 29920\n", " macro avg 0.67 0.60 0.63 29920\n", "weighted avg 0.74 0.71 0.72 29920\n", "\n", " precision recall f1-score support\n", "\n", " ART 0.36 0.16 0.22 1988\n", " DATE 0.50 0.48 0.49 844\n", " EVENT 0.82 0.38 0.52 84\n", " FACILITY 0.72 0.68 0.70 1146\n", " LAW 0.63 0.59 0.61 1103\n", " LOCATION 0.77 0.79 0.78 8806\n", " MONEY 0.59 0.60 0.60 532\n", "ORGANISATION 0.67 0.71 0.69 527\n", " PERCENTAGE 0.77 0.84 0.80 3679\n", " PERSON 0.87 0.81 0.84 6924\n", " PRODUCT 0.84 0.80 0.82 2653\n", " TIME 0.57 0.50 0.53 1634\n", "\n", " micro avg 0.76 0.72 0.74 29920\n", " macro avg 0.68 0.61 0.63 29920\n", "weighted avg 0.74 0.72 0.73 29920\n", "\n", " precision recall f1-score support\n", "\n", " ART 0.32 0.18 0.23 1988\n", " DATE 0.50 0.49 0.49 844\n", " EVENT 0.85 0.39 0.54 84\n", " FACILITY 0.73 0.68 0.70 1146\n", " LAW 0.60 0.62 0.61 1103\n", " LOCATION 0.77 0.79 0.78 8806\n", " MONEY 0.58 0.61 0.59 532\n", "ORGANISATION 0.64 0.69 0.66 527\n", " PERCENTAGE 0.78 0.83 0.80 3679\n", " PERSON 0.85 0.83 0.84 6924\n", " PRODUCT 0.81 0.80 0.81 2653\n", " TIME 0.56 0.52 0.54 1634\n", "\n", " micro avg 0.74 0.73 0.73 29920\n", " macro avg 0.66 0.62 0.63 29920\n", "weighted avg 0.73 0.73 0.73 29920\n", "\n", " precision recall f1-score support\n", "\n", " ART 0.29 0.20 0.24 1988\n", " DATE 0.51 0.46 0.49 844\n", " EVENT 0.85 0.40 0.55 84\n", " FACILITY 0.72 0.68 0.70 1146\n", " LAW 0.61 0.62 0.61 1103\n", " LOCATION 0.77 0.79 0.78 8806\n", " MONEY 0.61 0.60 0.60 532\n", "ORGANISATION 0.66 0.70 0.68 527\n", " PERCENTAGE 0.78 0.83 0.80 3679\n", " PERSON 0.85 0.83 0.84 6924\n", " PRODUCT 0.83 0.80 0.81 2653\n", " TIME 0.57 
0.51 0.54 1634\n", "\n", " micro avg 0.74 0.72 0.73 29920\n", " macro avg 0.67 0.62 0.64 29920\n", "weighted avg 0.73 0.72 0.73 29920\n", "\n", " precision recall f1-score support\n", "\n", " ART 0.31 0.18 0.23 1988\n", " DATE 0.49 0.47 0.48 844\n", " EVENT 0.83 0.40 0.54 84\n", " FACILITY 0.72 0.68 0.70 1146\n", " LAW 0.61 0.63 0.62 1103\n", " LOCATION 0.77 0.79 0.78 8806\n", " MONEY 0.60 0.60 0.60 532\n", "ORGANISATION 0.66 0.70 0.68 527\n", " PERCENTAGE 0.78 0.82 0.80 3679\n", " PERSON 0.85 0.83 0.84 6924\n", " PRODUCT 0.81 0.81 0.81 2653\n", " TIME 0.55 0.53 0.54 1634\n", "\n", " micro avg 0.74 0.73 0.73 29920\n", " macro avg 0.67 0.62 0.64 29920\n", "weighted avg 0.73 0.73 0.73 29920\n", "\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "<IPython.core.display.HTML object>" ], "text/html": [ "\n", " <div>\n", " \n", " <progress value='78' max='78' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", " [78/78 00:05]\n", " </div>\n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ " precision recall f1-score support\n", "\n", " ART 0.49 0.14 0.21 1988\n", " DATE 0.49 0.48 0.49 844\n", " EVENT 0.88 0.36 0.51 84\n", " FACILITY 0.72 0.68 0.70 1146\n", " LAW 0.57 0.64 0.60 1103\n", " LOCATION 0.77 0.79 0.78 8806\n", " MONEY 0.62 0.57 0.59 532\n", "ORGANISATION 0.64 0.65 0.64 527\n", " PERCENTAGE 0.77 0.83 0.80 3679\n", " PERSON 0.87 0.81 0.84 6924\n", " PRODUCT 0.82 0.80 0.81 2653\n", " TIME 0.55 0.50 0.52 1634\n", "\n", " micro avg 0.76 0.72 0.74 29920\n", " macro avg 0.68 0.60 0.62 29920\n", "weighted avg 0.74 0.72 0.72 29920\n", "\n", "{'eval_loss': 0.27308720350265503, 'eval_precision': 0.7561836209025793, 'eval_recall': 0.7162767379679145, 'eval_f1': 0.7356893977103037, 'eval_runtime': 10.7521, 'eval_samples_per_second': 925.866, 'eval_steps_per_second': 7.254, 'epoch': 9.0}\n" ] } ] }, { "cell_type": "code", "source": [ "# Define the directory where the trained model and tokenizer will be 
# Define the directory where the trained model and tokenizer will be saved
save_directory = "./Azeri-Turkish-BERT-NER"

# Save the trained model to the specified directory
model.save_pretrained(save_directory)

# Save the tokenizer to the same directory for compatibility with the model
tokenizer.save_pretrained(save_directory)


from transformers import pipeline

# Reload tokenizer and model from disk so inference uses exactly what was saved.
# NOTE: this rebinds the notebook-level `tokenizer` and `model` names.
tokenizer = AutoTokenizer.from_pretrained(save_directory)
model = AutoModelForTokenClassification.from_pretrained(save_directory)

# Initialize the NER pipeline on the GPU when one is available, otherwise on CPU (-1)
device = 0 if torch.cuda.is_available() else -1
nlp_ner = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple", device=device)


# Translate the generic "LABEL_<i>" class names back to the tags in `label_list`.
# "O" is left out on purpose, so predictions for the outside class are dropped
# by the membership filter in evaluate_model.
label_mapping = {f"LABEL_{i}": label for i, label in enumerate(label_list) if label != "O"}


def evaluate_model(test_texts, true_labels):
    """Run the NER pipeline over raw texts and print scores against gold tags.

    For each text, the entity groups returned by ``nlp_ner`` are mapped through
    ``label_mapping`` (groups without a mapping are ignored). The resulting
    list is then forced to the length of the gold list for that sample:
    surplus predictions are cut off and missing ones are filled with ``"O"``.
    A warning is printed whenever such an adjustment is needed.

    NOTE(review): predictions are compared with the gold tags purely by
    position in these lists, not by character or token offsets, so the
    numbers are only a rough sanity check.

    Args:
        test_texts: Iterable of input strings.
        true_labels: Sequence of gold tag lists, one per text, in the same order.

    Returns:
        None. Precision, recall, F1 and a classification report are printed.
        If the number of texts and gold lists differ, an error message is
        printed and nothing is scored.
    """
    texts = list(test_texts)
    if len(texts) != len(true_labels):
        # Without one gold list per text the samples cannot be paired up.
        print("Error: Could not align all samples correctly for evaluation.")
        return

    predictions = []
    for i, (text, gold) in enumerate(zip(texts, true_labels)):
        pred_entities = nlp_ner(text)
        pred_labels = [
            label_mapping[entity["entity_group"]]
            for entity in pred_entities
            if entity["entity_group"] in label_mapping
        ]
        if len(pred_labels) != len(gold):
            print(f"Warning: Inconsistent number of entities in sample {i+1}. Adjusting predicted entities.")
            # Too many predictions: keep the first len(gold).
            pred_labels = pred_labels[:len(gold)]
            # Too few predictions: count the missing ones as "O" so the sample
            # is still scored (as misses) instead of aborting the whole run.
            pred_labels += ["O"] * (len(gold) - len(pred_labels))
        predictions.append(pred_labels)

    precision = precision_score(true_labels, predictions)
    recall = recall_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1-Score:", f1)
    print(classification_report(true_labels, predictions))


test_texts = ["Shahla Khuduyeva və Pasha Sığorta şirkəti haqqında məlumat."]
true_labels = [["B-PERSON", "B-ORGANISATION"]]
evaluate_model(test_texts, true_labels)