{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Pin exact releases so Restart & Run All rebuilds the same environment on a fresh kernel.\n", "# -q keeps pip's dependency-resolution log out of the saved notebook.\n", "%pip install -q happytransformer==3.0.0 matplotlib==3.9.2" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\amanu\\projects\\nlp\\aman\\env\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "import pickle\n", "import matplotlib.pyplot as plt\n", "from happytransformer import HappyTextToText, TTTrainArgs" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "01/25/2025 23:54:32 - INFO - happytransformer.happy_transformer - Using device: cpu\n" ] } ], "source": [ "# Build the text-to-text wrapper around the pretrained t5-base checkpoint.\n", "happy_tt = HappyTextToText(model_type=\"T5\", model_name=\"t5-base\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "01/26/2025 00:28:57 - INFO - happytransformer.happy_transformer - Preprocessing dataset...\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Evaluating the model BEFORE training...\n" ] }, { "data": { "text/html": [ "\n", "