# Module prelude: imports, logging setup, and module-level constants.
#
# Visible contents, in order:
#   * imports from transformers, shared, functools, dataclasses, typing, plus
#     torch, classify, base64, re, requests, json and logging;
#   * logging.basicConfig() followed by a module-level `logger` obtained via
#     logging.getLogger(__name__);
#   * INNERTUBE_KEY: a str produced by base64-decoding a bytes literal and then
#     calling .decode() on the result;
#   * YT_CONTEXT: a dict with a single 'client' entry holding 'userAgent',
#     'clientName' and 'clientVersion' strings;
#   * _YT_INITIAL_DATA_RE: a raw-string regex whose visible part matches either
#     window["ytInitialData"] or a bare ytInitialData, followed by '=' and a
#     lazily captured '{...}' group and ';'.
#
# NOTE(review): these statements appear collapsed onto one physical line; as
# written, everything after the first '#' below is lexed as a comment and the
# import statements run together. Presumably the original line breaks were
# lost upstream -- TODO confirm and restore them.
# NOTE(review): the _YT_INITIAL_DATA_RE literal is cut off in this view (no
# closing quote is visible); its remaining alternatives cannot be documented
# from here.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, AutoConfig, AutoModelForSequenceClassification, TrainingArguments from shared import CustomTokens, GeneralArguments from functools import lru_cache from dataclasses import dataclass, field from typing import Optional, Union import torch import classify import base64 import re import requests import json import logging logging.basicConfig() logger = logging.getLogger(__name__) # Public innertube key (b64 encoded so that it is not incorrectly flagged) INNERTUBE_KEY = base64.b64decode( b'QUl6YVN5QU9fRkoyU2xxVThRNFNURUhMR0NpbHdfWTlfMTFxY1c4').decode() YT_CONTEXT = { 'client': { 'userAgent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36,gzip(gfe)', 'clientName': 'WEB', 'clientVersion': '2.20211221.00.00', } } _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;\s*(?:var\s+meta|