diff --git a/ContraCLIP/.gitignore b/ContraCLIP/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..3441df48691240878e20714d3c03da78f7c4ed10
--- /dev/null
+++ b/ContraCLIP/.gitignore
@@ -0,0 +1,21 @@
+.directory
+*/.directory
+*~
+.idea/
+contra-clip-venv/
+*.pyc
+__pycache__/
+*/__pycache__/
+dev/
+notebooks/
+figs/inkscape/
+models/pretrained/
+
+scripts/train/BACKUP/
+scripts/eval/BACKUP/
+scripts/compare/BACKUP/
+
+!experiments/
+experiments/*
+experiments/latent_codes/TMP/
+!experiments/latent_codes/
diff --git a/ContraCLIP/README.md b/ContraCLIP/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..21ea25a774539224324e791fe6e5bd5597b17d98
--- /dev/null
+++ b/ContraCLIP/README.md
@@ -0,0 +1,178 @@
+# ContraCLIP: Interpretable GAN generation driven by pairs of contrasting sentences
+
+Authors' official PyTorch implementation of **[ContraCLIP: Interpretable GAN generation driven by pairs of contrasting sentences](https://arxiv.org/pdf/2206.02104.pdf)**. If you use this code for your research, please [**cite**](#citation) our paper.
+
+> **ContraCLIP: Interpretable GAN generation driven by pairs of contrasting sentences**
+> Christos Tzelepis, James Oldfield, Georgios Tzimiropoulos, and Ioannis Patras
+> https://arxiv.org/abs/2206.02104
+> ![ContraCLIP Summary](figs/summary.png) +> +> **Abstract**: This work addresses the problem of discovering non-linear interpretable paths in the latent space of pre-trained GANs in a model-agnostic manner. In the proposed method, the discovery is driven by a set of pairs of natural language sentences with contrasting semantics, named semantic dipoles, that serve as the limits of the interpretation that we require by the trainable latent paths to encode. By using the pre-trained CLIP encoder, the sentences are projected into the vision-language space, where they serve as dipoles, and where RBF-based warping functions define a set of non-linear directional paths, one for each semantic dipole, allowing in this way traversals from one semantic pole to the other. By defining an objective that discovers paths in the latent space of GANs that generate changes along the desired paths in the vision-language embedding space, we provide an intuitive way of controlling the underlying generating factors and address some of the limitations of the state-of-the-art works, namely, that a) they are typically tailored to specific GAN architectures (i.e., StyleGAN), b) they disregard the relative position of the manipulated and the original image in the image embedding and the relative position of the image and the text embeddings, and c) they lead to abrupt image manipulations and quickly arrive at regions of low density and, thus, low image quality, providing limited control of the generative factors. + + +| Semantic Dipole (i.e., contrasting sentences given in natural language) | Example | +| ------------------------------------------------------------ | :----------------------------------------------------------: | +| *"a picture of an **angry shaved man**." → "a picture of a **man** with a **beard crying**."*
[StyleGAN2@FFHQ] | | +| *"a picture of a person with **open eyes**." → "a picture of a person with **closed eyes**."*
[StyleGAN2@FFHQ] | | +| *"a picture of a **young person**." → "a picture of an **old person**."*
[StyleGAN2@FFHQ] | | +| *"a picture of a **man** with **hair**." → "a picture of a **bald man**."*
[ProgGAN@CelebA-HQ] | | +| *"a picture of a person with **happy** face." → "a picture of a person with **surprised** face."*
[ProgGAN@CelebA-HQ] | | +| *"a picture of a **face without makeup**." → "a picture of a **face with makeup**."*
[ProgGAN@CelebA-HQ] | | +| *"a picture of an **ugly cat**." → "a picture of a **cute cat**."*
[StyleGAN2@AFHQ-Cats] | | +| *"a picture of a **dog** with **small eyes**." → "a picture of a **dog** with **big eyes**."*
[StyleGAN2@AFHQ-Dogs] | | + + + +## Overview + +![ContraCLIP Overview](./figs/overview.svg) +

+The CLIP text space, warped due to semantic dipoles of contrasting pairs of sentences in natural language, provides supervision to the optimisation of non-linear interpretable paths in the latent space of a pre-trained GAN. +
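+
+To make this warping idea concrete, the snippet below is a minimal, illustrative sketch of how a semantic dipole (a pair of contrasting sentences embedded with CLIP) can define a non-linear direction field via RBF-based warping. It is **not** the training code of this repository: the CLIP backbone name, the RBF width `gamma`, and the use of the potential's gradient as the local direction are illustrative assumptions; please refer to the paper for the exact formulation.
+
+```python
+import torch
+import clip  # https://github.com/openai/CLIP
+
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+model, _ = clip.load('ViT-B/32', device=device)  # illustrative CLIP backbone
+
+# A semantic dipole: two contrasting sentences acting as the poles of a path.
+sentences = ["a picture of a person with open eyes.",
+             "a picture of a person with closed eyes."]
+with torch.no_grad():
+    poles = model.encode_text(clip.tokenize(sentences).to(device)).float()
+    poles = poles / poles.norm(dim=-1, keepdim=True)  # unit-norm text embeddings
+c_from, c_to = poles[0], poles[1]
+
+gamma = 1.0  # illustrative RBF width
+
+def dipole_potential(v):
+    """RBF 'charge' at each pole: positive at the target pole, negative at the source."""
+    return (torch.exp(-gamma * (v - c_to).pow(2).sum())
+            - torch.exp(-gamma * (v - c_from).pow(2).sum()))
+
+# The local direction of the non-linear path at a point v of the CLIP space can then be
+# taken as the (normalised) gradient of this potential, which is attracted towards c_to
+# and repelled from c_from.
+v = torch.zeros_like(c_to, requires_grad=True)
+dipole_potential(v).backward()
+direction = v.grad / v.grad.norm()
+```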

+
+
+## Installation
+
+We recommend installing the required packages using Python's native virtual environment (`venv`) as follows:
+
+```bash
+$ python -m venv contra-clip-venv
+$ source contra-clip-venv/bin/activate
+(contra-clip-venv) $ pip install --upgrade pip
+(contra-clip-venv) $ pip install -r requirements.txt
+(contra-clip-venv) $ pip install git+https://github.com/openai/CLIP.git
+(contra-clip-venv) $ pip install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu113
+```
+
+For using the aforementioned virtual environment in a Jupyter Notebook, you need to manually add the kernel as follows:
+
+```bash
+(contra-clip-venv) $ python -m ipykernel install --user --name=contra-clip-venv
+```
+
+
+
+## Prerequisite pre-trained models and pre-trained ContraCLIP models
+
+Download the prerequisite pre-trained models (GAN generators and various pre-trained detectors, such as ArcFace, FairFace, etc.), as well as (optionally) the pre-trained ContraCLIP models (by passing `-m` or `--contraclip-models`), as follows:
+
+```bash
+(contra-clip-venv) $ python download_models.py -m
+```
+
+This will create a directory `models/pretrained` with the following sub-directories (~3.3 GiB):
+```
+./models/pretrained/
+├── genforce
+│   ├── pggan_car256.pth
+│   ├── pggan_celebahq1024.pth
+│   ├── pggan_church256.pth
+│   ├── stylegan2_afhqcat512.pth
+│   ├── stylegan2_afhqdog512.pth
+│   ├── stylegan2_car512.pth
+│   ├── stylegan2_church256.pth
+│   └── stylegan2_ffhq1024.pth
+├── arcface
+│   └── model_ir_se50.pth
+├── au_detector
+│   └── disfa_adaptation_f0.pth
+├── celeba_attributes
+│   └── eval_predictor.pth.tar
+├── fairface
+│   ├── fairface_alldata_4race_20191111.pt
+│   └── res34_fair_align_multi_7_20190809.pt
+├── hopenet
+│   ├── hopenet_alpha1.pkl
+│   ├── hopenet_alpha2.pkl
+│   └── hopenet_robust_alpha1.pkl
+└── sfd
+    └── s3fd-619a316812.pth
+```
+
+as well as a directory `experiments/complete/` (if not already created by the user upon an experiment's completion) for the downloaded pre-trained ContraCLIP models, with the following sub-directories (~160 MiB):
+
+```
+./experiments/complete/
+├── ContraCLIP_pggan_celebahq1024-Z-K9-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-attributes
+├── ContraCLIP_pggan_celebahq1024-Z-K9-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-cossim-20000-attributes
+├── ContraCLIP_stylegan2_afhqcat512-W+-K3-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-cats
+├── ContraCLIP_stylegan2_afhqdog512-W+-K4-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-dogs
+├── ContraCLIP_stylegan2_car512-W+-K3-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-cars
+├── ContraCLIP_stylegan2_ffhq1024-W+-K21-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-expressions
+├── ContraCLIP_stylegan2_ffhq1024-W+-K21-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-cossim-20000-expressions
+├── ContraCLIP_stylegan2_ffhq1024-W+-K3-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-complex
+├── ContraCLIP_stylegan2_ffhq1024-W+-K3-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-expressions3
+├── ContraCLIP_stylegan2_ffhq1024-W+-K3-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-cossim-20000-complex
+├── ContraCLIP_stylegan2_ffhq1024-W+-K3-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-cossim-20000-expressions3
+├── ContraCLIP_stylegan2_ffhq1024-W+-K9-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-attributes
+└── ContraCLIP_stylegan2_ffhq1024-W+-K9-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-cossim-20000-attributes
+```
+
+We note that the pre-trained detectors (such as ArcFace) are used only during the evaluation stage (**no ID preserving loss is imposed during training**).
+
+
+
+## Training
+
+For training a ContraCLIP model you need to use `train.py` (check its basic usage by running `python train.py -h`). For example, in order to train a ContraCLIP model for the corpus of contrasting sentences called "expressions3" (defined in `lib/config.py`) on the StyleGAN2 generator pre-trained on FFHQ (in its `W` latent space, with a truncation parameter equal to `0.7`), use the following command:
+
+```bash
+(contra-clip-venv) $ python train.py --gan=stylegan2_ffhq1024 --truncation=0.7 --stylegan-space=W --corpus=expressions3 --num-latent-support-dipoles=128 --loss=contrastive --temperature=0.5 --beta=0.75 --min-shift-magnitude=0.1 --max-shift-magnitude=0.2 --batch-size=3 --max-iter=120000 --log-freq=10 --ckp-freq=100
+```
+
+In the example above, the batch size is set to `3` and training is conducted for `120000` iterations. Minimum and maximum shift magnitudes are set to `0.1` and `0.2`, respectively, and the number of support dipoles for each latent path is set to `128` (please see [WarpedGANSpace](https://github.com/chi0tzp/WarpedGANSpace) for more details). Moreover, the `contrastive` loss is used with a temperature parameter equal to `0.5`, and the `beta` parameter of the CLIP text space RBF dipoles is set to `0.75`. A set of auxiliary training scripts (for the results reported in the paper) can be found under `scripts/train/`.
+
+The training script will create a directory with the following name format:
+
+```
+ContraCLIP_<gan_type>-<latent_space>-K<num_paths>-D<num_latent_support_dipoles>-eps<min_shift_magnitude>_<max_shift_magnitude>-<linear|nonlinear>_beta-<beta>-contrastive_<temperature>-<corpus>
+```
+
+For instance, the command above gives `ContraCLIP_stylegan2_ffhq1024-W-K3-D128-eps0.1_0.2-nonlinear_beta-0.75-contrastive_0.5-expressions3`, which is created under `experiments/wip/` while training is in progress and, after training completes, is copied under `experiments/complete/`. This directory has the following structure:
+
+```
+├── models/
+├── args.json
+├── stats.json
+└── command.sh
+```
+
+where `models/` contains the weights of the latent support sets (`latent_support_sets.pt`). While training is in progress (i.e., while this directory is found under `experiments/wip/`), the corresponding `models/` directory contains a checkpoint file (`checkpoint.pt`) with the last iteration and the weights of the latent support sets, so that training can be resumed: re-run the same command and, if the last iteration is smaller than the given maximum number of iterations, training will resume from the last iteration. This directory will be referred to as `EXP_DIR` for the rest of this document.
+
+
+
+## Evaluation
+
+As soon as a *ContraCLIP* model is trained, the corresponding experiment's directory (i.e., `EXP_DIR`) can be found under `experiments/complete/`. In order to evaluate the model, we can generate image sequences across the discovered latent paths (for the given pairs of contrasting sentences). For doing so, we first need to create a pool of latent codes/images for the corresponding GAN type, using `sample_gan.py`. The pool of latent codes/images will be stored under `experiments/latent_codes/<gan_type>/` and will be referred to as `POOL` for the rest of this document.
+
+For example, the following command will create a pool named `stylegan2_ffhq1024-4` under `experiments/latent_codes/stylegan2_ffhq1024/`:
+
+```bash
+(contra-clip-venv) $ python sample_gan.py -v --gan-type=stylegan2_ffhq1024 --stylegan-space=W --truncation=0.7 --num-samples=4
+```
+
+Latent space traversals can then be calculated using the script `traverse_latent_space.py` (please check its basic usage by running `python traverse_latent_space.py -h`) for a given model and a given `POOL`. Upon completion, results (i.e., latent traversals) will be stored under the following directory:
+
+`experiments/complete/EXP_DIR/results/POOL/<2*shift_steps>_<eps>_<total_length>`,
+
+where `eps`, `shift_steps`, and `total_length` denote, respectively, the shift magnitude (of a single step on the path), the number of such steps, and the total traversal length. A set of auxiliary evaluation scripts (for the results reported in the paper) can be found under `scripts/eval/`.
+
+
+
+## Citation
+
+```bibtex
+@misc{tzelepis2022contraclip,
+  author = {Tzelepis, Christos and Oldfield, James and Tzimiropoulos, Georgios and Patras, Ioannis},
+  title = {{ContraCLIP}: Interpretable {GAN} generation driven by pairs of contrasting sentences},
+  year={2022},
+  eprint={2206.02104},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}
+```
+
+
+
diff --git a/ContraCLIP/calculate_jung_radii.py b/ContraCLIP/calculate_jung_radii.py
new file mode 100644
index 0000000000000000000000000000000000000000..9aef4210ef941cba77ffd1e278c7573b61772e52
--- /dev/null
+++ b/ContraCLIP/calculate_jung_radii.py
@@ -0,0 +1,210 @@
+import argparse
+import numpy as np
+import os.path as osp
+import torch
+from lib import GENFORCE_MODELS
+from models.load_generator import load_generator
+from sklearn import linear_model
+from collections import defaultdict
+from tqdm import tqdm
+import json
+
+
+def make_dict():
+    return defaultdict(make_dict)
+
+
+def main():
+    """A script for calculating the radii of minimal enclosing balls for the latent space of a GAN (i.e., in its
+    Z/W/W+ space), given a truncation parameter. When applicable, a linear model is trained in order to predict the
+    radii of the latent codes, given a truncation parameter.
+
+    The parameters of the linear model (i.e., the weight w and the bias b) are stored for each GAN type and each
+    latent space in a json file (i.e., models/jung_radii.json) as a dictionary with the following format:
+        {
+            ...
+            <gan_type>:
+                {
+                    'Z': (<w>, <b>),
+                    'W':
+                        {
+                            ...
+                            <layer>: (<w>, <b>),
+                            ...
+                        },
+                },
+            ...
+        }
+    so that, given a truncation parameter t, the radius is given as `w * t + b`.
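+
+    As an illustration only (hypothetical lookup; the GAN keys come from GENFORCE_MODELS, the layer keys are the
+    integer indices of the W+ layers, and json stores these integer keys as strings), the stored (w, b) pairs could
+    be consumed as follows:
+
+        import json
+        with open('models/jung_radii.json') as fp:
+            jung_radii = json.load(fp)
+        w, b = jung_radii['stylegan2_ffhq1024']['W']['0']  # W space (layer 0)
+        radius = w * 0.7 + b                               # radius for truncation t=0.7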
+ + Options: + -v, --verbose : set verbose mode on + --num-samples : set the number of latent codes to sample for generating images + --cuda : use CUDA (default) + --no-cuda : do not use CUDA + """ + parser = argparse.ArgumentParser(description="Fit a linear model for the jung radius of GAN's latent code given " + "a truncation parameter") + parser.add_argument('-v', '--verbose', action='store_true', help="verbose mode on") + parser.add_argument('--num-samples', type=int, default=1000, help="set number of latent codes to sample") + parser.add_argument('--cuda', dest='cuda', action='store_true', help="use CUDA during training") + parser.add_argument('--no-cuda', dest='cuda', action='store_false', help="do NOT use CUDA during training") + parser.set_defaults(cuda=True) + # ================================================================================================================ # + + # Parse given arguments + args = parser.parse_args() + + # CUDA + use_cuda = False + if torch.cuda.is_available(): + if args.cuda: + use_cuda = True + torch.set_default_tensor_type('torch.cuda.FloatTensor') + else: + print("*** WARNING ***: It looks like you have a CUDA device, but aren't using CUDA.\n" + " Run with --cuda for optimal training speed.") + torch.set_default_tensor_type('torch.FloatTensor') + else: + torch.set_default_tensor_type('torch.FloatTensor') + + # Build jung radii dictionary and populate it + nested_dict = lambda: defaultdict(nested_dict) + jung_radii_dict = nested_dict() + for gan in GENFORCE_MODELS.keys(): + ################################################################################################################ + ## ## + ## [ StyleGANs ] ## + ## ## + ################################################################################################################ + if 'stylegan' in gan: + ############################################################################################################ + ## ## + ## [ StyleGAN / Z-space ] ## + ## ## + ############################################################################################################ + # Build GAN generator model and load with pre-trained weights + if args.verbose: + print(" \\__Build GAN generator model G and load with pre-trained weights...") + print(" \\__GAN generator : {} (res: {})".format(gan, GENFORCE_MODELS[gan][1])) + print(" \\__Pre-trained weights: {}".format(GENFORCE_MODELS[gan][0])) + + G = load_generator(model_name=gan, latent_is_w=False, verbose=args.verbose).eval() + + # Upload GAN generator model to GPU + if use_cuda: + G = G.cuda() + + # Latent codes sampling + if args.verbose: + print(" \\__Sample {} {}-dimensional latent codes...".format(args.num_samples, G.dim_z)) + zs = torch.randn(args.num_samples, G.dim_z) + + if use_cuda: + zs = zs.cuda() + + # Calculate expected latent norm + if args.verbose: + print(" \\__Calculate Jung radius...") + jung_radius = torch.cdist(zs, zs).max() * np.sqrt(G.dim_z / (2 * (G.dim_z + 1))) + jung_radii_dict[gan]['Z'] = (0.0, jung_radius.cpu().detach().item()) + + ############################################################################################################ + ## ## + ## [ StyleGAN / W/W+-space ] ## + ## ## + ############################################################################################################ + # Build GAN generator model and load with pre-trained weights + if args.verbose: + print(" \\__Build GAN generator model G and load with pre-trained weights...") + print(" \\__GAN generator : {} (res: {})".format(gan, 
GENFORCE_MODELS[gan][1])) + print(" \\__Pre-trained weights: {}".format(GENFORCE_MODELS[gan][0])) + + G = load_generator(model_name=gan, latent_is_w=True, verbose=args.verbose).eval() + + # Upload GAN generator model to GPU + if use_cuda: + G = G.cuda() + + # Latent codes sampling + if args.verbose: + print(" \\__Sample {} {}-dimensional latent codes...".format(args.num_samples, G.dim_z)) + zs = torch.randn(args.num_samples, G.dim_z) + + if use_cuda: + zs = zs.cuda() + + # Get number of W layers for the given StyleGAN + stylegan_num_layers = G.get_w(zs, truncation=1.0).shape[1] + + # Calculate expected latent norm and fit a linear model for each version of the W+ space + if args.verbose: + print(" \\__Calculate Jung radii and fit linear models...") + data_per_layer = dict() + tmp = [] + for truncation in tqdm(np.linspace(0.1, 1.0, 100), desc=" \\__Calculate radii (W space): "): + ws = G.get_w(zs, truncation=truncation)[:, 0, :] + jung_radius = torch.cdist(ws, ws).max() * np.sqrt(ws.shape[1] / (2 * (ws.shape[1] + 1))) + tmp.append([truncation, jung_radius.cpu().detach().item()]) + data_per_layer.update({0: tmp}) + + for ll in tqdm(range(1, stylegan_num_layers), desc=" \\__Calculate radii (W+ space): "): + tmp = [] + for truncation in np.linspace(0.1, 1.0, 100): + ws_plus = G.get_w(zs, truncation=truncation)[:, :ll + 1, :] + ws_plus = ws_plus.reshape(ws_plus.shape[0], -1) + jung_radius = torch.cdist(ws_plus, ws_plus).max() * \ + np.sqrt(ws_plus.shape[1] / (2 * (ws_plus.shape[1] + 1))) + tmp.append([truncation, jung_radius.cpu().detach().item()]) + data_per_layer.update({ll: tmp}) + + for ll, v in tqdm(data_per_layer.items(), desc=" \\__Fit linear models"): + v = np.array(v) + lm = linear_model.LinearRegression() + lm.fit(v[:, 0].reshape(-1, 1), v[:, 1].reshape(-1, 1)) + jung_radii_dict[gan]['W'][ll] = (float(lm.coef_[0, 0]), float(lm.intercept_[0])) + + ################################################################################################################ + ## ## + ## [ ProgGAN ] ## + ## ## + ################################################################################################################ + else: + # Build GAN generator model and load with pre-trained weights + if args.verbose: + print(" \\__Build GAN generator model G and load with pre-trained weights...") + print(" \\__GAN generator : {} (res: {})".format(gan, GENFORCE_MODELS[gan][1])) + print(" \\__Pre-trained weights: {}".format(GENFORCE_MODELS[gan][0])) + + G = load_generator(model_name=gan, latent_is_w=False, verbose=args.verbose).eval() + + # Upload GAN generator model to GPU + if use_cuda: + G = G.cuda() + + # Latent codes sampling + if args.verbose: + print(" \\__Sample {} {}-dimensional latent codes...".format(args.num_samples, G.dim_z)) + zs = torch.randn(args.num_samples, G.dim_z) + + if use_cuda: + zs = zs.cuda() + + # Calculate expected latent norm + if args.verbose: + print(" \\__Calculate Jung radius...") + jung_radius = torch.cdist(zs, zs).max() * np.sqrt(G.dim_z / (2 * (G.dim_z + 1))) + + print("jung_radius") + print(jung_radius) + print(type(jung_radius)) + + jung_radii_dict[gan]['Z'] = (0.0, jung_radius.cpu().detach().item()) + + # Save expected latent norms dictionary + with open(osp.join('models', 'jung_radii.json'), 'w') as fp: + json.dump(jung_radii_dict, fp) + + +if __name__ == '__main__': + main() diff --git a/ContraCLIP/checkpoint2model.py b/ContraCLIP/checkpoint2model.py new file mode 100644 index 0000000000000000000000000000000000000000..6370bca3b9414bebeffe7676283246198dd9b955 --- 
/dev/null +++ b/ContraCLIP/checkpoint2model.py @@ -0,0 +1,51 @@ +import argparse +import os.path as osp +import torch + + +def main(): + """An auxiliary script for converting a checkpoint file (`checkpoint.pt`) into a support sets (`support_sets.pt`) + and a reconstructor (`reconstructor.pt`) weights files. + + Options: + ================================================================================================================ + --exp : set experiment's wip model dir, as created by `train.py`, i.e., it should contain a sub-directory + `models/` with a checkpoint file (`checkpoint.pt`). Checkpoint file contains the weights of the + support sets and the reconstructor at an intermediate stage of training (after a given iteration). + ================================================================================================================ + """ + parser = argparse.ArgumentParser(description="Convert a checkpoint file into a support sets and a reconstructor " + "weights files") + parser.add_argument('--exp', type=str, required=True, help="set experiment's model dir (created by `train.py`)") + + # Parse given arguments + args = parser.parse_args() + + # Check structure of `args.exp` + if not osp.isdir(args.exp): + raise NotADirectoryError("Invalid given directory: {}".format(args.exp)) + models_dir = osp.join(args.exp, 'models') + if not osp.isdir(models_dir): + raise NotADirectoryError("Invalid models directory: {}".format(models_dir)) + checkpoint_file = osp.join(models_dir, 'checkpoint.pt') + if not osp.isfile(checkpoint_file): + raise FileNotFoundError("Checkpoint file not found: {}".format(checkpoint_file)) + + print("#. Convert checkpoint file into support sets and reconstructor weight files...") + + # Load checkpoint file + checkpoint_dict = torch.load(checkpoint_file) + print(" \\__Checkpoint dictionary: {}".format(checkpoint_dict.keys())) + + # Get checkpoint iteration + checkpoint_iter = checkpoint_dict['iter'] + print(" \\__Checkpoint iteration: {}".format(checkpoint_iter)) + + # Save latent support sets (LSS) weights file + print(" \\__Save checkpoint latent support sets LSS weights file...") + torch.save(checkpoint_dict['latent_support_sets'], + osp.join(models_dir, 'latent_support_sets-{:07d}.pt'.format(checkpoint_iter))) + + +if __name__ == '__main__': + main() diff --git a/ContraCLIP/download_models.py b/ContraCLIP/download_models.py new file mode 100644 index 0000000000000000000000000000000000000000..5dedf941419288a3aabc757bd95b8728ad76148b --- /dev/null +++ b/ContraCLIP/download_models.py @@ -0,0 +1,168 @@ +import sys +import os +import os.path as osp +import argparse +import hashlib +import tarfile +import time +import urllib.request +from lib import GENFORCE, GENFORCE_MODELS, SFD, ARCFACE, FAIRFACE, HOPENET, AUDET, CELEBA_ATTRIBUTES, ContraCLIP_models + + +def reporthook(count, block_size, total_size): + global start_time + if count == 0: + start_time = time.time() + return + duration = time.time() - start_time + progress_size = int(count * block_size) + speed = int(progress_size / (1024 * duration)) + percent = min(int(count * block_size * 100 / total_size), 100) + sys.stdout.write("\r \\__%d%%, %d MB, %d KB/s, %d seconds passed" % + (percent, progress_size / (1024 * 1024), speed, duration)) + + sys.stdout.flush() + + +def download(src, sha256sum, dest): + tmp_tar = osp.join(dest, ".tmp.tar") + try: + urllib.request.urlretrieve(src, tmp_tar, reporthook) + except: + raise ConnectionError("Error: {}".format(src)) + + sha256_hash = hashlib.sha256() + with 
open(tmp_tar, "rb") as f: + # Read and update hash string value in blocks of 4K + for byte_block in iter(lambda: f.read(4096), b""): + sha256_hash.update(byte_block) + + sha256_check = sha256_hash.hexdigest() == sha256sum + print() + print(" \\__Check sha256: {}".format("OK!" if sha256_check else "Error")) + if not sha256_check: + raise Exception("Error: Invalid sha256 sum: {}".format(sha256_hash.hexdigest())) + + tar_file = tarfile.open(tmp_tar, mode='r') + tar_file.extractall(dest) + os.remove(tmp_tar) + + +def main(): + """Download pre-trained GAN generators and various pre-trained detectors (used only during testing), as well as + pre-trained ContraCLIP models: + -- GenForce GAN generators [1] + -- SFD face detector [2] + -- ArcFace [3] + -- FairFace [4] + -- Hopenet [5] + -- AU detector [6] for 12 DISFA [7] Action Units + -- Facial attributes detector [8] for 5 CelebA [9] attributes + -- ContraCLIP [10] pre-trained models: + StyleGAN2@FFHQ + ProgGAN@CelebA-HQ: + StyleGAN2@AFHQ-Cats + StyleGAN2@AFHQ-Dogs + StyleGAN2@AFHQ-Cars + + References: + [1] https://genforce.github.io/ + [2] Zhang, Shifeng, et al. "S3FD: Single shot scale-invariant face detector." Proceedings of the IEEE + international conference on computer vision. 2017. + [3] Deng, Jiankang, et al. "Arcface: Additive angular margin loss for deep face recognition." + Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2019. + [4] Karkkainen, Kimmo, and Jungseock Joo. "FairFace: Face attribute dataset for balanced race, gender, and age." + arXiv preprint arXiv:1908.04913 (2019). + [5] Doosti, Bardia, et al. "Hope-net: A graph-based model for hand-object pose estimation." Proceedings of the + IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020. + [6] Ntinou, Ioanna, et al. "A transfer learning approach to heatmap regression for action unit intensity + estimation." IEEE Transactions on Affective Computing (2021). + [7] Mavadati, S. Mohammad, et al. "DISFA: A spontaneous facial action intensity database." IEEE Transactions on + Affective Computing 4.2 (2013): 151-160. + [8] Jiang, Yuming, et al. "Talk-to-Edit: Fine-Grained Facial Editing via Dialog." Proceedings of the IEEE/CVF + International Conference on Computer Vision. 2021. + [9] Liu, Ziwei, et al. "Deep learning face attributes in the wild." Proceedings of the IEEE international + conference on computer vision. 2015. + [10] Tzelepis, C., Oldfield, J., Tzimiropoulos, G., & Patras, I. (2022). ContraCLIP: Interpretable GAN + generation driven by pairs of contrasting sentences. arXiv preprint arXiv:2206.02104. + """ + parser = argparse.ArgumentParser(description="Download pre-trained models") + parser.add_argument('-m', '--contraclip-models', action='store_true', help="download pre-trained ContraCLIP models") + args = parser.parse_args() + + # Create pre-trained models root directory + pretrained_models_root = osp.join('models', 'pretrained') + os.makedirs(pretrained_models_root, exist_ok=True) + + # Download the following pre-trained GAN generators (under models/pretrained/) + print("#. Download pre-trained GAN generators...") + print(" \\__.GenForce") + download_genforce_models = False + for k, v in GENFORCE_MODELS.items(): + if not osp.exists(osp.join(pretrained_models_root, 'genforce', v[0])): + download_genforce_models = True + break + if download_genforce_models: + download(src=GENFORCE[0], sha256sum=GENFORCE[1], dest=pretrained_models_root) + else: + print(" \\__Already exists.") + + print("#. 
Download pre-trained ArcFace model...") + print(" \\__.ArcFace") + if osp.exists(osp.join(pretrained_models_root, 'arcface', 'model_ir_se50.pth')): + print(" \\__Already exists.") + else: + download(src=ARCFACE[0], sha256sum=ARCFACE[1], dest=pretrained_models_root) + + print("#. Download pre-trained SFD face detector model...") + print(" \\__.Face detector (SFD)") + if osp.exists(osp.join(pretrained_models_root, 'sfd', 's3fd-619a316812.pth')): + print(" \\__Already exists.") + else: + download(src=SFD[0], sha256sum=SFD[1], dest=pretrained_models_root) + + print("#. Download pre-trained FairFace model...") + print(" \\__.FairFace") + if osp.exists(osp.join(pretrained_models_root, 'fairface', 'fairface_alldata_4race_20191111.pt')) and \ + osp.exists(osp.join(pretrained_models_root, 'fairface', 'res34_fair_align_multi_7_20190809.pt')): + print(" \\__Already exists.") + else: + download(src=FAIRFACE[0], sha256sum=FAIRFACE[1], dest=pretrained_models_root) + + print("#. Download pre-trained Hopenet model...") + print(" \\__.Hopenet") + if osp.exists(osp.join(pretrained_models_root, 'hopenet', 'hopenet_alpha1.pkl')) and \ + osp.exists(osp.join(pretrained_models_root, 'hopenet', 'hopenet_alpha2.pkl')) and \ + osp.exists(osp.join(pretrained_models_root, 'hopenet', 'hopenet_robust_alpha1.pkl')): + print(" \\__Already exists.") + else: + download(src=HOPENET[0], sha256sum=HOPENET[1], dest=pretrained_models_root) + + print("#. Download pre-trained AU detector model...") + print(" \\__.FANet") + if osp.exists(osp.join(pretrained_models_root, 'au_detector', 'disfa_adaptation_f0.pth')): + print(" \\__Already exists.") + else: + download(src=AUDET[0], sha256sum=AUDET[1], dest=pretrained_models_root) + + print("#. Download pre-trained CelebA attributes predictors models...") + print(" \\__.CelebA") + if osp.exists(osp.join(pretrained_models_root, 'celeba_attributes', 'eval_predictor.pth.tar')): + print(" \\__Already exists.") + else: + download(src=CELEBA_ATTRIBUTES[0], sha256sum=CELEBA_ATTRIBUTES[1], dest=pretrained_models_root) + + # Download pre-trained ContraCLIP models + if args.contraclip_models: + pretrained_contraclip_root = osp.join('experiments', 'complete') + os.makedirs(pretrained_contraclip_root, exist_ok=True) + + print("#. 
Download pre-trained ContraCLIP models...") + print(" \\__.ContraCLIP pre-trained models...") + download(src=ContraCLIP_models[0], + sha256sum=ContraCLIP_models[1], + dest=pretrained_contraclip_root) + + +if __name__ == '__main__': + main() diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/02be4f3503db069a28be3bf222c0f64ae6f85d05/image_z.jpg b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/02be4f3503db069a28be3bf222c0f64ae6f85d05/image_z.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0691b176945c66eee7f22d92270b2e59d9e65e0e Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/02be4f3503db069a28be3bf222c0f64ae6f85d05/image_z.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/02be4f3503db069a28be3bf222c0f64ae6f85d05/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/02be4f3503db069a28be3bf222c0f64ae6f85d05/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..05bf80e57bba619989dddbc64574d10b25ba157f --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/02be4f3503db069a28be3bf222c0f64ae6f85d05/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce54ca0a139e42f1c79fe7f60d576d4a485e36627318c7c246275dee69a15ee +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/10c29d1257e7c6e513d8ef23599ba6ba89eda181/image_z.jpg b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/10c29d1257e7c6e513d8ef23599ba6ba89eda181/image_z.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6b0437a0e23699fd00d0adebf657c449eb22b554 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/10c29d1257e7c6e513d8ef23599ba6ba89eda181/image_z.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/10c29d1257e7c6e513d8ef23599ba6ba89eda181/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/10c29d1257e7c6e513d8ef23599ba6ba89eda181/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..46bf5bd44b44eb9cdc4d2476c55b0bbf271b7233 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/10c29d1257e7c6e513d8ef23599ba6ba89eda181/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a511c6edd052939a88acc05a299f3da41a5b1f05270d2443fd8a8e916bd05f1 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/323234c425e1b4fd5ec0539bb64765d72afffc75/image_z.jpg b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/323234c425e1b4fd5ec0539bb64765d72afffc75/image_z.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8929c84db63cbcdf07870bf854c2eea38605616c Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/323234c425e1b4fd5ec0539bb64765d72afffc75/image_z.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/323234c425e1b4fd5ec0539bb64765d72afffc75/latent_code_z.pt 
b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/323234c425e1b4fd5ec0539bb64765d72afffc75/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..44fad96a0849393e677325346c55ec3f7c68c188 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/323234c425e1b4fd5ec0539bb64765d72afffc75/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70da19d64562dae4c03e15617a55024c30070f7419ed9d32adcfe5d5240b7adb +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/555510a5999a3c5eb3097e0b80da4cee97088c8e/image_z.jpg b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/555510a5999a3c5eb3097e0b80da4cee97088c8e/image_z.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f8b48ffc0d4297397b71b6032ff101bb43d29dcb Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/555510a5999a3c5eb3097e0b80da4cee97088c8e/image_z.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/555510a5999a3c5eb3097e0b80da4cee97088c8e/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/555510a5999a3c5eb3097e0b80da4cee97088c8e/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fc9312cec2fad597b84977108682167fc316553 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/555510a5999a3c5eb3097e0b80da4cee97088c8e/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8342234862b5c57bcf3c4153837f6654052f1de4192f1e7d0a464a9fc0360550 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/911ea1a1d3b3e6b57a819ad9310048384608ce08/image_z.jpg b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/911ea1a1d3b3e6b57a819ad9310048384608ce08/image_z.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5fa9971b6a49e9c60a8e3299faa9b1137b2c947a Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/911ea1a1d3b3e6b57a819ad9310048384608ce08/image_z.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/911ea1a1d3b3e6b57a819ad9310048384608ce08/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/911ea1a1d3b3e6b57a819ad9310048384608ce08/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d60e6cd314191f30529ac24ff8e66c0973c59e4 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/911ea1a1d3b3e6b57a819ad9310048384608ce08/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a5e51d71a466329e2396c8124758dfe3d05f4bab42c2d782b4e4f77af30bccb +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/9232b69c406fece5016ccfe260a226eaef1d9181/image_z.jpg b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/9232b69c406fece5016ccfe260a226eaef1d9181/image_z.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c413a0660763a73e5d4980dadd57c6deda3de0ab Binary files /dev/null and 
b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/9232b69c406fece5016ccfe260a226eaef1d9181/image_z.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/9232b69c406fece5016ccfe260a226eaef1d9181/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/9232b69c406fece5016ccfe260a226eaef1d9181/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..f99afbec45e1c849f96f9e821412b0c5ca6a7b7a --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/9232b69c406fece5016ccfe260a226eaef1d9181/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e6f5a35eb58c4c2e7228f423e5860730cdb0d212855fc374fc471702d6b3339 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/c1345dab91e4c82070858e3201bcd7eac0bb042e/image_z.jpg b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/c1345dab91e4c82070858e3201bcd7eac0bb042e/image_z.jpg new file mode 100644 index 0000000000000000000000000000000000000000..447d4a6505cddbae31f05c9c6808b79f6cefb3f2 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/c1345dab91e4c82070858e3201bcd7eac0bb042e/image_z.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/c1345dab91e4c82070858e3201bcd7eac0bb042e/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/c1345dab91e4c82070858e3201bcd7eac0bb042e/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5a791c3603f9aa6aa4fcc6469b2a9310811fe95 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/c1345dab91e4c82070858e3201bcd7eac0bb042e/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5afd158af814aaf1aae0455127b3d5eca7d1e599cb9bad2f45f1d1a7eb4fbee7 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/ce26bd5026197c1df60bc43ab1a99f3db8730b0a/image_z.jpg b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/ce26bd5026197c1df60bc43ab1a99f3db8730b0a/image_z.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f8ea42735ba2e71c635bf6fa7a9e993e044e750c Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/ce26bd5026197c1df60bc43ab1a99f3db8730b0a/image_z.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/ce26bd5026197c1df60bc43ab1a99f3db8730b0a/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/ce26bd5026197c1df60bc43ab1a99f3db8730b0a/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..11c61dc87aba4562d8b1fe8d5d1ef4293e9c7aa2 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/pggan_celebahq1024/pggan_celebahq1024-8/ce26bd5026197c1df60bc43ab1a99f3db8730b0a/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfaa6a1a2189cbdce8d8371428d259aaa9de83fa3975352be57694ca3e3c1144 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/08307a8eacf4509f45ab65e8ee76dc53d089dec9/image_w.jpg 
b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/08307a8eacf4509f45ab65e8ee76dc53d089dec9/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4c5b436ec76e9f526a2b51bb79100f4ab49118a7 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/08307a8eacf4509f45ab65e8ee76dc53d089dec9/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/08307a8eacf4509f45ab65e8ee76dc53d089dec9/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/08307a8eacf4509f45ab65e8ee76dc53d089dec9/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..c88c0755c40c5558d86098d00e87c12746429ca0 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/08307a8eacf4509f45ab65e8ee76dc53d089dec9/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82712b3f877423756238c59c19cdc49b131aeab9054e977e0eabce7dae881261 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/08307a8eacf4509f45ab65e8ee76dc53d089dec9/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/08307a8eacf4509f45ab65e8ee76dc53d089dec9/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..8343e52131941a67399cdf5c36a86d15d875db10 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/08307a8eacf4509f45ab65e8ee76dc53d089dec9/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d3f23f63d0002b4bd330fcabad40161443181b380c2ee343f381d6f57406410 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/08307a8eacf4509f45ab65e8ee76dc53d089dec9/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/08307a8eacf4509f45ab65e8ee76dc53d089dec9/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..f86f11a57779972d3c66e34571ba8e2323fe596f --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/08307a8eacf4509f45ab65e8ee76dc53d089dec9/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cee26d0ce68a05d32fcb28eeb8a13418c1978c62c3f3daab5b97fe48fb6a8cc8 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/28c1c29df1be16a26914078f57b2b95598496048/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/28c1c29df1be16a26914078f57b2b95598496048/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..53a5caa8d6f943d3a92f490916d139e1be07e001 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/28c1c29df1be16a26914078f57b2b95598496048/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/28c1c29df1be16a26914078f57b2b95598496048/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/28c1c29df1be16a26914078f57b2b95598496048/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed9b210f7a4b2f08c982d4ec8b69b5b662b22a87 --- /dev/null +++ 
b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/28c1c29df1be16a26914078f57b2b95598496048/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0a3002d8d50bbc7702c1390726fddb57fa1d384e6b3caecdb462db0aa272a2 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/28c1c29df1be16a26914078f57b2b95598496048/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/28c1c29df1be16a26914078f57b2b95598496048/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..2eb809a9bc301ef25fabde287d3fea288fd22640 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/28c1c29df1be16a26914078f57b2b95598496048/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c19769cbebb4bf0278d55ebc46442a54d56241487fdf26eaf1cd7e392959809a +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/28c1c29df1be16a26914078f57b2b95598496048/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/28c1c29df1be16a26914078f57b2b95598496048/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4ea1f2d488214aab175afd3122bce9b764fe13d --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/28c1c29df1be16a26914078f57b2b95598496048/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:562f242b6885c9a829a8f4b0e6af453f3dd53684aa182a4ef6b90d871cada48f +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/3ac589d77dc2845eda68b3e92b92f5aef972bd93/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/3ac589d77dc2845eda68b3e92b92f5aef972bd93/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..05012cc9668f7c2e7029e3b0f3f3cf11356cc99e Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/3ac589d77dc2845eda68b3e92b92f5aef972bd93/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/3ac589d77dc2845eda68b3e92b92f5aef972bd93/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/3ac589d77dc2845eda68b3e92b92f5aef972bd93/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..007f466824bfff659ec2d47edb2ce26fb043d264 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/3ac589d77dc2845eda68b3e92b92f5aef972bd93/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d745141e50c6a3e69ecd5480ad9f22547e78854c2024b3f43ce9b4bcc5627565 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/3ac589d77dc2845eda68b3e92b92f5aef972bd93/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/3ac589d77dc2845eda68b3e92b92f5aef972bd93/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..a672a60a42c674b32b3a8b7ce7def5b229580abd --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/3ac589d77dc2845eda68b3e92b92f5aef972bd93/latent_code_w.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d787702e6e28f7a54afccbaa2c26bbd728af5c1edb33ee15f5c214094590a7 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/3ac589d77dc2845eda68b3e92b92f5aef972bd93/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/3ac589d77dc2845eda68b3e92b92f5aef972bd93/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a938bea7996d713e591b9cf88ce3d3ab881e04b --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/3ac589d77dc2845eda68b3e92b92f5aef972bd93/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:874978425c2472ec203918a10d359dd1e9cd9c795939de4263da071a31bc68d2 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/565fd0382c69e4c9462179dbce46cab36b576226/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/565fd0382c69e4c9462179dbce46cab36b576226/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ecf46b2a2e529bfe37609e79f33170ebc31d2583 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/565fd0382c69e4c9462179dbce46cab36b576226/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/565fd0382c69e4c9462179dbce46cab36b576226/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/565fd0382c69e4c9462179dbce46cab36b576226/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..167ddde10d0f5115d291957de46412255b39b521 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/565fd0382c69e4c9462179dbce46cab36b576226/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccfe3e29ba646c227f286e2554b758e17f339562ef093e6a66e8a86bb8a70948 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/565fd0382c69e4c9462179dbce46cab36b576226/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/565fd0382c69e4c9462179dbce46cab36b576226/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4f321a9e247900960b8329236a150af10321ab1 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/565fd0382c69e4c9462179dbce46cab36b576226/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e3b76d1580404c16254926af0567e1a14478a4343c30f47d2666f5b3ab2cb80 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/565fd0382c69e4c9462179dbce46cab36b576226/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/565fd0382c69e4c9462179dbce46cab36b576226/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82e9d039337271007e048f834354a4e2b91284b --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/565fd0382c69e4c9462179dbce46cab36b576226/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bee2b9d54714f6a7b8ac46a709599cfdffe20b550f0301d1069d8264855d6889 +size 2795 
diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/584c090fdba130d896e7b67f942df55f44baf022/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/584c090fdba130d896e7b67f942df55f44baf022/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4a82b94ed302be40e90c6bc92ec93b06eb3b777d Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/584c090fdba130d896e7b67f942df55f44baf022/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/584c090fdba130d896e7b67f942df55f44baf022/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/584c090fdba130d896e7b67f942df55f44baf022/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..a84f45ad7b5964ab77ad1c6b88bd6a85ad7a6104 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/584c090fdba130d896e7b67f942df55f44baf022/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66213a5a461870c4b818be52684f06475076904d3458a487aadb3d49bd9427b1 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/584c090fdba130d896e7b67f942df55f44baf022/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/584c090fdba130d896e7b67f942df55f44baf022/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..f02842f86d8548da98ca63b3585abede70f3b6d6 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/584c090fdba130d896e7b67f942df55f44baf022/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:001cae0eb200dec5984724bb15f5b55abc69d2f76e6ff9ada0628d2c56fa7b80 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/584c090fdba130d896e7b67f942df55f44baf022/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/584c090fdba130d896e7b67f942df55f44baf022/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..c15000acf8f24e4c713b852c5af4a0cadd85f56a --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/584c090fdba130d896e7b67f942df55f44baf022/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89eddb1448b8b7cab265f23114248dc6ef3f9b398b188ed4237c1488d1d31b4c +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/60ace58591602b942ef7816000203c07479baf1e/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/60ace58591602b942ef7816000203c07479baf1e/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..88ca1c7e61cc2b15ba1a539469e8615300a59fc1 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/60ace58591602b942ef7816000203c07479baf1e/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/60ace58591602b942ef7816000203c07479baf1e/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/60ace58591602b942ef7816000203c07479baf1e/latent_code_w+.pt 
new file mode 100644 index 0000000000000000000000000000000000000000..05fd8b0a3ce7655aa29be73162061c5e0c2de0f0 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/60ace58591602b942ef7816000203c07479baf1e/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bed42b35a7ed1da7fa51000244723e23b662b1873f68403ec94b3098c79e696 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/60ace58591602b942ef7816000203c07479baf1e/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/60ace58591602b942ef7816000203c07479baf1e/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..98a078e8f1fbac8372a3d16214d14b89884c290c --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/60ace58591602b942ef7816000203c07479baf1e/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cce019f116bb2d7e1999b6a160e0cfb405712953616433a64ff076ddf387997 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/60ace58591602b942ef7816000203c07479baf1e/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/60ace58591602b942ef7816000203c07479baf1e/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..82b19b39245730f1af7cb49184b4e29b9228bfe7 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/60ace58591602b942ef7816000203c07479baf1e/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c464e73f08f4b5288848c1ee775b36d79c45ef1533aec4ef99f09c8b4a125c2 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/6e3a4bd20238f6964cb447efc2bf4f9ae889212f/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/6e3a4bd20238f6964cb447efc2bf4f9ae889212f/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fd68d790fd9d01f86b37f9e8c14eccb37c36fe3e Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/6e3a4bd20238f6964cb447efc2bf4f9ae889212f/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/6e3a4bd20238f6964cb447efc2bf4f9ae889212f/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/6e3a4bd20238f6964cb447efc2bf4f9ae889212f/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..9bf2adb42c8ff20054273238668575926f916e67 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/6e3a4bd20238f6964cb447efc2bf4f9ae889212f/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aaa266547d4a8ea6e40c399c8e2923c5b7c5b86ec463ba3e6ec3f47103fa28d +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/6e3a4bd20238f6964cb447efc2bf4f9ae889212f/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/6e3a4bd20238f6964cb447efc2bf4f9ae889212f/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ab213526a8d1307c8838c560bf185b125fb37e0 --- /dev/null +++ 
b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/6e3a4bd20238f6964cb447efc2bf4f9ae889212f/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2107d9d5e9e2ae036dc8ec731b7e335d3ab9f0a58918086101804a3197280210 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/6e3a4bd20238f6964cb447efc2bf4f9ae889212f/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/6e3a4bd20238f6964cb447efc2bf4f9ae889212f/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..973ac295cb1e6ac5d6b6501feeeb5b279f07c130 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/6e3a4bd20238f6964cb447efc2bf4f9ae889212f/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010c319d0f4638ef021dd33a819c17c3f9fa8c5bf4403d2c2c462c335d790d54 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/89577abc4b195d823ba8cf80e9405fc7bc822ebe/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/89577abc4b195d823ba8cf80e9405fc7bc822ebe/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..dca5624adce22a8958107d264c7c5c71b7910ac1 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/89577abc4b195d823ba8cf80e9405fc7bc822ebe/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/89577abc4b195d823ba8cf80e9405fc7bc822ebe/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/89577abc4b195d823ba8cf80e9405fc7bc822ebe/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee1cf6e829578943bf81888e1020d40069a732ce --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/89577abc4b195d823ba8cf80e9405fc7bc822ebe/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afc864c2f14f2650460a9c3c1b141260d9fdb84f357c9849b2c4ba23fdc6b900 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/89577abc4b195d823ba8cf80e9405fc7bc822ebe/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/89577abc4b195d823ba8cf80e9405fc7bc822ebe/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..26f47d84c30dbca6b7ded41af59c775d37be42e3 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/89577abc4b195d823ba8cf80e9405fc7bc822ebe/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a6cde23f3db7b24d2415e8887e4d00aeea817eea435299e705f12d04c75fbb +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/89577abc4b195d823ba8cf80e9405fc7bc822ebe/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/89577abc4b195d823ba8cf80e9405fc7bc822ebe/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..34da91e6f5f2fd4120ef9889d884837f8be3bc3f --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/89577abc4b195d823ba8cf80e9405fc7bc822ebe/latent_code_z.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ad1a4ce1d2ff93b5073d14f01a3267d1e7f13d5e7cc6e113369f9e70a59e6fd +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9a4e239f497107fb53d0943509e87a96b1a7a054/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9a4e239f497107fb53d0943509e87a96b1a7a054/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9e4e5b8954f9dba1783891261c55f1b049ccd81c Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9a4e239f497107fb53d0943509e87a96b1a7a054/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9a4e239f497107fb53d0943509e87a96b1a7a054/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9a4e239f497107fb53d0943509e87a96b1a7a054/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..746523f73fbbe5e44d50e259d3e426e9f31a00b3 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9a4e239f497107fb53d0943509e87a96b1a7a054/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10dbed7772d3786fde0e14d3062d5c55ad8eea96edc8b1baf80748b341f08661 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9a4e239f497107fb53d0943509e87a96b1a7a054/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9a4e239f497107fb53d0943509e87a96b1a7a054/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..61e8219fe0533c16ddeb4d8f144d5f173d9590c2 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9a4e239f497107fb53d0943509e87a96b1a7a054/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31fd84ac38275af274939d3b0902dafcf7d34a65ec024be977db0abda788c891 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9a4e239f497107fb53d0943509e87a96b1a7a054/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9a4e239f497107fb53d0943509e87a96b1a7a054/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..704cedd30ff0ebfbc964db423aaf543943790900 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9a4e239f497107fb53d0943509e87a96b1a7a054/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c30a90d5a0d2f67be12a07961f20c613d1ddc5c61d35caf957ae185a11e12f9 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9b83fd8d69c3b5656528ad9ed3f17660db26c941/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9b83fd8d69c3b5656528ad9ed3f17660db26c941/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3dae80ac6ff0105dfa23f8b0515c79e569a87c09 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9b83fd8d69c3b5656528ad9ed3f17660db26c941/image_w.jpg differ diff --git 
a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9b83fd8d69c3b5656528ad9ed3f17660db26c941/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9b83fd8d69c3b5656528ad9ed3f17660db26c941/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..c277c177b86c79dc50a1fba67fc8afcb52eae75b --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9b83fd8d69c3b5656528ad9ed3f17660db26c941/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d9988e4688d93214f9cb8f1f2636f977467c9bf8ab87bf5f13d43c238da183d +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9b83fd8d69c3b5656528ad9ed3f17660db26c941/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9b83fd8d69c3b5656528ad9ed3f17660db26c941/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..11c484539342dd98ff5a8427dc7a39c794696340 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9b83fd8d69c3b5656528ad9ed3f17660db26c941/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:927ac78c019bdcb1ceac1454c17c0ac398956e463ddb5add6eb625d3bea7033c +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9b83fd8d69c3b5656528ad9ed3f17660db26c941/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9b83fd8d69c3b5656528ad9ed3f17660db26c941/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..1bab376576f5fcb09fe33c57a1b660118704fcca --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/9b83fd8d69c3b5656528ad9ed3f17660db26c941/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:867d53c4d38060e1246e9720a988351385d6d0979e6c330aa867ad105aa05fe6 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/af24c3fa4e59d2b0d821a6f4d687f8a79cfc5be6/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/af24c3fa4e59d2b0d821a6f4d687f8a79cfc5be6/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6b83d2d8a6f36fb22c48a185ec2e84cfdc956f99 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/af24c3fa4e59d2b0d821a6f4d687f8a79cfc5be6/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/af24c3fa4e59d2b0d821a6f4d687f8a79cfc5be6/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/af24c3fa4e59d2b0d821a6f4d687f8a79cfc5be6/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb7b776512471bcfa02064cff2681294a094fda7 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/af24c3fa4e59d2b0d821a6f4d687f8a79cfc5be6/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3faa7d18d4b50352cb21ef37e1132605855ff1ca45cda390fa990e0b357d718d +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/af24c3fa4e59d2b0d821a6f4d687f8a79cfc5be6/latent_code_w.pt 
b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/af24c3fa4e59d2b0d821a6f4d687f8a79cfc5be6/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..15771b4eca68465eddde1f6cdb744ba15db5e8f3 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/af24c3fa4e59d2b0d821a6f4d687f8a79cfc5be6/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c017ee404bdb712ffba52dd48b54a96206fac4d020c5f0f5fe9d2ea3c02f37 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/af24c3fa4e59d2b0d821a6f4d687f8a79cfc5be6/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/af24c3fa4e59d2b0d821a6f4d687f8a79cfc5be6/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..2cdcdc97e39428271f0bb414b7a181de23453d28 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/af24c3fa4e59d2b0d821a6f4d687f8a79cfc5be6/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a32f7f940cd3c15feaa6bba26e54de172a80ac8d511871acf6759ba59cd78b6 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/bcedcd2defa737e93437f0b0088a2c205b40aea1/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/bcedcd2defa737e93437f0b0088a2c205b40aea1/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f79d2b2fc7f4e3581f26c1e34b62f87c5e73b226 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/bcedcd2defa737e93437f0b0088a2c205b40aea1/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/bcedcd2defa737e93437f0b0088a2c205b40aea1/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/bcedcd2defa737e93437f0b0088a2c205b40aea1/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..07eeb244a971e10a16771d17c65973a638bb1ca8 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/bcedcd2defa737e93437f0b0088a2c205b40aea1/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47f84e0c0eaea7335c1dc23f6062a86cd63c2317b5a396dda2d1f6223514226e +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/bcedcd2defa737e93437f0b0088a2c205b40aea1/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/bcedcd2defa737e93437f0b0088a2c205b40aea1/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c91796618ddc5541d66103c87997077a55a10fa --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/bcedcd2defa737e93437f0b0088a2c205b40aea1/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c76bd3012bea9f3f822874978836734bc01c755b987dc9f67d854e64519f4a26 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/bcedcd2defa737e93437f0b0088a2c205b40aea1/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/bcedcd2defa737e93437f0b0088a2c205b40aea1/latent_code_z.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..6cd46ae6bf676f755ad3a0fcc3d4de043cf6c0ec --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/bcedcd2defa737e93437f0b0088a2c205b40aea1/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60809b6b5a96851973504310b041a4eed90464c717b9c2260e726bbbed4ce48 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/c9aea1b9d9b1f2d18492fe9c62beba573e2c3f96/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/c9aea1b9d9b1f2d18492fe9c62beba573e2c3f96/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fa9281e10d74cfabe6315d25e6be3c6ccd056447 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/c9aea1b9d9b1f2d18492fe9c62beba573e2c3f96/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/c9aea1b9d9b1f2d18492fe9c62beba573e2c3f96/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/c9aea1b9d9b1f2d18492fe9c62beba573e2c3f96/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab691fa33739f1fbd1e40f3936383f5b27051fa4 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/c9aea1b9d9b1f2d18492fe9c62beba573e2c3f96/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6de50218b34ab17d8ede06f32cd06cab05d89b982800a7c43f9fdd679c9c154d +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/c9aea1b9d9b1f2d18492fe9c62beba573e2c3f96/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/c9aea1b9d9b1f2d18492fe9c62beba573e2c3f96/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..d355693f6e1ae43b87934dafa9382d3e7d9b3a13 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/c9aea1b9d9b1f2d18492fe9c62beba573e2c3f96/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63127c467e1d6bff0e9dd4a76ea2aefdfc67d4a886ec22404a28d7859bd059e +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/c9aea1b9d9b1f2d18492fe9c62beba573e2c3f96/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/c9aea1b9d9b1f2d18492fe9c62beba573e2c3f96/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..4399f0d014d14a971a231ce018a5f92ae2b55a5f --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/c9aea1b9d9b1f2d18492fe9c62beba573e2c3f96/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b710a80243c077fa6f127910f33354c77db2a7607ba0c2564eea1950683bbb5c +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/ca7400fdb6f3ed1e3b73cdec11c8c4abef13bb19/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/ca7400fdb6f3ed1e3b73cdec11c8c4abef13bb19/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..63f9c768fa89b45fd0a8f7a75943689280e5892f Binary files /dev/null and 
b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/ca7400fdb6f3ed1e3b73cdec11c8c4abef13bb19/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/ca7400fdb6f3ed1e3b73cdec11c8c4abef13bb19/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/ca7400fdb6f3ed1e3b73cdec11c8c4abef13bb19/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..6874cba270ab2e2bffa47b7829821e4e3599156a --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/ca7400fdb6f3ed1e3b73cdec11c8c4abef13bb19/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d6b96539a8fb16e2ad43fc5e0df45324d9e344963c393d5da2ee5a0de7e254 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/ca7400fdb6f3ed1e3b73cdec11c8c4abef13bb19/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/ca7400fdb6f3ed1e3b73cdec11c8c4abef13bb19/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c4cef3fb1147256c18933dc8c8f0efb1849cf07 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/ca7400fdb6f3ed1e3b73cdec11c8c4abef13bb19/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adfe1214314f89e79249c49ca8ee092353f27e2e8fd6b391a17c9598af11347e +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/ca7400fdb6f3ed1e3b73cdec11c8c4abef13bb19/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/ca7400fdb6f3ed1e3b73cdec11c8c4abef13bb19/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..35f4bb730be25c791684a9671a2b579ac72ced54 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/ca7400fdb6f3ed1e3b73cdec11c8c4abef13bb19/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eead93812a71ebf937a8155ef4dc769726bfafef9e4f604f52959817f2203edc +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/d5615b74fec41e0e92afef9eda6dfeca9dbad13c/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/d5615b74fec41e0e92afef9eda6dfeca9dbad13c/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e3aeadeb5f564947be57f12f02552f861f3df2b4 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/d5615b74fec41e0e92afef9eda6dfeca9dbad13c/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/d5615b74fec41e0e92afef9eda6dfeca9dbad13c/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/d5615b74fec41e0e92afef9eda6dfeca9dbad13c/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae36f5f001b89b28429549690113e2155d41da74 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/d5615b74fec41e0e92afef9eda6dfeca9dbad13c/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d40b5128325cd49f04e5ef80a51041ba2dd9979d04b5747c4c20831f9de2c176 +size 33515 
diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/d5615b74fec41e0e92afef9eda6dfeca9dbad13c/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/d5615b74fec41e0e92afef9eda6dfeca9dbad13c/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3d7a711676082b5239615530b31d8ee7ad2eda4 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/d5615b74fec41e0e92afef9eda6dfeca9dbad13c/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:309563bb83816e150cf43d90f81b9984ef8c823c31ee354081405c7cae28fdb8 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/d5615b74fec41e0e92afef9eda6dfeca9dbad13c/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/d5615b74fec41e0e92afef9eda6dfeca9dbad13c/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..2371afb58c4e43bb83e163c9e0e6f3f6239e4661 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/d5615b74fec41e0e92afef9eda6dfeca9dbad13c/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80fcf7052de0bf52761582d4419e2ebf6be4807ba9c458bbb7e06a4622df85f8 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/f9e6d201c4b618c5e5120fa8e016f363e1c2e98e/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/f9e6d201c4b618c5e5120fa8e016f363e1c2e98e/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..662203f824dfd785541e6fe08ae19e5c097f324d Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/f9e6d201c4b618c5e5120fa8e016f363e1c2e98e/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/f9e6d201c4b618c5e5120fa8e016f363e1c2e98e/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/f9e6d201c4b618c5e5120fa8e016f363e1c2e98e/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..8463f454e7dd645bab5565b3b0a7f5370e8e1866 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/f9e6d201c4b618c5e5120fa8e016f363e1c2e98e/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e6cfec353c0d6e9616f787502a9fe8e9413175d5b0cfbc356128cfd5c22964 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/f9e6d201c4b618c5e5120fa8e016f363e1c2e98e/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/f9e6d201c4b618c5e5120fa8e016f363e1c2e98e/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..91d08aacb6e50e91077fa13b65cbe64f0e7f5b9d --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/f9e6d201c4b618c5e5120fa8e016f363e1c2e98e/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d127089a20986a6bb94c0dafa454fd850c3ffcb31f699b6e585fee13f2c87ea +size 2795 diff --git 
a/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/f9e6d201c4b618c5e5120fa8e016f363e1c2e98e/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/f9e6d201c4b618c5e5120fa8e016f363e1c2e98e/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..82cbbd8e47917d1ea25174b98d60a92ee16b7c45 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqcat512/stylegan2_afhqcat512-16/f9e6d201c4b618c5e5120fa8e016f363e1c2e98e/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c9594f7f69cbce468aff9d55a4083b74cb30a52e277b6cfbaf5cd733697309a +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0463142bbe83824e7bf2fefd6c72a1bf0a207bce/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0463142bbe83824e7bf2fefd6c72a1bf0a207bce/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..da11742c8526dc299a85b4b370e1989195039c68 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0463142bbe83824e7bf2fefd6c72a1bf0a207bce/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0463142bbe83824e7bf2fefd6c72a1bf0a207bce/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0463142bbe83824e7bf2fefd6c72a1bf0a207bce/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..20d14f9b16a12e02965d6be2650bac03e0b48b5d --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0463142bbe83824e7bf2fefd6c72a1bf0a207bce/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53b2543d0763d365031f96b49bafabc05875282450b0fbd6176ad7c5ee77c82c +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0463142bbe83824e7bf2fefd6c72a1bf0a207bce/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0463142bbe83824e7bf2fefd6c72a1bf0a207bce/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..c87de3a047cb5e4072bb4eaf0c71266733214fbb --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0463142bbe83824e7bf2fefd6c72a1bf0a207bce/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8055a360d9d2de35b9b15eb8026fecbb728598a1477eea93d2297d5f5ad9bb23 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0463142bbe83824e7bf2fefd6c72a1bf0a207bce/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0463142bbe83824e7bf2fefd6c72a1bf0a207bce/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..39f29528781958a0403964d3118d705e7690d0de --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0463142bbe83824e7bf2fefd6c72a1bf0a207bce/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97fe9c3cf610e985dea8e529464fe84467b435b438f4bcfb72c5eec371031ba9 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/077567fe124b0f5753c9c347fc03a65bf3186346/image_w.jpg 
b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/077567fe124b0f5753c9c347fc03a65bf3186346/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..22dfdf52a9cf4d5b404992f4bad0dfb599511394 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/077567fe124b0f5753c9c347fc03a65bf3186346/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/077567fe124b0f5753c9c347fc03a65bf3186346/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/077567fe124b0f5753c9c347fc03a65bf3186346/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..4303747469d283098b7395832b28bbcc60daf104 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/077567fe124b0f5753c9c347fc03a65bf3186346/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf5277837ab91d78cce3010c878826818a0db512c6b17db37b27517a4d69bb7 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/077567fe124b0f5753c9c347fc03a65bf3186346/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/077567fe124b0f5753c9c347fc03a65bf3186346/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..cde122776c05e59c63ad769c18cc077924e4f665 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/077567fe124b0f5753c9c347fc03a65bf3186346/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1c4c97ec8a61262c81a4b7b9bfc64ceb75511f1ac06eb62aa0ba2b5dabfd1c0 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/077567fe124b0f5753c9c347fc03a65bf3186346/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/077567fe124b0f5753c9c347fc03a65bf3186346/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..8cb65d5f62d0593526a441e0a623f25c987c4c7f --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/077567fe124b0f5753c9c347fc03a65bf3186346/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d778d0905634e87d48166a4e94dffe30fe30034dfddaebf13b4009e887d4ff66 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0de03a0e39a01626b7c2d3227aa8ac73b11665ef/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0de03a0e39a01626b7c2d3227aa8ac73b11665ef/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ec358a952974bd89b5f8b193666591283b2b16dc Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0de03a0e39a01626b7c2d3227aa8ac73b11665ef/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0de03a0e39a01626b7c2d3227aa8ac73b11665ef/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0de03a0e39a01626b7c2d3227aa8ac73b11665ef/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc486389ee44a5f839f767faeb4f6908879fd8d9 --- /dev/null +++ 
b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0de03a0e39a01626b7c2d3227aa8ac73b11665ef/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcbddc69029784afd44a90736532b4676b1415202fab40f54242b23d30b2e6a3 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0de03a0e39a01626b7c2d3227aa8ac73b11665ef/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0de03a0e39a01626b7c2d3227aa8ac73b11665ef/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..b858e69fccb2dc62d8711026c3c469536b9315b0 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0de03a0e39a01626b7c2d3227aa8ac73b11665ef/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0c921b2561be9f8e6e4aaaaebaf6b88e87c2e53d4d37c43a1acd59f0dff9a8e +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0de03a0e39a01626b7c2d3227aa8ac73b11665ef/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0de03a0e39a01626b7c2d3227aa8ac73b11665ef/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..0803985a8edb9f8f99babf996ce8897e42aa8bb3 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/0de03a0e39a01626b7c2d3227aa8ac73b11665ef/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42ee6939c08f54a7fbe74c2c60045861a10d34cc5e69696758bba6366795b7e4 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1562e27145bfc46b95bfde17da52834456a97ebb/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1562e27145bfc46b95bfde17da52834456a97ebb/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6400d666ff95365ae1249ea677749f85393fb663 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1562e27145bfc46b95bfde17da52834456a97ebb/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1562e27145bfc46b95bfde17da52834456a97ebb/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1562e27145bfc46b95bfde17da52834456a97ebb/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..36ec7b4856239e660a190ee64964f51b3de36e59 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1562e27145bfc46b95bfde17da52834456a97ebb/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81098cac03d59841142b3035aebb00c87fb16197706f98e23977ebe5454daa49 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1562e27145bfc46b95bfde17da52834456a97ebb/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1562e27145bfc46b95bfde17da52834456a97ebb/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..fade77ab2b321d7209ecd79129b8455a0cf6181c --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1562e27145bfc46b95bfde17da52834456a97ebb/latent_code_w.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1592ea0b079acbcdef02425f1f47aafcaaeeb3921f9551dd7f3199489fe3af1a +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1562e27145bfc46b95bfde17da52834456a97ebb/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1562e27145bfc46b95bfde17da52834456a97ebb/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..bec115ed4b1ce7882d73936d54d7a33256574f34 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1562e27145bfc46b95bfde17da52834456a97ebb/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a6298be86547ee8da1d0d7500d293b95f52e6f389b43ceba0d985ff1f238f9 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1eb6471cbbc59a43d54d401bd629f5c366521e42/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1eb6471cbbc59a43d54d401bd629f5c366521e42/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9c62d2c35ae809014273764bacd1c3d9436f665d Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1eb6471cbbc59a43d54d401bd629f5c366521e42/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1eb6471cbbc59a43d54d401bd629f5c366521e42/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1eb6471cbbc59a43d54d401bd629f5c366521e42/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..99ee8f26012ba629d98dd11cc7d439582119fd3f --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1eb6471cbbc59a43d54d401bd629f5c366521e42/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a610e961721a7cb7854a52bbe0e4e25a280aa3b288fd5042ad40828e096618c +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1eb6471cbbc59a43d54d401bd629f5c366521e42/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1eb6471cbbc59a43d54d401bd629f5c366521e42/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..b045504b064d3ff03ba2b23767788bc5cc895f31 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1eb6471cbbc59a43d54d401bd629f5c366521e42/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc1004e1bde9e253a5fbfc6184c9233f06bea80298319624ad9f6f54b904944 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1eb6471cbbc59a43d54d401bd629f5c366521e42/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1eb6471cbbc59a43d54d401bd629f5c366521e42/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5cf1792524bc66875e83bf96936a387e2030596 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/1eb6471cbbc59a43d54d401bd629f5c366521e42/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c298f5dc3d840a1505a8d336c7379c2f37d6c0c6effe34fb3e45b7437b1c01e +size 2795 
diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/2f4a27aa4017c24cd7a7a92b3358e00651a8a1a2/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/2f4a27aa4017c24cd7a7a92b3358e00651a8a1a2/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..39cbf54fabe1768c001a705929beecc1c202088f Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/2f4a27aa4017c24cd7a7a92b3358e00651a8a1a2/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/2f4a27aa4017c24cd7a7a92b3358e00651a8a1a2/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/2f4a27aa4017c24cd7a7a92b3358e00651a8a1a2/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..844fdfa74bf8016af9dc7a5406600f7cac575ae0 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/2f4a27aa4017c24cd7a7a92b3358e00651a8a1a2/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a62baf727f9d608a2813c39ac57df0b2b5846a4b9d40762b7602e4b6502d56 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/2f4a27aa4017c24cd7a7a92b3358e00651a8a1a2/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/2f4a27aa4017c24cd7a7a92b3358e00651a8a1a2/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb7c1cb435c32af3b3be06805efd05e354cb5f7d --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/2f4a27aa4017c24cd7a7a92b3358e00651a8a1a2/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79e5cfce1072bac89f23373ad99f1166bd2af1005cdbce802c9eb3be167404d0 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/2f4a27aa4017c24cd7a7a92b3358e00651a8a1a2/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/2f4a27aa4017c24cd7a7a92b3358e00651a8a1a2/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f162f272658c470ec8bc014b308d47da6286db7 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/2f4a27aa4017c24cd7a7a92b3358e00651a8a1a2/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92ad029b78f1a56a0d8ef24b8a46672fa3b3d72e36394a76770ad83ec2335519 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/6d0ecb1946f829954a33a2d908a11bff2bbd3002/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/6d0ecb1946f829954a33a2d908a11bff2bbd3002/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d1e41562cbb1210ec95ca944688f0f894a40468d Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/6d0ecb1946f829954a33a2d908a11bff2bbd3002/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/6d0ecb1946f829954a33a2d908a11bff2bbd3002/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/6d0ecb1946f829954a33a2d908a11bff2bbd3002/latent_code_w+.pt 
new file mode 100644 index 0000000000000000000000000000000000000000..f00af918f3df3385e20a1fd61329e16481be6909 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/6d0ecb1946f829954a33a2d908a11bff2bbd3002/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2668335518c61f89d9d9cbd48ce327213d898711e206a791440b8547e959f908 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/6d0ecb1946f829954a33a2d908a11bff2bbd3002/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/6d0ecb1946f829954a33a2d908a11bff2bbd3002/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..246cb4b15751d1b3c5116a52a8997db8d6e71f4a --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/6d0ecb1946f829954a33a2d908a11bff2bbd3002/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f28ae06f3d4508ff62fa6d787126e802584f9cc77906c2a0033ca4e7ca4594e5 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/6d0ecb1946f829954a33a2d908a11bff2bbd3002/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/6d0ecb1946f829954a33a2d908a11bff2bbd3002/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..a71f6eaae63d9c5c474c7cc4e05a44282661e157 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/6d0ecb1946f829954a33a2d908a11bff2bbd3002/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04c9ab693ad65de6bac4b2c746cd92450a34d1d245115953988c583e7f8ab0f9 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/815399c6c38959536e2c6e5f48667dccd6a512a8/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/815399c6c38959536e2c6e5f48667dccd6a512a8/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0228b8c48ddaf74020216d99ae3785aae65953e0 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/815399c6c38959536e2c6e5f48667dccd6a512a8/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/815399c6c38959536e2c6e5f48667dccd6a512a8/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/815399c6c38959536e2c6e5f48667dccd6a512a8/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..b491a1e45e63f3e795fa5dd6c48ba5fbe2b2a8e0 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/815399c6c38959536e2c6e5f48667dccd6a512a8/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97abde68c429de0b8c3331684ca8cafadaa69ebf8400c7ec9270cf18426c3d2c +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/815399c6c38959536e2c6e5f48667dccd6a512a8/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/815399c6c38959536e2c6e5f48667dccd6a512a8/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd1b66b8c92e9c012791002e1528f4e0d570c116 --- /dev/null +++ 
b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/815399c6c38959536e2c6e5f48667dccd6a512a8/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e178c0a5062f1df30d04c81b4dc4afee97123c53c9420a070651600acd2952 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/815399c6c38959536e2c6e5f48667dccd6a512a8/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/815399c6c38959536e2c6e5f48667dccd6a512a8/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0215cadd00973270d8ad7689163f0ea22d1b82f --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/815399c6c38959536e2c6e5f48667dccd6a512a8/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:412ed7388de19dfd7abbba95112ba95535b67fcc75f8d4d78b903f84c2d2a253 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/874f844a4ee646e92367a15d7e1332c97653acd8/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/874f844a4ee646e92367a15d7e1332c97653acd8/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..65e797f3cd25728a4cd89d70cf1fd351438bf846 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/874f844a4ee646e92367a15d7e1332c97653acd8/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/874f844a4ee646e92367a15d7e1332c97653acd8/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/874f844a4ee646e92367a15d7e1332c97653acd8/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..962ca452000873660fba182eec39b8a7bcd59215 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/874f844a4ee646e92367a15d7e1332c97653acd8/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1982f997ddd025242db4ed5bcbfe515b9908f1383aeee6021c8914c1a891a2d +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/874f844a4ee646e92367a15d7e1332c97653acd8/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/874f844a4ee646e92367a15d7e1332c97653acd8/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..071c7191aff1b1485de3113aae619defa0ff53b3 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/874f844a4ee646e92367a15d7e1332c97653acd8/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ec49ace1b62334f7772b011db17620c55f66dc1a56a635bec65ff7e3eeeadd +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/874f844a4ee646e92367a15d7e1332c97653acd8/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/874f844a4ee646e92367a15d7e1332c97653acd8/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..86095cc2115b7d6dfc890cbe3b37e351889f1be5 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/874f844a4ee646e92367a15d7e1332c97653acd8/latent_code_z.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37a16c7d4c1521adf0892b081f1f8eb1375ceec7cf2609c5a35f18fa72a5fa34 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/8a1acb8cfe03864184fbf17d7d8ba9dc8d2cfa2f/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/8a1acb8cfe03864184fbf17d7d8ba9dc8d2cfa2f/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..16764aec56baf80ae1349752708185177279f89e Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/8a1acb8cfe03864184fbf17d7d8ba9dc8d2cfa2f/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/8a1acb8cfe03864184fbf17d7d8ba9dc8d2cfa2f/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/8a1acb8cfe03864184fbf17d7d8ba9dc8d2cfa2f/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..25bf1268815fbd8b810a3beb06f8a796d131272e --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/8a1acb8cfe03864184fbf17d7d8ba9dc8d2cfa2f/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60aa97dd3546651603f2babf6a37d70d777d4ac4ed38ed0185a7083df83d6b4d +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/8a1acb8cfe03864184fbf17d7d8ba9dc8d2cfa2f/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/8a1acb8cfe03864184fbf17d7d8ba9dc8d2cfa2f/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e67a17efb4e21e87d8edfe52750f16c35576443 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/8a1acb8cfe03864184fbf17d7d8ba9dc8d2cfa2f/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc0cd91b1bfcb16c2c7a573ee7b455e813d3fc7aded18b629175952ed0e50c09 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/8a1acb8cfe03864184fbf17d7d8ba9dc8d2cfa2f/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/8a1acb8cfe03864184fbf17d7d8ba9dc8d2cfa2f/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fddbc0e1ca756dd0b0072088cec188e191fecbd --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/8a1acb8cfe03864184fbf17d7d8ba9dc8d2cfa2f/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:924391cba3363a00b93b9fbeff7470299c92406c531bb50cb047ee8988f27492 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/99c3da57fc496d25733a1c1401204d9895f28377/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/99c3da57fc496d25733a1c1401204d9895f28377/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2372efc4c0958b61da1cd3e8148364a5df3e1a29 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/99c3da57fc496d25733a1c1401204d9895f28377/image_w.jpg differ diff --git 
a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/99c3da57fc496d25733a1c1401204d9895f28377/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/99c3da57fc496d25733a1c1401204d9895f28377/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e1e6a6392dc9de1943072ee5b3ccba08b2c16bb --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/99c3da57fc496d25733a1c1401204d9895f28377/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c606df2ee2a4a095867e4d6f8426879f0628438aba6261a67f169f675ae6c8a +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/99c3da57fc496d25733a1c1401204d9895f28377/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/99c3da57fc496d25733a1c1401204d9895f28377/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3c20cb498687dea564a4f7afa708e0e2e78a0b0 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/99c3da57fc496d25733a1c1401204d9895f28377/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6595a40bf4b31341b80597c2a32ee06e5760dce3563901bf1875af6a2ece7a09 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/99c3da57fc496d25733a1c1401204d9895f28377/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/99c3da57fc496d25733a1c1401204d9895f28377/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..6217fada43a2ecd1b4f1e90313dbae270994e721 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/99c3da57fc496d25733a1c1401204d9895f28377/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:834d63fb50118542a1935ee26cb16bb11adfc8232f13dc4ef0aa23f875aad6b5 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/9b4a17f913ec9cee62779ba402c3b9d401323138/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/9b4a17f913ec9cee62779ba402c3b9d401323138/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..78537325c9328cf46e1d555df43eb76850fb3cee Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/9b4a17f913ec9cee62779ba402c3b9d401323138/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/9b4a17f913ec9cee62779ba402c3b9d401323138/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/9b4a17f913ec9cee62779ba402c3b9d401323138/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf4a2be109a9339bae976c56f12f812d4c0e87aa --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/9b4a17f913ec9cee62779ba402c3b9d401323138/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5db828e29bcf7b2cdf62be6cd9c7f502911e88e2a619de4660678a74be9c9ba3 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/9b4a17f913ec9cee62779ba402c3b9d401323138/latent_code_w.pt 
b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/9b4a17f913ec9cee62779ba402c3b9d401323138/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bd90829d7912c0e60348e1bbd94cbbe2d33187f --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/9b4a17f913ec9cee62779ba402c3b9d401323138/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1356da47449b8a100c568019e71725da2168c5a100a47f0aca730dd62cc52b4 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/9b4a17f913ec9cee62779ba402c3b9d401323138/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/9b4a17f913ec9cee62779ba402c3b9d401323138/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..28a7920672f2a2327c637035850d7ac26b711462 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/9b4a17f913ec9cee62779ba402c3b9d401323138/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8135b2a5628a7326e632187846fab8cf85f765fd41d07c098f3faec0252267e +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/de30a3baf786d977cf84abddffe9d888296abf6e/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/de30a3baf786d977cf84abddffe9d888296abf6e/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..227ac789d42de32d40085ce362f33804132cda49 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/de30a3baf786d977cf84abddffe9d888296abf6e/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/de30a3baf786d977cf84abddffe9d888296abf6e/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/de30a3baf786d977cf84abddffe9d888296abf6e/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..f01a94bb5b0fac93ab900cf2754e4f396fb9a720 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/de30a3baf786d977cf84abddffe9d888296abf6e/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6def3b23939736ba8a83858257ff08e14641d9b00f0882712c787a9ebd5fceef +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/de30a3baf786d977cf84abddffe9d888296abf6e/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/de30a3baf786d977cf84abddffe9d888296abf6e/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c3fa6800213e73d3625a1b490fc4fe4b54d0811 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/de30a3baf786d977cf84abddffe9d888296abf6e/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:519931b93ecbe1feb895ac6c1845f01dc920d7572f76921926d9cc8c655d7b99 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/de30a3baf786d977cf84abddffe9d888296abf6e/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/de30a3baf786d977cf84abddffe9d888296abf6e/latent_code_z.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..3411dbafcfa8b92333acf889991b44ea2e2c52d5 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/de30a3baf786d977cf84abddffe9d888296abf6e/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49252009a876e3b3489733cc0d0d9c79fef08d3710ecaeb99e9e5bd9c298e8b7 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e4379be3d1963761580553b2653074ded1fa1c09/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e4379be3d1963761580553b2653074ded1fa1c09/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..47d654da58aa61307d4a985a845de2e030e82cbc Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e4379be3d1963761580553b2653074ded1fa1c09/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e4379be3d1963761580553b2653074ded1fa1c09/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e4379be3d1963761580553b2653074ded1fa1c09/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..a021b6cc05d22da411eec895c6d390bbee718918 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e4379be3d1963761580553b2653074ded1fa1c09/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfc54eefb49c9d5012ee7197b17a877266920c870f29bd6c6eb328216fbeddd3 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e4379be3d1963761580553b2653074ded1fa1c09/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e4379be3d1963761580553b2653074ded1fa1c09/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..16a71990c196aa23fc15dc0e81c83ea8205ffaad --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e4379be3d1963761580553b2653074ded1fa1c09/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae808d4cc7c333e8a71e5df3559eb1e9cce1f483a0875863a3909390248da68e +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e4379be3d1963761580553b2653074ded1fa1c09/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e4379be3d1963761580553b2653074ded1fa1c09/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..55ae6177134fa63466d85593c97974f000c49b59 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e4379be3d1963761580553b2653074ded1fa1c09/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b43a0e583e3f40cbd5c95226aa82bb31527069693dac77e3f962c9386084a505 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e9f5e924c152c105999676e552a845052b68e1fa/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e9f5e924c152c105999676e552a845052b68e1fa/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0b9d3dece8aed57f36063b3023d85e2bff497ca2 Binary files /dev/null and 
b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e9f5e924c152c105999676e552a845052b68e1fa/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e9f5e924c152c105999676e552a845052b68e1fa/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e9f5e924c152c105999676e552a845052b68e1fa/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..33f4e1a1fecf5f6cc528248edbed947821b9f9f0 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e9f5e924c152c105999676e552a845052b68e1fa/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3c0cf18d39a4a3f4fdc2999bc49f620dbb6b969fc7a97f7bf6db0f2e7781c50 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e9f5e924c152c105999676e552a845052b68e1fa/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e9f5e924c152c105999676e552a845052b68e1fa/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..412b4cb86cfabb067f3c79d3e46e81ba274dc8f1 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e9f5e924c152c105999676e552a845052b68e1fa/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:484da748a6f10cddf5f12f05546a69b62020de470b08fa3a924a298ff0d562e8 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e9f5e924c152c105999676e552a845052b68e1fa/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e9f5e924c152c105999676e552a845052b68e1fa/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..a98d55fe6583b8a4539cc7cd2ff4bdb001966274 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/e9f5e924c152c105999676e552a845052b68e1fa/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a10ea3f624a036e78aaba6afc0a25185ae74dbc7384ef48c5da4030039165fbd +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/ecc0ed4453aef077a0309ff014f4312c507ac097/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/ecc0ed4453aef077a0309ff014f4312c507ac097/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9e2b7fcb46a3dffd834f46dd22ed033a01485310 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/ecc0ed4453aef077a0309ff014f4312c507ac097/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/ecc0ed4453aef077a0309ff014f4312c507ac097/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/ecc0ed4453aef077a0309ff014f4312c507ac097/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..42a05101587a957034c424bd10012b67b02d8089 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/ecc0ed4453aef077a0309ff014f4312c507ac097/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe97558343dc4c21ab5c92ecf483eba4de6d75cb8944a1bcf4ff55001e4ecc9 +size 33515 
diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/ecc0ed4453aef077a0309ff014f4312c507ac097/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/ecc0ed4453aef077a0309ff014f4312c507ac097/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..5732070ae2f7ec04c8893d9a416b3d722a650a5d --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/ecc0ed4453aef077a0309ff014f4312c507ac097/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e43d89b1fad89c011a724b3c29968e868dc4a7a2a24420d89c20fec82782ac +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/ecc0ed4453aef077a0309ff014f4312c507ac097/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/ecc0ed4453aef077a0309ff014f4312c507ac097/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..834200ccc5bdc03da05248bd7e76b8473c8855d5 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_afhqdog512/stylegan2_afhqdog512-16/ecc0ed4453aef077a0309ff014f4312c507ac097/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d45f57f7e531086bd9b1154d6c91807bd06fdd6b7d1a172fb7b1e83e02b0b03b +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/1d924879fd06a3a8f24986e12da72a623218c806/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/1d924879fd06a3a8f24986e12da72a623218c806/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ffeb428762ea7f2558fd9d704e2481621c755685 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/1d924879fd06a3a8f24986e12da72a623218c806/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/1d924879fd06a3a8f24986e12da72a623218c806/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/1d924879fd06a3a8f24986e12da72a623218c806/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..6db9334c24a4daf3d7416b8d9d7b79456c96c93d --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/1d924879fd06a3a8f24986e12da72a623218c806/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b003cd8ab127c1a9f6cb01a6aae18f30d9ae6b40d5faeb0d7ca0243737ea0bf1 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/1d924879fd06a3a8f24986e12da72a623218c806/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/1d924879fd06a3a8f24986e12da72a623218c806/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..4017a129a74ac8a121a462e8800d11a7a78105df --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/1d924879fd06a3a8f24986e12da72a623218c806/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfb030fc9bad57d370ca206eb404b9727da08252716cf43219c8776760394205 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/1d924879fd06a3a8f24986e12da72a623218c806/latent_code_z.pt 
b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/1d924879fd06a3a8f24986e12da72a623218c806/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..7855d18384d9837aea0a5e2e24816ebb48b4c171 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/1d924879fd06a3a8f24986e12da72a623218c806/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15ac1b8dc9718c43e0d1681500ebbe70f5f5724cdaa04db2c5ddc6a279413651 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/520fe7046c4fd543731b3474edfa32c1f54b6059/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/520fe7046c4fd543731b3474edfa32c1f54b6059/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d7f9bb26dbcd9674e21229f4b3628b46350981df Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/520fe7046c4fd543731b3474edfa32c1f54b6059/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/520fe7046c4fd543731b3474edfa32c1f54b6059/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/520fe7046c4fd543731b3474edfa32c1f54b6059/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..285cd971cb0201affd9ecd0deaa01ebc46f2280b --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/520fe7046c4fd543731b3474edfa32c1f54b6059/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abc4e1460b256a42f6fd2244a2954c308f1a1c96112115d03513f56ea5abadaf +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/520fe7046c4fd543731b3474edfa32c1f54b6059/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/520fe7046c4fd543731b3474edfa32c1f54b6059/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..574130d2a64c5471cb793819def0e97524e2a8d2 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/520fe7046c4fd543731b3474edfa32c1f54b6059/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:892ad64d9bb0a3259488418443b90fea46f375f82f0a693468199199466c4d4d +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/520fe7046c4fd543731b3474edfa32c1f54b6059/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/520fe7046c4fd543731b3474edfa32c1f54b6059/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1436886cdbd5198002dec2a23a9609d9c351cb4 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/520fe7046c4fd543731b3474edfa32c1f54b6059/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fefe7b15c91b8d3ea56f8ef09f67eb7b2c8f2710ade10701aff9d3c54b2d82a1 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/531fd681a1151ef10ad84ea831a7d1d0ef1e1fe5/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/531fd681a1151ef10ad84ea831a7d1d0ef1e1fe5/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..103970d34e9abce3e3bcc79a3e86f256db85d48e Binary files /dev/null and 
b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/531fd681a1151ef10ad84ea831a7d1d0ef1e1fe5/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/531fd681a1151ef10ad84ea831a7d1d0ef1e1fe5/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/531fd681a1151ef10ad84ea831a7d1d0ef1e1fe5/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..a56f4acd909a8829e06eed71963acd6d5df5d22f --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/531fd681a1151ef10ad84ea831a7d1d0ef1e1fe5/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a58bfb284bb3b780cb1cbfb4e43942aa2ac1476ab45d8f95e0d3c4d13aa4bb +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/531fd681a1151ef10ad84ea831a7d1d0ef1e1fe5/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/531fd681a1151ef10ad84ea831a7d1d0ef1e1fe5/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ec55699cf6f7a6444ac6a3ebc05ad7c32a73b85 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/531fd681a1151ef10ad84ea831a7d1d0ef1e1fe5/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8312bfca1bd3a22e9356e496b91e264123a60762952f4ead77525419b1e027d6 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/531fd681a1151ef10ad84ea831a7d1d0ef1e1fe5/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/531fd681a1151ef10ad84ea831a7d1d0ef1e1fe5/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..7402f5fe5f6fab0391e92128ff6216e829f71614 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/531fd681a1151ef10ad84ea831a7d1d0ef1e1fe5/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1e930ef92930df7cf72b05b4e0b087f7a387b43e38f9fe7388c3779241ae5ed +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/6f76e6d3d34abd2bb0e4b828a466f712068e51f5/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/6f76e6d3d34abd2bb0e4b828a466f712068e51f5/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d9d25b7228e7c5802db47518cb31948bf6f13bee Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/6f76e6d3d34abd2bb0e4b828a466f712068e51f5/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/6f76e6d3d34abd2bb0e4b828a466f712068e51f5/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/6f76e6d3d34abd2bb0e4b828a466f712068e51f5/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..a27cc9cb212505b9b7789a229a9f1f041e81dfed --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/6f76e6d3d34abd2bb0e4b828a466f712068e51f5/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752f5cd7172da49979280f275f7915d0e4bfc311386607e492c2d555515e8113 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/6f76e6d3d34abd2bb0e4b828a466f712068e51f5/latent_code_w.pt 
b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/6f76e6d3d34abd2bb0e4b828a466f712068e51f5/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4343f57f345485558f7b6102a82d2915548ad7e --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/6f76e6d3d34abd2bb0e4b828a466f712068e51f5/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86aa4994263ee6575821b2f5e2877b5489f6ab56b848eff826e52f36f1c9cec2 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/6f76e6d3d34abd2bb0e4b828a466f712068e51f5/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/6f76e6d3d34abd2bb0e4b828a466f712068e51f5/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8d0169731029797e49bbbb50dfa77e55612be8a --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/6f76e6d3d34abd2bb0e4b828a466f712068e51f5/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4af5bb831ecf2a50cdee1e15fcc16393e9acc6a4eb50833694e617377eb987f8 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/94a43dc42e327119d1d2e93e94a54df0b768bac2/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/94a43dc42e327119d1d2e93e94a54df0b768bac2/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d7aedeb6bcdbf72ecb7b85edcd9efb60bc773280 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/94a43dc42e327119d1d2e93e94a54df0b768bac2/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/94a43dc42e327119d1d2e93e94a54df0b768bac2/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/94a43dc42e327119d1d2e93e94a54df0b768bac2/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..4020e8f396711f55dcf1e5640548058debe26d81 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/94a43dc42e327119d1d2e93e94a54df0b768bac2/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc3a1c36a7a0c77ca7cdcfd378912064aad0f220aa5499111dc3b62ef43f5f6 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/94a43dc42e327119d1d2e93e94a54df0b768bac2/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/94a43dc42e327119d1d2e93e94a54df0b768bac2/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..43d0195e9308b84b35ffedc53b75492ffd9668c6 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/94a43dc42e327119d1d2e93e94a54df0b768bac2/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbc8692eeec20a06798f966eb08a9edbe4790978ebf98cca0a34c2005e90fe94 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/94a43dc42e327119d1d2e93e94a54df0b768bac2/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/94a43dc42e327119d1d2e93e94a54df0b768bac2/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..bec4346b9e1ec73453a9529c3e65575a59c2e681 --- /dev/null +++ 
b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/94a43dc42e327119d1d2e93e94a54df0b768bac2/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01540da8986d61eae37ed273b09cd842e216d1b2a24f48d5cfdaeaa5df5d142a +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/c0fab804798759160ad78d89127ae9c4cf920632/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/c0fab804798759160ad78d89127ae9c4cf920632/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..af16ae5eaf0e2b7198272b23f55fc6b59ffafbd8 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/c0fab804798759160ad78d89127ae9c4cf920632/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/c0fab804798759160ad78d89127ae9c4cf920632/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/c0fab804798759160ad78d89127ae9c4cf920632/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fa78623770971da5dc780620edb16a9b9ef5dca --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/c0fab804798759160ad78d89127ae9c4cf920632/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0120f21903e4b1bd7a44a2d926e40621dc09165ff14a15849628a7ccaabd8631 +size 33515 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/c0fab804798759160ad78d89127ae9c4cf920632/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/c0fab804798759160ad78d89127ae9c4cf920632/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e62bb87c18c963a1b5fb21df1bfdf099418fb5d --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/c0fab804798759160ad78d89127ae9c4cf920632/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c97f82748d08ea1312d9265571c7d54dbaf63ac3fb1591ebb6a9421c41fb9587 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/c0fab804798759160ad78d89127ae9c4cf920632/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/c0fab804798759160ad78d89127ae9c4cf920632/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3d57cc9d43b3a22d855c6b7a11c03cdefc6c4ba --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_car512/stylegan2_car512-6/c0fab804798759160ad78d89127ae9c4cf920632/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cb35f2e1d47ade6c735c2bc76cb2e93b9b98cc78dba70504042afa90b898b0f +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/04526c213876853f6f9b0e54a56ead34ba8f2f74/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/04526c213876853f6f9b0e54a56ead34ba8f2f74/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f01e483f5914b749c53ace003d71f69efa276680 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/04526c213876853f6f9b0e54a56ead34ba8f2f74/image_w.jpg differ diff --git 
a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/04526c213876853f6f9b0e54a56ead34ba8f2f74/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/04526c213876853f6f9b0e54a56ead34ba8f2f74/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6b5e7244a8366229afc7161cfb3a9ad9fc53549 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/04526c213876853f6f9b0e54a56ead34ba8f2f74/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95d533602b20a9aabc4a0d556343628354509f10039f12d269efee71ef656a52 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/04526c213876853f6f9b0e54a56ead34ba8f2f74/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/04526c213876853f6f9b0e54a56ead34ba8f2f74/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f43b99088aa65518677724f1767fae9100b9203 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/04526c213876853f6f9b0e54a56ead34ba8f2f74/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6fb5cbb73e6f29e5ca9eff030a676f278c5a12adce51cbaeefc405dd320f242 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/04526c213876853f6f9b0e54a56ead34ba8f2f74/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/04526c213876853f6f9b0e54a56ead34ba8f2f74/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..94e3abf3bcfda974ec8e2b100e49e0f3edabf635 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/04526c213876853f6f9b0e54a56ead34ba8f2f74/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97af29b2a2ae82eaa2434759783b7febaf594af9f0fb949fee475cf306d4bc50 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/061531669e22f739f903fb0d53e7c3e3a5740a71/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/061531669e22f739f903fb0d53e7c3e3a5740a71/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a8891da7954bd77d9e1a2f348faa60b9eb815bef Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/061531669e22f739f903fb0d53e7c3e3a5740a71/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/061531669e22f739f903fb0d53e7c3e3a5740a71/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/061531669e22f739f903fb0d53e7c3e3a5740a71/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba9085a0910485f09c170ef3d43b455ae31ea181 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/061531669e22f739f903fb0d53e7c3e3a5740a71/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a8c595f09fc6cfaef9e838d29146eb61a1335f57b98a36062f4c076319d9f2c +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/061531669e22f739f903fb0d53e7c3e3a5740a71/latent_code_w.pt 
b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/061531669e22f739f903fb0d53e7c3e3a5740a71/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b6c22bdd3f5d87eb93703d26b80c3a81c66e169 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/061531669e22f739f903fb0d53e7c3e3a5740a71/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a13a0fa4e109ed1252f01433e4878a5b6dbe42b223df67619a8990ed94a1c5b +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/061531669e22f739f903fb0d53e7c3e3a5740a71/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/061531669e22f739f903fb0d53e7c3e3a5740a71/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f3f081187cee393dcbb6316f64eb3fe386f8378 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/061531669e22f739f903fb0d53e7c3e3a5740a71/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dafdd0bed3e6fdb76c7476113c5c4cad1bb7ed8be90a3cd334257f50d5ec687 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/2fdf2f455b771553db7f49a5a53616d56ec9f9a8/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/2fdf2f455b771553db7f49a5a53616d56ec9f9a8/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a17ec0e831aba54acbfe066b9a806c708b723ce2 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/2fdf2f455b771553db7f49a5a53616d56ec9f9a8/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/2fdf2f455b771553db7f49a5a53616d56ec9f9a8/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/2fdf2f455b771553db7f49a5a53616d56ec9f9a8/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..21c07a830cfe475c8b15c5bf18fdd116f9d80292 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/2fdf2f455b771553db7f49a5a53616d56ec9f9a8/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bc52f1feebcbd86646b0296886c92a3050e96670a9c6f13a1a46c4eba898b35 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/2fdf2f455b771553db7f49a5a53616d56ec9f9a8/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/2fdf2f455b771553db7f49a5a53616d56ec9f9a8/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4476f67882e3e3fd5aad4517a500def4cf1c460 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/2fdf2f455b771553db7f49a5a53616d56ec9f9a8/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a6872c03e5c48054cabd16c449ef27a1e3fd8ff3d6df0b37aa190ae95928ea1 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/2fdf2f455b771553db7f49a5a53616d56ec9f9a8/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/2fdf2f455b771553db7f49a5a53616d56ec9f9a8/latent_code_z.pt new file mode 100644 index 
0000000000000000000000000000000000000000..e2dbcf4bb2889c482c627f403c16af6be26ab76b --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/2fdf2f455b771553db7f49a5a53616d56ec9f9a8/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35c940a4dcda5a42e3827aee4771c9868027524fe2beb4813c36c865dfaec529 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/31305e289b48ea0198293a4d9376db4cea7b0980/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/31305e289b48ea0198293a4d9376db4cea7b0980/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..09f72f9e8ba0b11f704c81e207c6907b4015a572 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/31305e289b48ea0198293a4d9376db4cea7b0980/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/31305e289b48ea0198293a4d9376db4cea7b0980/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/31305e289b48ea0198293a4d9376db4cea7b0980/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c6c10779e01a171a6290f012ca1efe1d002bd08 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/31305e289b48ea0198293a4d9376db4cea7b0980/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb4d9952cbaec95321af9508a9a3dac60aac8c4f18d32d46a1115a810154877 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/31305e289b48ea0198293a4d9376db4cea7b0980/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/31305e289b48ea0198293a4d9376db4cea7b0980/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..dabb8aa92cd2e38463a5220115e4536fad68bd4d --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/31305e289b48ea0198293a4d9376db4cea7b0980/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616a7f1d65d03fe8483d434aab644ca5c87ffc397ac6f324b1000b3f44ce2baf +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/31305e289b48ea0198293a4d9376db4cea7b0980/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/31305e289b48ea0198293a4d9376db4cea7b0980/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..a67135f5aa196f574b9f93210ec150a11f2f8c5e --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/31305e289b48ea0198293a4d9376db4cea7b0980/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a269717ef3697f2f453557834c458f80142a80e22f51f72be52ad157b7cb34cc +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/33c28928f810c12875b86b0ff03217eb0dd15c82/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/33c28928f810c12875b86b0ff03217eb0dd15c82/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..49024c11f94f4cadc6e5780776e91f414e7dca35 Binary files /dev/null and 
b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/33c28928f810c12875b86b0ff03217eb0dd15c82/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/33c28928f810c12875b86b0ff03217eb0dd15c82/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/33c28928f810c12875b86b0ff03217eb0dd15c82/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..fdf20e40259c98409f0d4f2251f364dd95169538 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/33c28928f810c12875b86b0ff03217eb0dd15c82/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b42810b62a4d44f17dd69120e5a477a76d6de4084fc199613c869f06ad9913c2 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/33c28928f810c12875b86b0ff03217eb0dd15c82/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/33c28928f810c12875b86b0ff03217eb0dd15c82/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e6629a99379155d95b7816ba315f40e14ce977b --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/33c28928f810c12875b86b0ff03217eb0dd15c82/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c04fde978e97ff83321634a7ee6aba0f0e231ff382a26443a1ea83892c3ba522 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/33c28928f810c12875b86b0ff03217eb0dd15c82/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/33c28928f810c12875b86b0ff03217eb0dd15c82/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..68cd64c16b9a6c838406dfa31eed7338089097cb --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/33c28928f810c12875b86b0ff03217eb0dd15c82/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:872f39f817c62a84fa99999b65a686b97fddbd67f8f1150b5cf459794939f771 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/659a7756c7ca60fd83bdb02bd0240dbec04cd521/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/659a7756c7ca60fd83bdb02bd0240dbec04cd521/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6ad0cd335370a8f9ff17756dc8e06a0473c8c3ba Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/659a7756c7ca60fd83bdb02bd0240dbec04cd521/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/659a7756c7ca60fd83bdb02bd0240dbec04cd521/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/659a7756c7ca60fd83bdb02bd0240dbec04cd521/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..791f2aea6c9a63050344491851239460eb0365a8 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/659a7756c7ca60fd83bdb02bd0240dbec04cd521/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57bf26634e34e3575097907328386eeddc98a2191a4e8015f375878111a70e92 +size 37611 diff --git 
a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/659a7756c7ca60fd83bdb02bd0240dbec04cd521/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/659a7756c7ca60fd83bdb02bd0240dbec04cd521/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..f44e023330393d3ea6ddf2a3edf689e8f957b6fc --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/659a7756c7ca60fd83bdb02bd0240dbec04cd521/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40470df528af58717edaa1a609b899741d0ad9ddae10f5c9c181e04b74a7bd78 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/659a7756c7ca60fd83bdb02bd0240dbec04cd521/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/659a7756c7ca60fd83bdb02bd0240dbec04cd521/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..2292096cfa50143ecabd7d7151a6e1811fa7ce07 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/659a7756c7ca60fd83bdb02bd0240dbec04cd521/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa196d7c18b9f1a0a583e88cc6e21d6fed95c50f9a5e265d1e5ed5f712376245 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/6c77add2e8f629e5d4d6bfd9666399903e48c60e/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/6c77add2e8f629e5d4d6bfd9666399903e48c60e/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..406a3c4699063d9e767c9dd234e8aa8fcc9daaad Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/6c77add2e8f629e5d4d6bfd9666399903e48c60e/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/6c77add2e8f629e5d4d6bfd9666399903e48c60e/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/6c77add2e8f629e5d4d6bfd9666399903e48c60e/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..afbe4e386e3e0d4b39af1cc7ddeb642ab6054dd3 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/6c77add2e8f629e5d4d6bfd9666399903e48c60e/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ef1b5e20bd588798503ce2cc2aa394cefc3caf95e3c807e764196212fe5711 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/6c77add2e8f629e5d4d6bfd9666399903e48c60e/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/6c77add2e8f629e5d4d6bfd9666399903e48c60e/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc18f0a2ebf6e986af2b40abdcea5db7dc158ab6 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/6c77add2e8f629e5d4d6bfd9666399903e48c60e/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:023f5a5b100b7a53560575cad01b9ea2db2a36445f18d0dc8e913b989aa1e7f3 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/6c77add2e8f629e5d4d6bfd9666399903e48c60e/latent_code_z.pt 
b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/6c77add2e8f629e5d4d6bfd9666399903e48c60e/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d7b14936ca6866ede3c334b689a567e680fb3eb --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/6c77add2e8f629e5d4d6bfd9666399903e48c60e/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05faded7332b91e8a11e19f0d4cd67fd8e6fe64c1f49a924d2933460b1fb04d9 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/7392a6f6d3a992a595d28173bcf6f32ee001b010/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/7392a6f6d3a992a595d28173bcf6f32ee001b010/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..63cf2b8739c3555c86e75cc2c82af0395b65867d Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/7392a6f6d3a992a595d28173bcf6f32ee001b010/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/7392a6f6d3a992a595d28173bcf6f32ee001b010/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/7392a6f6d3a992a595d28173bcf6f32ee001b010/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea5fbdba6c97f357c0565f078eb6f4d548bef4d3 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/7392a6f6d3a992a595d28173bcf6f32ee001b010/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c42685956ca03652050a5c885eb2b087de6b966b7dc2aad8e74ac080d09db1ca +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/7392a6f6d3a992a595d28173bcf6f32ee001b010/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/7392a6f6d3a992a595d28173bcf6f32ee001b010/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..546b6fd66da490dffccab1d866aef8fc394eff55 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/7392a6f6d3a992a595d28173bcf6f32ee001b010/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c003836d8734cd1fcc189627fb1732f46f3f93a1bb89f9f19d05b5cc30efe996 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/7392a6f6d3a992a595d28173bcf6f32ee001b010/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/7392a6f6d3a992a595d28173bcf6f32ee001b010/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..87a5879f997d4597d02940cf4095f72ff37c1d79 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/7392a6f6d3a992a595d28173bcf6f32ee001b010/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c99856f6139b65668b3227796e9d6cae3de410da43ddf118b997aeb5c1432f1a +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/878f7bf17476fe86bd8cf65c725cb9c55e44e974/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/878f7bf17476fe86bd8cf65c725cb9c55e44e974/image_w.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..0dfa23a403da4d28a664a7457002907c5fc769ff Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/878f7bf17476fe86bd8cf65c725cb9c55e44e974/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/878f7bf17476fe86bd8cf65c725cb9c55e44e974/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/878f7bf17476fe86bd8cf65c725cb9c55e44e974/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..97f4996d9ac9a3ecb22d26618f8b1dd40d29752f --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/878f7bf17476fe86bd8cf65c725cb9c55e44e974/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026b0d706f6289a7c2bb7dc101f2f52cf13d22145073ff8475dacd8675179525 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/878f7bf17476fe86bd8cf65c725cb9c55e44e974/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/878f7bf17476fe86bd8cf65c725cb9c55e44e974/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ebfa868c2d940cb88adfc84fae4e10f75ad68b4 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/878f7bf17476fe86bd8cf65c725cb9c55e44e974/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61723107c581a43130a6e738078d15c92f5f920478e55f60b7025d848912fce1 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/878f7bf17476fe86bd8cf65c725cb9c55e44e974/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/878f7bf17476fe86bd8cf65c725cb9c55e44e974/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae50359296ce97af9e5d9ab3aa85349c3d8eb3b7 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/878f7bf17476fe86bd8cf65c725cb9c55e44e974/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d01ca9e17256e1ccd29fedc0c4df5c5cecc919ac470520b1e9ba33dc9ebf248 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8c09d47f17d52314ca7162e38d490fb6c4b008d6/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8c09d47f17d52314ca7162e38d490fb6c4b008d6/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c421da6b1f01570ed56c75755b1e00dd3d825018 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8c09d47f17d52314ca7162e38d490fb6c4b008d6/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8c09d47f17d52314ca7162e38d490fb6c4b008d6/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8c09d47f17d52314ca7162e38d490fb6c4b008d6/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ade39722e27793e8c9357c42b08ff503df23d68 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8c09d47f17d52314ca7162e38d490fb6c4b008d6/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d1f3ee54e96536798271d3912adfd24e2329910cc3071b70217122694c7f788c +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8c09d47f17d52314ca7162e38d490fb6c4b008d6/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8c09d47f17d52314ca7162e38d490fb6c4b008d6/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..16ad0c0207d0812677898fbd314f50d18c9502cf --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8c09d47f17d52314ca7162e38d490fb6c4b008d6/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5827f53e204359c0e601a3117300155deb9b03c072f95b2a3692653b8197e99c +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8c09d47f17d52314ca7162e38d490fb6c4b008d6/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8c09d47f17d52314ca7162e38d490fb6c4b008d6/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bfb8a927af7ebd5c539f46026009b466fca9125 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8c09d47f17d52314ca7162e38d490fb6c4b008d6/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c4807f31be32ac2c2d7b434bc91b5ce9e87769c1dfc829edad89f89fe92e5b +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8d252ccf786cfbe723258e8f6980b4a5ccc8cd25/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8d252ccf786cfbe723258e8f6980b4a5ccc8cd25/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f0a52a916711b04c3eed470a5c7fb2ddc36e8566 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8d252ccf786cfbe723258e8f6980b4a5ccc8cd25/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8d252ccf786cfbe723258e8f6980b4a5ccc8cd25/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8d252ccf786cfbe723258e8f6980b4a5ccc8cd25/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee70d3d5e59fb9a5dfa67bbca8dea074fdea98e0 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8d252ccf786cfbe723258e8f6980b4a5ccc8cd25/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1aff4f70845caa0c2d4e61a8fe09829a6addf8bd3de679bd0f4f95417b28276 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8d252ccf786cfbe723258e8f6980b4a5ccc8cd25/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8d252ccf786cfbe723258e8f6980b4a5ccc8cd25/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..719f0467279dbf7fef53f46ebadef299961a7f1a --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8d252ccf786cfbe723258e8f6980b4a5ccc8cd25/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa2c03cc2e5277e34d75dc6e1a0a34ba82539716c145394a1b4d24e9acefbfd +size 2795 diff --git 
a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8d252ccf786cfbe723258e8f6980b4a5ccc8cd25/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8d252ccf786cfbe723258e8f6980b4a5ccc8cd25/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f7f5bd56740503b7dbac206ed0f75230aacca02 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/8d252ccf786cfbe723258e8f6980b4a5ccc8cd25/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e01e509748451710eba3b06a287306fd8997a40a27613c1094746724b262a510 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/98ad551739989e268af60312983afb7eb8103e94/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/98ad551739989e268af60312983afb7eb8103e94/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..41ea339b67c3d6425a8bcfac85f26adb7dbdc753 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/98ad551739989e268af60312983afb7eb8103e94/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/98ad551739989e268af60312983afb7eb8103e94/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/98ad551739989e268af60312983afb7eb8103e94/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..acee7dba929ada384e13dd92b3b74bc06fcd568d --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/98ad551739989e268af60312983afb7eb8103e94/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e99c80ca7835442df36d7900763286fa19d2668d14cff424f17bb2f9e9571f9 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/98ad551739989e268af60312983afb7eb8103e94/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/98ad551739989e268af60312983afb7eb8103e94/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..674fadcba84e0b91d9c401c0d9218cce53142fb6 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/98ad551739989e268af60312983afb7eb8103e94/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82349ebd3e7971c06637ca7c19b7bf4052a024aa866c98ce0a851e610b1be277 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/98ad551739989e268af60312983afb7eb8103e94/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/98ad551739989e268af60312983afb7eb8103e94/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4f6f204364f7c65205d34d971d84a4eaab0270c --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/98ad551739989e268af60312983afb7eb8103e94/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3b65f3e285473afcbccf8a84bb75a19321053fa0d650228e4da3eb44aff6f43 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a3f48cdc3be44636a39b96600908dc6ed3819a7e/image_w.jpg 
b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a3f48cdc3be44636a39b96600908dc6ed3819a7e/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a94ccf27adffaac8250f7e0f51db04b8d48ddcd7 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a3f48cdc3be44636a39b96600908dc6ed3819a7e/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a3f48cdc3be44636a39b96600908dc6ed3819a7e/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a3f48cdc3be44636a39b96600908dc6ed3819a7e/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..f9cd86e8c5edd49ce3b1e95e191fb08fbdc84f80 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a3f48cdc3be44636a39b96600908dc6ed3819a7e/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92ee29241fb3bc2a795e17211054715cd804b96b25fe9c136a8b1aad63cbf24f +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a3f48cdc3be44636a39b96600908dc6ed3819a7e/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a3f48cdc3be44636a39b96600908dc6ed3819a7e/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e7d9f750f99c925b6ec1acb2f9ae6f6ea87160a --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a3f48cdc3be44636a39b96600908dc6ed3819a7e/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b05159d2bda450f38c647a7ae2007222796a679159016b726d7c82cee1aaa613 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a3f48cdc3be44636a39b96600908dc6ed3819a7e/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a3f48cdc3be44636a39b96600908dc6ed3819a7e/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c55d23e07d22dab3d9f3a6609831413bfd88200 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a3f48cdc3be44636a39b96600908dc6ed3819a7e/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3b0ee79bbbd78a2a9ef00d994e352d452190c01376463554809c36288c35160 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a528e7c5291f77fba4203a0498d3ec362720a80e/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a528e7c5291f77fba4203a0498d3ec362720a80e/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..57c552606c3550ba227a1b5f5c7e2c3735b142c7 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a528e7c5291f77fba4203a0498d3ec362720a80e/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a528e7c5291f77fba4203a0498d3ec362720a80e/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a528e7c5291f77fba4203a0498d3ec362720a80e/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5de34ce3ef975425d68ef78854684415668461c --- /dev/null +++ 
b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a528e7c5291f77fba4203a0498d3ec362720a80e/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df57d97c8c03937271360920755407d07db20327abe1b4781f603a521a80042d +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a528e7c5291f77fba4203a0498d3ec362720a80e/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a528e7c5291f77fba4203a0498d3ec362720a80e/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..6054a068e26cc669fa6adb9d87d325d15607ec2d --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a528e7c5291f77fba4203a0498d3ec362720a80e/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02284c7e3ff55da9ea01b8211cc5c487b1e459187f1699ad0f11dd0540437ad6 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a528e7c5291f77fba4203a0498d3ec362720a80e/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a528e7c5291f77fba4203a0498d3ec362720a80e/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f0cab17dda9d659caf0d4c3a6975ebc7f4773ff --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/a528e7c5291f77fba4203a0498d3ec362720a80e/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31cea00fea3c4676fbdaa0a34959d6e8c6abd77c09afee0aadee9958411c0f68 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ad783230b1e8e797c15d1c1e5b53b5bb2d02684c/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ad783230b1e8e797c15d1c1e5b53b5bb2d02684c/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..83148751676c3713affe4b98b8db4f1e1562e1d9 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ad783230b1e8e797c15d1c1e5b53b5bb2d02684c/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ad783230b1e8e797c15d1c1e5b53b5bb2d02684c/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ad783230b1e8e797c15d1c1e5b53b5bb2d02684c/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..15c49f06323577902404769702c10e913978e939 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ad783230b1e8e797c15d1c1e5b53b5bb2d02684c/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e3954b1c2165807d6ab63836ec70b78e87dbb13c19c5ade88390126545f2f6e +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ad783230b1e8e797c15d1c1e5b53b5bb2d02684c/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ad783230b1e8e797c15d1c1e5b53b5bb2d02684c/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..b56832a6b12d1b3ebbd1043d86a0ed0e56f8af66 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ad783230b1e8e797c15d1c1e5b53b5bb2d02684c/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bb4f1feb355dafb60575d0c4296bb36efd0a95d2a080d71ce7df96adcf484f41 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ad783230b1e8e797c15d1c1e5b53b5bb2d02684c/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ad783230b1e8e797c15d1c1e5b53b5bb2d02684c/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..5cc9b1bb3306c906217c65277066651020e93c10 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ad783230b1e8e797c15d1c1e5b53b5bb2d02684c/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30a56c9912b721949753e8407273cfd559a49872344831fd97739cd7cac3329d +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/args.json b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/args.json new file mode 100644 index 0000000000000000000000000000000000000000..df7ecb2b1faa357c608a486604877771d9e02255 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/args.json @@ -0,0 +1 @@ +{"verbose": true, "gan": "stylegan2_ffhq1024", "truncation": 0.6, "num_samples": 32, "cuda": true} \ No newline at end of file diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/b6288a72efed9fad118fa8acf4c9d1c3d1f5b95a/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/b6288a72efed9fad118fa8acf4c9d1c3d1f5b95a/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..507c08b1ea8f79fbf5bea741a1668f626e82caa0 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/b6288a72efed9fad118fa8acf4c9d1c3d1f5b95a/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/b6288a72efed9fad118fa8acf4c9d1c3d1f5b95a/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/b6288a72efed9fad118fa8acf4c9d1c3d1f5b95a/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb6689455d6dfeee1ae09c704b1127df507c54dd --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/b6288a72efed9fad118fa8acf4c9d1c3d1f5b95a/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71b62dab8822c2bad6358b415264bd88d5921fcdb0e60f46399d948e3f816232 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/b6288a72efed9fad118fa8acf4c9d1c3d1f5b95a/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/b6288a72efed9fad118fa8acf4c9d1c3d1f5b95a/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea5245a89f6ae813c629bd02161e1c6124ccfac3 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/b6288a72efed9fad118fa8acf4c9d1c3d1f5b95a/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ba0306b61d00bc107d3a5e8ad19a8897cebee191f8f348c5e52f6fa2c07ae4 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/b6288a72efed9fad118fa8acf4c9d1c3d1f5b95a/latent_code_z.pt 
b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/b6288a72efed9fad118fa8acf4c9d1c3d1f5b95a/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..99e42955838c297f8f4e4f788de8a227c13ec62f --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/b6288a72efed9fad118fa8acf4c9d1c3d1f5b95a/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0560208aa2f33acfba0d10dd42f0c86e8c4d4a0a7a35c09c295d26e236ac524e +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/c7f6547058e69ca074f6bb3efd09c929f5286a54/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/c7f6547058e69ca074f6bb3efd09c929f5286a54/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d5377a9d94fdecc15951d355dfb241e493e3b6d8 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/c7f6547058e69ca074f6bb3efd09c929f5286a54/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/c7f6547058e69ca074f6bb3efd09c929f5286a54/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/c7f6547058e69ca074f6bb3efd09c929f5286a54/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..4608f67ce95638aa98fb5afead87f93fd7c33798 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/c7f6547058e69ca074f6bb3efd09c929f5286a54/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a053d3ad5b49d5ba1ccd8672f517cd17cbf7e22d73bcd58fed180539ddcbf81c +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/c7f6547058e69ca074f6bb3efd09c929f5286a54/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/c7f6547058e69ca074f6bb3efd09c929f5286a54/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..53b50a61bd4b07c17aa2edb1a6c21bfd3910ccfd --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/c7f6547058e69ca074f6bb3efd09c929f5286a54/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a50d5a53956f9e491438d28c1b2ea9634f5765c3fd0580fdf2ae8042a1b1cc04 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/c7f6547058e69ca074f6bb3efd09c929f5286a54/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/c7f6547058e69ca074f6bb3efd09c929f5286a54/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2e1a8881e5d60992a94e86dc78f6c4b81cecd1c --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/c7f6547058e69ca074f6bb3efd09c929f5286a54/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60cbb1c5d731a653640dc50b39269ec9bb700ce63ebe4647c6c7c9225605db12 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/db0b42a002de7b3e2e8108887325dba6c35bc4f0/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/db0b42a002de7b3e2e8108887325dba6c35bc4f0/image_w.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..03f85c5d2865846c78b3e4d7355e3e318206bed7 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/db0b42a002de7b3e2e8108887325dba6c35bc4f0/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/db0b42a002de7b3e2e8108887325dba6c35bc4f0/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/db0b42a002de7b3e2e8108887325dba6c35bc4f0/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..a989761577cce197ae3a209f5697f91e6b6c2305 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/db0b42a002de7b3e2e8108887325dba6c35bc4f0/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba9dc4264b6bf3416c089e3add2ff2e18d963901c6849594d1da2823eeaa88e3 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/db0b42a002de7b3e2e8108887325dba6c35bc4f0/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/db0b42a002de7b3e2e8108887325dba6c35bc4f0/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..cea0662072aa27b38858c7b128a4e6d04dccf4d2 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/db0b42a002de7b3e2e8108887325dba6c35bc4f0/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38486556ad6c9ef778c809ee7c9cdd08ae56a1e9a27a5e06ad83ff41578585cc +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/db0b42a002de7b3e2e8108887325dba6c35bc4f0/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/db0b42a002de7b3e2e8108887325dba6c35bc4f0/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebebbc9adf8d5ae5aaa26c3bd02d0e3206e91cf1 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/db0b42a002de7b3e2e8108887325dba6c35bc4f0/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3496f45889420b66f66184800f29427e782ab631d4cfb4afed7757a525947f0 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/de3f8235fd67f4a29b1c6a20a03e4e9f8e190762/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/de3f8235fd67f4a29b1c6a20a03e4e9f8e190762/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4d28d1b566b8f916d757384578bc544d5e450c9b Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/de3f8235fd67f4a29b1c6a20a03e4e9f8e190762/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/de3f8235fd67f4a29b1c6a20a03e4e9f8e190762/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/de3f8235fd67f4a29b1c6a20a03e4e9f8e190762/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..79b4d4ff888b19194800d4fcffcf42048201b456 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/de3f8235fd67f4a29b1c6a20a03e4e9f8e190762/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d28f9bd414800920b64e701eb4d74583ee7a019624078011cff960524cf2cc9b +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/de3f8235fd67f4a29b1c6a20a03e4e9f8e190762/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/de3f8235fd67f4a29b1c6a20a03e4e9f8e190762/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..0770a2c3f09b33199c830958fc29577cce5adb1c --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/de3f8235fd67f4a29b1c6a20a03e4e9f8e190762/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0adcc8f2bc3124ea4f3511a5ea468077755f1ac9beadbfdfe6d891362a39da9 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/de3f8235fd67f4a29b1c6a20a03e4e9f8e190762/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/de3f8235fd67f4a29b1c6a20a03e4e9f8e190762/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a768b917017940e61131db25b3c7ffebc6ffc1f --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/de3f8235fd67f4a29b1c6a20a03e4e9f8e190762/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a33a805513e757a1a97a0158592fe04304440eb0a79ae4387c6373c5df0de81 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ee626ad420db4f65c7b87400d5df84ef93a54493/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ee626ad420db4f65c7b87400d5df84ef93a54493/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..074a9febba3f30be1e9c8f507096318532ec08b6 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ee626ad420db4f65c7b87400d5df84ef93a54493/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ee626ad420db4f65c7b87400d5df84ef93a54493/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ee626ad420db4f65c7b87400d5df84ef93a54493/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d292ebd049dfbe69873416701d9c21399a7e444 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ee626ad420db4f65c7b87400d5df84ef93a54493/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9fe6124060189d024cfb7593259b82573073f120de47d172f3c8fa31e902802 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ee626ad420db4f65c7b87400d5df84ef93a54493/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ee626ad420db4f65c7b87400d5df84ef93a54493/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec0d93429fea8ffa1e896b0fa5a8f3f3471467b6 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ee626ad420db4f65c7b87400d5df84ef93a54493/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfcf77704192c9d3be0eb58a67b34ca06946edc8d67f00431b162985025da46d +size 2795 diff --git 
a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ee626ad420db4f65c7b87400d5df84ef93a54493/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ee626ad420db4f65c7b87400d5df84ef93a54493/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..f095505121a762717d470fa48c4155b8a6bd4bdf --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ee626ad420db4f65c7b87400d5df84ef93a54493/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07419572280e15c69f1de9c3add18609b2057efcb57d55183b2a26a7d96e5512 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f33933d12ca2b10f95b728281662a4a9aad28122/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f33933d12ca2b10f95b728281662a4a9aad28122/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5a55461ff8c9250eff7a180d19c6aaf11ada886b Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f33933d12ca2b10f95b728281662a4a9aad28122/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f33933d12ca2b10f95b728281662a4a9aad28122/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f33933d12ca2b10f95b728281662a4a9aad28122/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ee273a0b80b4d8706feb815d87ed55a49a35938 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f33933d12ca2b10f95b728281662a4a9aad28122/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9c1967c496efb3875db031cded40590426e7093a796740c74bc8243da899b72 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f33933d12ca2b10f95b728281662a4a9aad28122/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f33933d12ca2b10f95b728281662a4a9aad28122/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fdd26f84e1171b8c5a748ad3fe61093573e2f0a --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f33933d12ca2b10f95b728281662a4a9aad28122/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89af02370cec191cc9ec1b8cf74381a68ebe13409ed737f7ecaf1bb89524f307 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f33933d12ca2b10f95b728281662a4a9aad28122/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f33933d12ca2b10f95b728281662a4a9aad28122/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..42bdc37dc00fec55f4fc40f66b3cfa4fd32b9a0d --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f33933d12ca2b10f95b728281662a4a9aad28122/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3feed7e09329466dcfcdb2c847f414121f018181b19874bd2c71f74a1054d568 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f799f5a7f594e558a767ae52b9a429125b2d98a0/image_w.jpg 
b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f799f5a7f594e558a767ae52b9a429125b2d98a0/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..eda06ddedd2287800f99d78b867e0ab12430f02a Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f799f5a7f594e558a767ae52b9a429125b2d98a0/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f799f5a7f594e558a767ae52b9a429125b2d98a0/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f799f5a7f594e558a767ae52b9a429125b2d98a0/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..f33befcab6f7049eba386d9c120e63b7ab748c72 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f799f5a7f594e558a767ae52b9a429125b2d98a0/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0648d23fb2cc3b7b7ab155c665291a231b5defe763b0371d5923a78fd92b763c +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f799f5a7f594e558a767ae52b9a429125b2d98a0/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f799f5a7f594e558a767ae52b9a429125b2d98a0/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a9945b922d9c95e0c59146eb79fdbaef4a0390e --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f799f5a7f594e558a767ae52b9a429125b2d98a0/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f9d38fbbd30d5f91822988fb15b0227516900f91bd11143cb4d0c8e328a7f64 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f799f5a7f594e558a767ae52b9a429125b2d98a0/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f799f5a7f594e558a767ae52b9a429125b2d98a0/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5d9d4f5f07d270b9e31aeb5ae7861d86c07aace --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/f799f5a7f594e558a767ae52b9a429125b2d98a0/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d5d01d66f5d3cda59146a99e8fb72ba4a3a078814d13ad3171eb1108a42cc03 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/fb47d334c666d982cb101b5857d38365dba6314c/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/fb47d334c666d982cb101b5857d38365dba6314c/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..acef4f86945a046983d9e80979e6ec3f2e56bb61 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/fb47d334c666d982cb101b5857d38365dba6314c/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/fb47d334c666d982cb101b5857d38365dba6314c/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/fb47d334c666d982cb101b5857d38365dba6314c/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2ca2c44dd8c62d4623bc9cf8891723317710389 --- /dev/null +++ 
b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/fb47d334c666d982cb101b5857d38365dba6314c/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab9fabf9d0a67be87098a17efb55022399de749ac3b2136cbe197ab4d58c7c2f +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/fb47d334c666d982cb101b5857d38365dba6314c/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/fb47d334c666d982cb101b5857d38365dba6314c/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..ddd2cce07de2cc5fc9596593cf31b265318d8ae4 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/fb47d334c666d982cb101b5857d38365dba6314c/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e976fe254c4b30a44ca09f7a22fe6e055b6dd5966156dd75ea78b8a6f8cbe85 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/fb47d334c666d982cb101b5857d38365dba6314c/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/fb47d334c666d982cb101b5857d38365dba6314c/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..14494198fb15db0508308810be00fdf8ee39cebb --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/fb47d334c666d982cb101b5857d38365dba6314c/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e4b5df16154194da6ab8bc6b5344bc57ad85ee2befc125c6edd80d3c387b30 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ff506f7f6d36e63294c2a75622021f9698f64700/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ff506f7f6d36e63294c2a75622021f9698f64700/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f8f2ad4fb0b7d69c1171e5f8e52fe5f6d25160de Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ff506f7f6d36e63294c2a75622021f9698f64700/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ff506f7f6d36e63294c2a75622021f9698f64700/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ff506f7f6d36e63294c2a75622021f9698f64700/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..11a9d34d7837607af13d172d22ab45e839286a7a --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ff506f7f6d36e63294c2a75622021f9698f64700/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b526d913397f377e5beb7937699de4c2da3dc27a0e6202b7a1196886a40f6409 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ff506f7f6d36e63294c2a75622021f9698f64700/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ff506f7f6d36e63294c2a75622021f9698f64700/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..07cc27905de2eb3cb3ea0a210e04df38be47157f --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ff506f7f6d36e63294c2a75622021f9698f64700/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c76226a6405d7fcfb8e5de6f5397a932a891cfc46e2a78311380cc15ddde7bc5 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ff506f7f6d36e63294c2a75622021f9698f64700/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ff506f7f6d36e63294c2a75622021f9698f64700/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ebd0a17fa8836f3b64ff00188c3171b7a804a8a --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-32/ff506f7f6d36e63294c2a75622021f9698f64700/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc7f2d3ce60017b5554aea2cd51a677134a2c010b1932ce5e57fa992382cb25 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/093a9e396147052b408441c1ae4a2e22c0424399/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/093a9e396147052b408441c1ae4a2e22c0424399/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5577cb486f63f8d2d13357f7e08e80a3e0bce87e Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/093a9e396147052b408441c1ae4a2e22c0424399/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/093a9e396147052b408441c1ae4a2e22c0424399/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/093a9e396147052b408441c1ae4a2e22c0424399/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fb68c70ba1dc3c366ebba985865812850215847 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/093a9e396147052b408441c1ae4a2e22c0424399/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60aaffae15212290e498e704a35bbc88d921dde163a0347660c0555d99e20ce2 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/093a9e396147052b408441c1ae4a2e22c0424399/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/093a9e396147052b408441c1ae4a2e22c0424399/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..b42eb145b4affc9f278421d0ac0b41905463377d --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/093a9e396147052b408441c1ae4a2e22c0424399/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f1c576044023eb5fe6003268dc651e1d5d0ace410438d05c069c30c48708a41 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/093a9e396147052b408441c1ae4a2e22c0424399/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/093a9e396147052b408441c1ae4a2e22c0424399/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..110e86fa4117fd59b7561fba9319cd84a6eb94a3 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/093a9e396147052b408441c1ae4a2e22c0424399/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6043ac397dfde96fcf00dd33242138f1e34b8174aadbe8795beea86d916c02 +size 2795 diff --git 
a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/1f87a27641c933a203f15e6ac780d79aa1eb4e78/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/1f87a27641c933a203f15e6ac780d79aa1eb4e78/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e2c273ec8e6dc5640a0852c5ab1a70d69405b0b2 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/1f87a27641c933a203f15e6ac780d79aa1eb4e78/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/1f87a27641c933a203f15e6ac780d79aa1eb4e78/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/1f87a27641c933a203f15e6ac780d79aa1eb4e78/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f3aa993f1dfd3a5a61f919efd31a4e2e39b583a --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/1f87a27641c933a203f15e6ac780d79aa1eb4e78/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d163ff8fa30356f0062c864aff9e16bd5372f416a50441a8b619c758f2186827 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/1f87a27641c933a203f15e6ac780d79aa1eb4e78/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/1f87a27641c933a203f15e6ac780d79aa1eb4e78/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef9e994c33c935fb30e599b57ff90eb83ad3b90a --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/1f87a27641c933a203f15e6ac780d79aa1eb4e78/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b0f7af8a1d8f2e5f57bb5b5ce80ed552e415e56b7c4363ac2c6809715a6c604 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/1f87a27641c933a203f15e6ac780d79aa1eb4e78/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/1f87a27641c933a203f15e6ac780d79aa1eb4e78/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c96b63712e92c2beb29671071c999297ca2ee74 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/1f87a27641c933a203f15e6ac780d79aa1eb4e78/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:166417451b3d9fd85538b9f1143d789bed55932242f16dfdba5e1658c88d49d1 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/40f9b00ba37ababf3cd56c8d7d2ed95f1c6430eb/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/40f9b00ba37ababf3cd56c8d7d2ed95f1c6430eb/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f296c5c6e5b79e03d00bc3b00acdeeffcad1a9b0 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/40f9b00ba37ababf3cd56c8d7d2ed95f1c6430eb/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/40f9b00ba37ababf3cd56c8d7d2ed95f1c6430eb/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/40f9b00ba37ababf3cd56c8d7d2ed95f1c6430eb/latent_code_w+.pt new file mode 100644 index 
0000000000000000000000000000000000000000..3eb9e43468bb0348357eb9edacd9b6900d8fec41 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/40f9b00ba37ababf3cd56c8d7d2ed95f1c6430eb/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f701d8c2f2cf4abf4bdc3dbddad55156ed4d6a3329f1c02512c3b5283c58ad9 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/40f9b00ba37ababf3cd56c8d7d2ed95f1c6430eb/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/40f9b00ba37ababf3cd56c8d7d2ed95f1c6430eb/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..993fcad2866c3778026d17bef7608b9ad6123d38 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/40f9b00ba37ababf3cd56c8d7d2ed95f1c6430eb/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a74ccce23d8dd2affaa4761ee67565f0f43761f4e5dc6f939406a977d83a788 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/40f9b00ba37ababf3cd56c8d7d2ed95f1c6430eb/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/40f9b00ba37ababf3cd56c8d7d2ed95f1c6430eb/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..1bc1e28d4b83fa73a1c81077441092897341386a --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/40f9b00ba37ababf3cd56c8d7d2ed95f1c6430eb/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80fe54be54c515d7d25fe9ebd457ace0d96e492d562dd925501b1698899b0b8 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/57ab952c3ea4036ec9a07343ca732cb10c7a7b61/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/57ab952c3ea4036ec9a07343ca732cb10c7a7b61/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7167af9934d26daa2deaff65064c97840cb11103 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/57ab952c3ea4036ec9a07343ca732cb10c7a7b61/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/57ab952c3ea4036ec9a07343ca732cb10c7a7b61/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/57ab952c3ea4036ec9a07343ca732cb10c7a7b61/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c8466d14ccd22622658e4be294026647959e4a0 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/57ab952c3ea4036ec9a07343ca732cb10c7a7b61/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d723776da2d8cffe5d6b0319f9a061c858c160b7b4519dcc4b7d6edd000eaa65 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/57ab952c3ea4036ec9a07343ca732cb10c7a7b61/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/57ab952c3ea4036ec9a07343ca732cb10c7a7b61/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..42ab1d1240547032ae01ba0f2849ea8e36a77517 --- /dev/null +++ 
b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/57ab952c3ea4036ec9a07343ca732cb10c7a7b61/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1d0649ad000bdfad3843437f8988e78f5c0de8f87e6d80ae4a5d7d43c766fb +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/57ab952c3ea4036ec9a07343ca732cb10c7a7b61/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/57ab952c3ea4036ec9a07343ca732cb10c7a7b61/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..07fb1519bf9ccce69db129e9c212cd7155b6b658 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/57ab952c3ea4036ec9a07343ca732cb10c7a7b61/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b12362984b7f119f615ec885abc188be13654aa43f038c4f5b49d829167e34c5 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/91b3adc96287d530c6d5267e6865a3792b4bfbe0/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/91b3adc96287d530c6d5267e6865a3792b4bfbe0/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6ad65a7143b7bf8aa0ab7e636ab70384b9162c5f Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/91b3adc96287d530c6d5267e6865a3792b4bfbe0/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/91b3adc96287d530c6d5267e6865a3792b4bfbe0/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/91b3adc96287d530c6d5267e6865a3792b4bfbe0/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d164b59b62474e18936ba43b38444266173e075 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/91b3adc96287d530c6d5267e6865a3792b4bfbe0/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420291460d9293a809e0d2af43ad5649e9f038dd05e1fb35b1203cf8ea8906c6 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/91b3adc96287d530c6d5267e6865a3792b4bfbe0/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/91b3adc96287d530c6d5267e6865a3792b4bfbe0/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..9aab97e385300c21b410432c32686b52361fd662 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/91b3adc96287d530c6d5267e6865a3792b4bfbe0/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cc21b2bd2bd66daff075e968363bc3eafac493f630021d0e201010054f0ea46 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/91b3adc96287d530c6d5267e6865a3792b4bfbe0/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/91b3adc96287d530c6d5267e6865a3792b4bfbe0/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..1930e782059cf32b75f54f11460dccfafe09897d --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/91b3adc96287d530c6d5267e6865a3792b4bfbe0/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ad0df5fa3dbe465d95b9adc986eda7248da7e1cbe42ce1fd58c33db16fb8efd1 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/b9a54ee842954467223862c45a94e77a3a9127ae/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/b9a54ee842954467223862c45a94e77a3a9127ae/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4f54808fa71d3dcabfbb366e1fb272d3b50450c4 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/b9a54ee842954467223862c45a94e77a3a9127ae/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/b9a54ee842954467223862c45a94e77a3a9127ae/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/b9a54ee842954467223862c45a94e77a3a9127ae/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6cfa69210dd6223b97f28b5e519ff4c41a0f8e1 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/b9a54ee842954467223862c45a94e77a3a9127ae/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd4b390fd8fe10c50a2034b9d66578d3f69b90b44740ed6cc58ae38cd320b388 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/b9a54ee842954467223862c45a94e77a3a9127ae/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/b9a54ee842954467223862c45a94e77a3a9127ae/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..5747d2dbc0e7382b72710e253043b0c4c249a8a4 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/b9a54ee842954467223862c45a94e77a3a9127ae/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c7d03c3643c3a6825e9b2d3b2530d0f6ef6d3e097e63b6b41911265c9cf2a9a +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/b9a54ee842954467223862c45a94e77a3a9127ae/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/b9a54ee842954467223862c45a94e77a3a9127ae/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..1887e9bbb6bf3db21a781f82e6195e43802dd037 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/b9a54ee842954467223862c45a94e77a3a9127ae/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6940d6b4a1f8fff521572a10e9ff0433a93fd5a6023d5338afeac76bae4f4d7d +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c0a332139b8485537e20d9cf786c663ca601cd32/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c0a332139b8485537e20d9cf786c663ca601cd32/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2a569f1bdb3c5846dca12acb177618f05bfe77f2 Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c0a332139b8485537e20d9cf786c663ca601cd32/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c0a332139b8485537e20d9cf786c663ca601cd32/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c0a332139b8485537e20d9cf786c663ca601cd32/latent_code_w+.pt 
new file mode 100644 index 0000000000000000000000000000000000000000..e10339316b0cbb29a50522b5d45977546a614f40 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c0a332139b8485537e20d9cf786c663ca601cd32/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17d88087e385b1d03871872f828aa9055f673dbad07245f7874723d5df5e5908 +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c0a332139b8485537e20d9cf786c663ca601cd32/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c0a332139b8485537e20d9cf786c663ca601cd32/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..232bad0f713ba53dd9aa9beff63db7f81260380e --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c0a332139b8485537e20d9cf786c663ca601cd32/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb387742efda146d27e86a187f6c1cf7b8cc179a6b4eb9f2c16ac1f4a8d92fdf +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c0a332139b8485537e20d9cf786c663ca601cd32/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c0a332139b8485537e20d9cf786c663ca601cd32/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..67fbd4889fbc018e13e4fe3bc54253713b1f54fd --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c0a332139b8485537e20d9cf786c663ca601cd32/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48127a86763fd2aa875c7ade516d298beec5e6d936317cf9e6e5ef977d436471 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c91f3b81621bbc56f41c39e3f462da3121b89da9/image_w.jpg b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c91f3b81621bbc56f41c39e3f462da3121b89da9/image_w.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7e2a08389b4b17349cb17ca40c8b2e28fe1d7bcd Binary files /dev/null and b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c91f3b81621bbc56f41c39e3f462da3121b89da9/image_w.jpg differ diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c91f3b81621bbc56f41c39e3f462da3121b89da9/latent_code_w+.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c91f3b81621bbc56f41c39e3f462da3121b89da9/latent_code_w+.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4820588cf7166ae69472a6c86870c71e87094de --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c91f3b81621bbc56f41c39e3f462da3121b89da9/latent_code_w+.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca94d1c4669984a2f683c66182c64593ea3a96bb95fc588fcd40fd18dfae791f +size 37611 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c91f3b81621bbc56f41c39e3f462da3121b89da9/latent_code_w.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c91f3b81621bbc56f41c39e3f462da3121b89da9/latent_code_w.pt new file mode 100644 index 0000000000000000000000000000000000000000..9401a1b5e459752ec8f04ce0c3700d591c3ee001 --- /dev/null +++ 
b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c91f3b81621bbc56f41c39e3f462da3121b89da9/latent_code_w.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:979ae56b776579d10188fe311bb3ef60614a16422895a59a304bb06233ba8766 +size 2795 diff --git a/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c91f3b81621bbc56f41c39e3f462da3121b89da9/latent_code_z.pt b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c91f3b81621bbc56f41c39e3f462da3121b89da9/latent_code_z.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad4c6ec11cecbea550e555a4f69be0a2b3d9ce3 --- /dev/null +++ b/ContraCLIP/experiments/latent_codes/stylegan2_ffhq1024/stylegan2_ffhq1024-8/c91f3b81621bbc56f41c39e3f462da3121b89da9/latent_code_z.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af62197d2fb6e21af8f61117d9261fce4f4c19f72bfb1b3babed534a07a8b2f7 +size 2795 diff --git a/ContraCLIP/lib/__init__.py b/ContraCLIP/lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..55b15a0d9ef78ffad6094ec1b98b3133e51bbc92 --- /dev/null +++ b/ContraCLIP/lib/__init__.py @@ -0,0 +1,13 @@ +from .aux import create_exp_dir, PromptFeatures, update_stdout, update_progress, sample_z, \ + create_summarizing_gif, tensor2image +from .config import SEMANTIC_DIPOLES_CORPORA +from .config import GENFORCE, GENFORCE_MODELS, STYLEGAN_LAYERS +from .config import SFD, ARCFACE, FAIRFACE, HOPENET, AUDET, CELEBA_ATTRIBUTES +from .config import ContraCLIP_models +from .support_sets import SupportSets +from .trainer import Trainer +from .data import PathImages +from .evaluation.sfd.sfd_detector import SFDDetector +from .evaluation.archface.arcface import IDComparator +from .evaluation.celeba_attributes.celeba_attr_predictor import celeba_attr_predictor +from .evaluation.au_detector.AU_detector import AUdetector diff --git a/ContraCLIP/lib/aux.py b/ContraCLIP/lib/aux.py new file mode 100644 index 0000000000000000000000000000000000000000..2fbd3fd7cbc261a13aa6fdd1669c2a09b7ac8668 --- /dev/null +++ b/ContraCLIP/lib/aux.py @@ -0,0 +1,282 @@ +import sys +import os +import os.path as osp +import json +import argparse +import numpy as np +import clip +import torch +import math +from scipy.stats import truncnorm +from PIL import Image, ImageDraw +from torchvision.transforms import ToPILImage +from .config import SEMANTIC_DIPOLES_CORPORA + + +def create_exp_dir(args): + """Create output directory for current experiment under experiments/wip/ and save given the arguments (json) and + the given command (bash script). 
+ + Experiment's directory name format: + ContraCLIP_<gan>-<stylegan_space|Z>-K<num_dipoles>-D<num_latent_support_dipoles>-lss_beta_<lss_beta> + -eps<min_shift_magnitude>_<max_shift_magnitude>-<styleclip|linear|nonlinear_css_beta_<css_beta>> + -<loss>(_<temperature>)-<max_iter>-<corpus> + + E.g.: + ContraCLIP_stylegan2_ffhq1024-W+-K3-D128-eps0.1_0.2-nonlinear_beta-0.75-contrastive_1.0-10000-expressions3 + + Args: + args (argparse.Namespace): the namespace object returned by `parse_args()` for the current run + + """ + exp_dir = "ContraCLIP_{}".format(args.gan) + if 'stylegan' in args.gan: + exp_dir += '-{}'.format(args.stylegan_space) + else: + exp_dir += '-Z' + exp_dir += "-K{}-D{}".format(len(SEMANTIC_DIPOLES_CORPORA[args.corpus]), args.num_latent_support_dipoles) + exp_dir += "-lss_beta_{}".format(args.lss_beta) + exp_dir += "-eps{}_{}".format(args.min_shift_magnitude, args.max_shift_magnitude) + if args.styleclip: + exp_dir += "-styleclip" + elif args.linear: + exp_dir += "-linear" + else: + exp_dir += "-nonlinear_css_beta_{}".format(args.css_beta) + + exp_dir += "-{}".format(args.loss) + if args.loss == "contrastive": + exp_dir += "_{}".format(args.temperature) + exp_dir += "-{}".format(args.max_iter) + exp_dir += "-{}".format(args.corpus) + + # Create output directory (wip) + wip_dir = osp.join("experiments", "wip", exp_dir) + os.makedirs(wip_dir, exist_ok=True) + # Save args namespace object in json format + with open(osp.join(wip_dir, 'args.json'), 'w') as args_json_file: + json.dump(args.__dict__, args_json_file) + + # Save the given command in a bash script file + with open(osp.join(wip_dir, 'command.sh'), 'w') as command_file: + command_file.write('#!/usr/bin/bash\n') + command_file.write(' '.join(sys.argv) + '\n') + + return exp_dir + + +class PromptFeatures: + def __init__(self, prompt_corpus, clip_model): + self.prompt_corpus = prompt_corpus + self.clip_model = clip_model.cpu() + self.num_prompts = len(self.prompt_corpus) + self.prompt_features_dim = 512 + + def get_prompt_features(self): + prompt_features = [ + self.clip_model.encode_text(clip.tokenize(self.prompt_corpus[t]).cpu()).unsqueeze(0) for t in + range(len(self.prompt_corpus)) + ] + return torch.cat(prompt_features, dim=0) + + +class TrainingStatTracker(object): + def __init__(self): + self.stat_tracker = {'loss': []} + + def update(self, loss): + self.stat_tracker['loss'].append(float(loss)) + + def get_means(self): + stat_means = dict() + for key, value in self.stat_tracker.items(): + stat_means.update({key: np.mean(value)}) + return stat_means + + def flush(self): + for key in self.stat_tracker.keys(): + self.stat_tracker[key] = [] + + +def sample_z(batch_size, dim_z, truncation=None): + """Sample a random latent code from multi-variate standard Gaussian distribution with/without truncation. 
+ + Args: + batch_size (int) : batch size (number of latent codes) + dim_z (int) : latent space dimensionality + truncation (float) : truncation parameter + + Returns: + z (torch.Tensor) : batch of latent codes + """ + if truncation is None or truncation == 1.0: + return torch.randn(batch_size, dim_z) + else: + return torch.from_numpy(truncnorm.rvs(-truncation, truncation, size=(batch_size, dim_z))).to(torch.float) + + +def tensor2image(tensor, adaptive=False): + tensor = tensor.squeeze(dim=0) + if adaptive: + tensor = (tensor - tensor.min()) / (tensor.max() - tensor.min()) + return ToPILImage()((255 * tensor.cpu().detach()).to(torch.uint8)) + else: + tensor = (tensor + 1) / 2 + tensor = tensor.clamp(0, 1) + return ToPILImage()((255 * tensor.cpu().detach()).to(torch.uint8)) + + +def update_progress(msg, total, progress): + bar_length, status = 20, "" + progress = float(progress) / float(total) + if progress >= 1.: + progress, status = 1, "\r\n" + block = int(round(bar_length * progress)) + block_symbol = u"\u2588" + empty_symbol = u"\u2591" + text = "\r{}{} {:.0f}% {}".format(msg, block_symbol * block + empty_symbol * (bar_length - block), + round(progress * 100, 0), status) + sys.stdout.write(text) + sys.stdout.flush() + + +def update_stdout(num_lines): + """Update stdout by moving cursor up and erasing line for given number of lines. + + Args: + num_lines (int): number of lines + + """ + cursor_up = '\x1b[1A' + erase_line = '\x1b[2K' + for _ in range(num_lines): + print(cursor_up + erase_line) + + +def sec2dhms(t): + """Convert time into days, hours, minutes, and seconds string format. + + Args: + t (float): time in seconds + + Returns (string): + "<days> days, <hours> hours, <minutes> minutes, and <seconds> seconds" + + """ + day = t // (24 * 3600) + t = t % (24 * 3600) + hour = t // 3600 + t %= 3600 + minutes = t // 60 + t %= 60 + seconds = t + return "%02d days, %02d hours, %02d minutes, and %02d seconds" % (day, hour, minutes, seconds) + + +def get_wh(img_paths): + """Get width and height of images in given list of paths. Images are expected to have the same resolution. + + Args: + img_paths (list): list of image paths + + Returns: + width (int) : the common images width + height (int) : the common images height + + """ + img_widths = [] + img_heights = [] + for img in img_paths: + img_ = Image.open(img) + img_widths.append(img_.width) + img_heights.append(img_.height) + + if len(set(img_widths)) == len(set(img_heights)) == 1: + return img_widths[0], img_heights[0] + else: + raise ValueError("Inconsistent image resolutions in {}".format(img_paths)) + + +def create_summarizing_gif(imgs_root, gif_filename, num_imgs=None, gif_size=None, gif_fps=30, gap=15, progress_bar_h=15, + progress_bar_color=(252, 186, 3)): + """Create a summarizing GIF image given an images root directory (images generated across a certain latent path) and + the number of images to appear as a static sequence. The resolution of the resulting GIF image will be + ((num_imgs + 1) * gif_size, gif_size). That is, a static sequence of `num_imgs` images will be depicted in front of + the animated GIF image (the latter will use all the available images in `imgs_root`). 
+ + Args: + imgs_root (str) : directory of images (generated across a certain path) + gif_filename (str) : filename of the resulting GIF image + num_imgs (int) : number of images that will be used to build the static sequence before the + animated part of the GIF + gif_size (int) : height of the GIF image (its width will be equal to (num_imgs + 1) * gif_size) + gif_fps (int) : GIF frames per second + gap (int) : a gap between the static sequence and the animated path of the GIF + progress_bar_h (int) : height of the progress bar depicted to the bottom of the animated part of the GIF + image. If a non-positive number is given, progress bar will be disabled. + progress_bar_color (tuple) : color of the progress bar + + """ + # Check if given images root directory exists + if not osp.isdir(imgs_root): + raise NotADirectoryError("Invalid directory: {}".format(imgs_root)) + + # Get all images under given root directory + path_images = [osp.join(imgs_root, dI) for dI in os.listdir(imgs_root) if osp.isfile(osp.join(imgs_root, dI))] + path_images.sort() + + # Set number of images to appear in the static sequence of the GIF + num_images = len(path_images) + if num_imgs is None: + num_imgs = num_images + elif num_imgs > num_images: + num_imgs = num_images + + # Get paths of static images + static_imgs = [] + for i in range(0, len(path_images), math.ceil(len(path_images) / num_imgs)): + static_imgs.append(osp.join(imgs_root, '{:06}.jpg'.format(i))) + num_imgs = len(static_imgs) + + # Get GIF image resolution + if gif_size is not None: + gif_w = gif_h = gif_size + else: + gif_w, gif_h = get_wh(static_imgs) + + # Create PIL static image + static_img_pil = Image.new('RGB', size=(len(static_imgs) * gif_w, gif_h)) + for i in range(len(static_imgs)): + static_img_pil.paste(Image.open(static_imgs[i]).resize((gif_w, gif_h)), (i * gif_w, 0)) + + # Create PIL GIF frames + gif_frames = [] + for i in range(len(path_images)): + # Create new PIL frame + gif_frame_pil = Image.new('RGB', size=((num_imgs + 1) * gif_w + gap, gif_h), color=(255, 255, 255)) + + # Paste static image + gif_frame_pil.paste(static_img_pil, (0, 0)) + + # Paste current image + gif_frame_pil.paste(Image.open(path_images[i]).resize((gif_w, gif_h)), (num_imgs * gif_w + gap, 0)) + + # Draw progress bar + if progress_bar_h > 0: + gif_frame_pil_drawing = ImageDraw.Draw(gif_frame_pil) + progress = (i / len(path_images)) * gif_w + gif_frame_pil_drawing.rectangle(xy=[num_imgs * gif_w + gap, gif_h - progress_bar_h, + num_imgs * gif_w + gap + progress, gif_h], + fill=progress_bar_color) + + # Append to GIF frames list + gif_frames.append(gif_frame_pil) + + # Save GIF file + gif_frames[0].save( + fp=gif_filename, + append_images=gif_frames[1:], + save_all=True, + optimize=False, + loop=0, + duration=1000 // gif_fps) diff --git a/ContraCLIP/lib/config.py b/ContraCLIP/lib/config.py new file mode 100644 index 0000000000000000000000000000000000000000..88b1747d6ddce0f715ee848dd0e5190f54921e64 --- /dev/null +++ b/ContraCLIP/lib/config.py @@ -0,0 +1,213 @@ +######################################################################################################################## +## Basic configuration file. 
## +## ## +## ## +######################################################################################################################## + + +######################################################################################################################## +## ## +## [ Semantic Dipoles Corpora ] ## +## ## +######################################################################################################################## +SEMANTIC_DIPOLES_CORPORA = { + 'attributes': + [ + ["a photo of a female.", + "a photo of a male."], + ["a photo of an old person.", + "a photo of a young person."], + ["a photo of a smiling person.", + "a photo of a sad person."], + ["a photo of a bald man.", + "a photo of a man with hair."], + ["a photo of a man with beard.", + "a photo of a shaved man."], + ["a photo of a face with makeup.", + "a photo of a face without makeup."], + ["a photo of a person with closed eyes.", + "a photo of a person with open eyes."], + ["a photo of a person with open lips.", + "a photo of a person with closed lips."], + ["a photo of a person with tanned skin.", + "a photo of a person with pale skin."], + ], + 'expressions': + [ + ["a photo of a person with a happy face.", + "a photo of a person with a neutral face."], + ["a photo of a person with a sad face.", + "a photo of a person with a neutral face."], + ["a photo of a person with a fearful face.", + "a photo of a person with a neutral face."], + ["a photo of a person with a disgusted face.", + "a photo of a person with a neutral face."], + ["a photo of a person with an angry face.", + "a photo of a person with a neutral face."], + ["a photo of a person in surprise.", + "a photo of a person with a neutral face."], + ["a photo of a person with a sad face.", + "a photo of a person with a happy face."], + ["a photo of a person with a fearful face.", + "a photo of a person with a happy face."], + ["a photo of a person with a disgusted face.", + "a photo of a person with a happy face."], + ["a photo of a person with an angry face.", + "a photo of a person with a happy face."], + ["a photo of a person in surprise.", + "a photo of a person with a happy face."], + ["a photo of a person with a fearful face.", + "a photo of a person with a sad face."], + ["a photo of a person with a disgusted face.", + "a photo of a person with a sad face."], + ["a photo of a person with an angry face.", + "a photo of a person with a sad face."], + ["a photo of a person in surprise.", + "a photo of a person with a sad face."], + ["a photo of a person with a disgusted face.", + "a photo of a person with a fearful face."], + ["a photo of a person with an angry face.", + "a photo of a person with a fearful face."], + ["a photo of a person in surprise.", + "a photo of a person with a fearful face."], + ["a photo of a person with an angry face.", + "a photo of a person with a disgusted face."], + ["a photo of a person in surprise.", + "a photo of a person with a disgusted face."], + ["a photo of a person in surprise.", + "a photo of a person with an angry face."], + ], + 'expressions3': + [ + ["a photo of a person with a happy face.", + "a photo of a person with an angry face."], + ["a photo of a person in surprise.", + "a photo of a person with an angry face."], + ["a photo of a person in surprise.", + "a photo of a person with a happy face."], + ], + 'complex': + [ + ["a photo of a man with a beard crying.", + "a photo of an angry shaved man."], + ["a photo of a man with a beard crying.", + "a photo of a happy shaved man."], + ["a photo of a man with 
a beard crying.", + "a photo of a shaved man with makeup."], + ], + 'dogs': + [ + ["a photo of a happy dog.", + "a photo of a sad dog."], + ["a photo of a long haired dog.", + "a photo of a short haired dog."], + ["a photo of a friendly dog.", + "a photo of an aggressive dog."], + ["a photo of a dog with big eyes.", + "a photo of a dog with small eyes."] + ], + 'cats': + [ + ["a photo of a long haired cat.", + "a photo of a short haired cat."], + ["a photo of a cute cat.", + "a photo of an ugly cat."], + ["a photo of a cat with big ears.", + "a photo of a cat with small ears."] + ], + 'cars': + [ + ["a photo of a jeep.", + "a photo of a low car."], + ["a photo of a sports car.", + "a photo of a city car."], + ["a photo of a modern car.", + "a photo of a car from the sixties."], + ], +} + + +######################################################################################################################## +## ## +## [ Pre-trained ContraCLIP models ] ## +## ## +######################################################################################################################## +ContraCLIP_models = ('https://www.dropbox.com/s/bootpdxhnp9z6ce/contraclip_models.tar?dl=1', + '0941c96d311700ef881bed38350d6d0cc38151255a34db94a5f9400758398a7f') + +######################################################################################################################## +## ## +## [ SFD ] ## +## ## +######################################################################################################################## +SFD = ('https://www.dropbox.com/s/jssqpwyp4edp20o/sfd.tar?dl=1', + '2bea5f1c10110e356eef3f4efd45169100b9c7704eb6e6abd309df58f34452d4') + +######################################################################################################################## +## ## +## [ ArcFace ] ## +## ## +######################################################################################################################## +ARCFACE = ('https://www.dropbox.com/s/idulblr8pdrmbq1/arcface.tar?dl=1', + 'edd5854cacd86c17a78a11f70ab8c49bceffefb90ee070754288fa7ceadcdfb2') + +######################################################################################################################## +## ## +## [ FairFace ] ## +## ## +######################################################################################################################## +FAIRFACE = ('https://www.dropbox.com/s/lqrydpw7nv27ass/fairface.tar?dl=1', + '0e78ff8b79612e52e226461fb67f6cff43cef0959d1ab2b520acdcc9105d065e') + +######################################################################################################################## +## ## +## [ HopeNet ] ## +## ## +######################################################################################################################## +HOPENET = ('https://www.dropbox.com/s/rsw7gmo4gkqrbsv/hopenet.tar?dl=1', + '8c9d67dd8f82ce3332c43b5fc407dc57674d1f16fbe7f0743e9ad57ede73e33f') + +######################################################################################################################## +## ## +## [ AU Detector ] ## +## ## +######################################################################################################################## +AUDET = ('https://www.dropbox.com/s/jkkf1gda9o8ed47/au_detector.tar?dl=1', + 'dbdf18bf541de3c46769d712866bef38496b7528072850c28207747b2b2c101e') + +######################################################################################################################## +## ## +## [ CelebA Attributes ] ## +## ## 
+######################################################################################################################## +CELEBA_ATTRIBUTES = ('https://www.dropbox.com/s/bxbegherkpvgbw9/celeba_attributes.tar?dl=1', + '45276f2df865112c7488fe128d8c79527da252aad30fc541417b9961dfdd9bbc') + +######################################################################################################################## +## ## +## [ GenForce GAN Generators ] ## +## ## +######################################################################################################################## +GENFORCE = ('https://www.dropbox.com/s/3osul10173lbhut/genforce.tar?dl=1', + 'f9a0f98435cac4fb7599c2cc29858e48365c0998f9f48079efa5faf6c07aa3e1') + +GENFORCE_MODELS = { + # ===[ ProgGAN ]=== + 'pggan_celebahq1024': ('pggan_celebahq1024.pth', 1024), + 'pggan_church256': ('pggan_church256.pth', 256), + 'pggan_car256': ('pggan_car256.pth', 256), + # ===[ StyleGAN2 ]=== + 'stylegan2_ffhq1024': ('stylegan2_ffhq1024.pth', 1024), + 'stylegan2_afhqcat512': ('stylegan2_afhqcat512.pth', 512), + 'stylegan2_afhqdog512': ('stylegan2_afhqdog512.pth', 512), + 'stylegan2_car512': ('stylegan2_car512.pth', 512), + 'stylegan2_church256': ('stylegan2_church256.pth', 256) +} + +STYLEGAN_LAYERS = { + 'stylegan2_ffhq1024': 18, + 'stylegan2_afhqcat512': 16, + 'stylegan2_afhqdog512': 16, + 'stylegan2_car512': 16, + 'stylegan2_church256': 14, +} diff --git a/ContraCLIP/lib/data.py b/ContraCLIP/lib/data.py new file mode 100644 index 0000000000000000000000000000000000000000..18dad70f079cb041cce53961eeea2938e12a65fe --- /dev/null +++ b/ContraCLIP/lib/data.py @@ -0,0 +1,25 @@ +import torch +import os.path as osp +import glob +import cv2 +import numpy as np +from torch.utils import data + + +class PathImages(data.Dataset): + def __init__(self, root_path): + self.images_files = glob.glob(osp.join(root_path, '*.jpg')) + self.images_files.sort() + + def __len__(self): + return len(self.images_files) + + def __getitem__(self, index): + return self.image2tensor(self.images_files[index]) + + @staticmethod + def image2tensor(image_file): + # Open image in BGR order and convert to RBG order + img = cv2.imread(image_file, cv2.IMREAD_COLOR) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype('uint8') + return torch.tensor(np.transpose(img, (2, 0, 1))).float() diff --git a/ContraCLIP/lib/evaluation/archface/arcface.py b/ContraCLIP/lib/evaluation/archface/arcface.py new file mode 100644 index 0000000000000000000000000000000000000000..ce76c7260d6a770e3079c5c3567de2ab36ff5a5b --- /dev/null +++ b/ContraCLIP/lib/evaluation/archface/arcface.py @@ -0,0 +1,164 @@ +from torch import nn +from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, Dropout, MaxPool2d, \ + AdaptiveAvgPool2d, Sequential, Module +import torch +from collections import namedtuple + + +class IDComparator(nn.Module): + def __init__(self): + super(IDComparator, self).__init__() + self.backbone = SE_IR(50, drop_ratio=0.4, mode='ir_se') + self.backbone.load_state_dict(torch.load('models/pretrained/arcface/model_ir_se50.pth')) + self.face_pool = torch.nn.AdaptiveAvgPool2d((112, 112)) + self.criterion = nn.CosineSimilarity(dim=1, eps=1e-6) + + def extract_feats(self, x): + # Crop interesting region + x = x[:, :, 35:223, 32:220] + return self.backbone(self.face_pool(x)) + + def forward(self, x, x_prime): + return self.criterion(self.extract_feats(x), self.extract_feats(x_prime)).mean() + + 
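+# Illustrative usage sketch (not part of the original implementation): IDComparator scores identity preservation
+# between an original batch `x` and an edited batch `x_prime` (float image tensors, assumed here to be 256x256
+# aligned face crops given the fixed [35:223, 32:220] crop in extract_feats; requires the pre-trained weights
+# under models/pretrained/arcface/):
+#
+#   id_comparator = IDComparator().eval()
+#   with torch.no_grad():
+#       id_similarity = id_comparator(x, x_prime)  # mean cosine similarity of ArcFace embeddings, in [-1, 1]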
+######################################################################################################################## +## ## +## [ Original Arcface Model ] ## +## ## +######################################################################################################################## +class Flatten(Module): + @staticmethod + def forward(x): + return x.view(x.size(0), -1) + + +def l2_norm(x, axis=1): + norm = torch.norm(x, 2, axis, True) + output = torch.div(x, norm) + return output + + +class SEModule(Module): + def __init__(self, channels, reduction): + super(SEModule, self).__init__() + self.avg_pool = AdaptiveAvgPool2d(1) + self.fc1 = Conv2d( + channels, channels // reduction, kernel_size=(1, 1), padding=0, bias=False) + self.relu = ReLU(inplace=True) + self.fc2 = Conv2d( + channels // reduction, channels, kernel_size=(1, 1), padding=0, bias=False) + self.sigmoid = Sigmoid() + + def forward(self, x): + module_input = x + x = self.avg_pool(x) + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.sigmoid(x) + return module_input * x + + +class bottleneck_IR(Module): + def __init__(self, in_channel, depth, stride): + super(bottleneck_IR, self).__init__() + if in_channel == depth: + self.shortcut_layer = MaxPool2d(1, stride) + else: + self.shortcut_layer = Sequential( + Conv2d(in_channel, depth, (1, 1), stride, bias=False), BatchNorm2d(depth)) + self.res_layer = Sequential( + BatchNorm2d(in_channel), + Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), PReLU(depth), + Conv2d(depth, depth, (3, 3), stride, 1, bias=False), BatchNorm2d(depth)) + + def forward(self, x): + shortcut = self.shortcut_layer(x) + res = self.res_layer(x) + return res + shortcut + + +class bottleneck_IR_SE(Module): + def __init__(self, in_channel, depth, stride): + super(bottleneck_IR_SE, self).__init__() + if in_channel == depth: + self.shortcut_layer = MaxPool2d(1, stride) + else: + self.shortcut_layer = Sequential( + Conv2d(in_channel, depth, (1, 1), stride, bias=False), + BatchNorm2d(depth)) + self.res_layer = Sequential( + BatchNorm2d(in_channel), + Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), + PReLU(depth), + Conv2d(depth, depth, (3, 3), stride, 1, bias=False), + BatchNorm2d(depth), + SEModule(depth, 16) + ) + + def forward(self, x): + shortcut = self.shortcut_layer(x) + res = self.res_layer(x) + return res + shortcut + + +class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])): + """A named tuple describing a ResNet block.""" + + +def get_block(in_channel, depth, num_units, stride=2): + return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for _ in range(num_units - 1)] + + +def get_blocks(num_layers): + if num_layers == 50: + return [get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=4), + get_block(in_channel=128, depth=256, num_units=14), + get_block(in_channel=256, depth=512, num_units=3)] + + elif num_layers == 100: + return [get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=13), + get_block(in_channel=128, depth=256, num_units=30), + get_block(in_channel=256, depth=512, num_units=3)] + elif num_layers == 152: + return [get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=8), + get_block(in_channel=128, depth=256, num_units=36), + get_block(in_channel=256, depth=512, num_units=3)] + + +class SE_IR(Module): + def __init__(self, num_layers, drop_ratio=0.4, mode='ir_se'): + super(SE_IR, 
self).__init__() + assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152' + assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se' + + self.input_layer = Sequential(Conv2d(3, 64, (3, 3), (1, 1), 1, bias=False), + BatchNorm2d(64), + PReLU(64)) + self.output_layer = Sequential(BatchNorm2d(512), + Dropout(drop_ratio), + Flatten(), + Linear(512 * 7 * 7, 512), + BatchNorm1d(512)) + modules = [] + blocks = get_blocks(num_layers) + if mode == 'ir': + for block in blocks: + for bottleneck in block: + modules.append(bottleneck_IR(bottleneck.in_channel, bottleneck.depth, bottleneck.stride)) + elif mode == 'ir_se': + for block in blocks: + for bottleneck in block: + modules.append(bottleneck_IR_SE(bottleneck.in_channel, bottleneck.depth, bottleneck.stride)) + + self.body = Sequential(*modules) + + def forward(self, x): + x = self.input_layer(x) + x = self.body(x) + x = self.output_layer(x) + return l2_norm(x) diff --git a/ContraCLIP/lib/evaluation/au_detector/AU_detector.py b/ContraCLIP/lib/evaluation/au_detector/AU_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..a2126e9882f850a2e3da95edb1ec4ebfa9af5733 --- /dev/null +++ b/ContraCLIP/lib/evaluation/au_detector/AU_detector.py @@ -0,0 +1,46 @@ +import torch +from .hourglass import FANAU + + +class Model: + def __init__(self, npts=12, corenet='pretrained_models/disfa_adaptation_f0.pth', use_cuda=True): + self.FAN = FANAU(num_modules=1, n_points=npts) + self.FAN.load_state_dict(torch.load(corenet, map_location='cpu')['state_dict']) + self.FAN.eval() + if use_cuda: + self.FAN.cuda() + + def __call__(self, x): + H = self.FAN(x) + H = H if H.__class__.__name__ == 'Tensor' else H[-1] + return H + + def _forward_FAN(self, images): + with torch.no_grad(): + self.FAN.eval() + H = self.FAN(images) + return H + + def forward_FAN(self, images): + H = self.FAN(images) + return H + + +class AUdetector: + def __init__(self, au_model_path='models/pretrained/au_detector/disfa_adaptation_f0.pth', use_cuda=True): + self.naus = 12 + self.AUdetector = Model(npts=self.naus, corenet=au_model_path, use_cuda=use_cuda) + self.use_cuda = use_cuda + + def detect_AU(self, img): + img_normalized = (img - img.min()) / (img.max() - img.min()) + if self.use_cuda: + img_normalized = img_normalized.cuda() + + if img_normalized.ndim == 3: + img_normalized = img_normalized.unsqueeze(0) + + heatmaps = self.AUdetector.forward_FAN(img_normalized) + intensities = torch.nn.MaxPool2d((64, 64))(heatmaps).squeeze(2).squeeze(2) + + return intensities diff --git a/ContraCLIP/lib/evaluation/au_detector/hourglass.py b/ContraCLIP/lib/evaluation/au_detector/hourglass.py new file mode 100644 index 0000000000000000000000000000000000000000..4913fecf3d827d5b779d1a351a4b86e90d89f091 --- /dev/null +++ b/ContraCLIP/lib/evaluation/au_detector/hourglass.py @@ -0,0 +1,243 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as weight_init + + +def conv3x3(in_planes, out_planes, stride=1, padding=1, bias=False): + """3x3 convolution with padding.""" + return nn.Conv2d(in_planes, out_planes, kernel_size=(3, 3), stride=(stride, stride), padding=padding, bias=bias) + + +def conv1x1(in_planes, out_planes, stride=1, padding=0, bias=False): + """1x1 convolution with padding.""" + return nn.Conv2d(in_planes, out_planes, kernel_size=(1, 1), stride=(stride, stride), padding=padding, bias=bias) + + +class ConvBlock(nn.Module): + def __init__(self, in_planes, out_planes, lightweight = False): + super(ConvBlock, 
self).__init__() + + if lightweight: + self.conv1 = conv1x1(in_planes, int(out_planes / 2)) + self.conv2 = conv1x1(int(out_planes / 2), int(out_planes / 4)) + self.conv3 = conv1x1(int(out_planes / 4), int(out_planes / 4)) + else: + self.conv1 = conv3x3(in_planes, int(out_planes / 2)) + self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4)) + self.conv3 = conv3x3(int(out_planes / 4), int(out_planes / 4)) + + self.bn1 = nn.BatchNorm2d(int(out_planes / 2)) + self.bn2 = nn.BatchNorm2d(int(out_planes / 4)) + self.bn3 = nn.BatchNorm2d(int(out_planes / 4)) + + if in_planes != out_planes: + self.downsample = nn.Sequential( + nn.Conv2d(in_planes, out_planes, kernel_size=(1, 1), stride=(1, 1), bias=False), + nn.BatchNorm2d(out_planes), + nn.ReLU6(True), + ) + else: + self.downsample = None + + def forward(self, x): + residual = x + + out1 = self.conv1(x) + out1 = self.bn1(out1) + out1 = F.relu6(out1, True) + + out2 = self.conv2(out1) + out2 = self.bn2(out2) + out2 = F.relu6(out2, True) + + out3 = self.conv3(out2) + out3 = self.bn3(out3) + out3 = F.relu6(out3, True) + + out3 = torch.cat((out1, out2, out3), 1) + + if self.downsample is not None: + residual = self.downsample(residual) + + out3 += residual + + return out3 + + +class HourGlass(nn.Module): + def __init__(self, num_modules, depth, num_features, lightweight = False): + super(HourGlass, self).__init__() + self.num_modules = num_modules + self.depth = depth + self.features = num_features + self.lightweight = lightweight + self._generate_network(self.depth) + + def _generate_network(self, level): + self.add_module('b1_' + str(level), ConvBlock(self.features, self.features, lightweight=self.lightweight)) + + self.add_module('b2_' + str(level), ConvBlock(self.features, self.features)) + + if level > 1: + self._generate_network(level - 1) + else: + self.add_module('b2_plus_' + str(level), ConvBlock(self.features, self.features)) + + self.add_module('b3_' + str(level), ConvBlock(self.features, self.features)) + + def _forward(self, level, inp): + # Upper branch + up1 = inp + up1 = self._modules['b1_' + str(level)](up1) + + # Lower branch + low1 = F.max_pool2d(inp, 2, stride=2) + low1 = self._modules['b2_' + str(level)](low1) + + if level > 1: + low2 = self._forward(level - 1, low1) + else: + low2 = low1 + low2 = self._modules['b2_plus_' + str(level)](low2) + + low3 = low2 + low3 = self._modules['b3_' + str(level)](low3) + + up2 = F.interpolate(low3, scale_factor=2, mode='nearest') + + return up1 + up2 + + def forward(self, x): + return self._forward(self.depth, x) + + +class QFAN(nn.Module): + def __init__(self, num_modules=1, num_in=3, num_features = 128, num_out=68, return_features=False): + super(QFAN, self).__init__() + self.num_modules = num_modules + self.num_in = num_in + self.num_features = num_features + self.num_out = num_out + self.return_features = return_features + + # Base part + self.conv1 = nn.Conv2d(self.num_in, int(self.num_features / 2), kernel_size=(7, 7), stride=(2, 2), padding=3) + self.bn1 = nn.BatchNorm2d(int(self.num_features / 2)) + self.conv2 = ConvBlock(int(self.num_features / 2), int(self.num_features / 2)) + self.conv3 = ConvBlock(int(self.num_features / 2), self.num_features) + self.conv4 = ConvBlock(self.num_features, self.num_features) + + # Stacking part + for hg_module in range(self.num_modules): + self.add_module('m' + str(hg_module), HourGlass(1, 4, self.num_features)) + self.add_module('top_m_' + str(hg_module), ConvBlock(self.num_features, self.num_features)) + self.add_module('conv_last' + 
str(hg_module), + nn.Conv2d(self.num_features, self.num_features, kernel_size=(1, 1), stride=(1, 1), + padding=0)) + self.add_module('bn_end' + str(hg_module), nn.BatchNorm2d(self.num_features)) + self.add_module('l' + str(hg_module), nn.Conv2d(self.num_features, + self.num_out, kernel_size=(1, 1), stride=(1, 1), padding=0)) + + if hg_module < self.num_modules - 1: + self.add_module( + 'bl' + str(hg_module), nn.Conv2d(self.num_features, self.num_features, kernel_size=(1, 1), + stride=(1, 1), padding=0)) + self.add_module('al' + str(hg_module), nn.Conv2d(self.num_out, self.num_features, kernel_size=(1, 1), + stride=(1, 1), padding=0)) + + def forward(self, x): + features = [] + x = F.relu(self.bn1(self.conv1(x)), True) + x = F.max_pool2d(self.conv2(x), 2, stride=2) + x = self.conv3(x) + x = self.conv4(x) + if self.return_features: + features.append(x) + + previous = x + + outputs = [] + for i in range(self.num_modules): + hg = self._modules['m' + str(i)](previous) + + ll = hg + ll = self._modules['top_m_' + str(i)](ll) + + ll = F.relu(self._modules['bn_end' + str(i)] + (self._modules['conv_last' + str(i)](ll)), True) + + # Predict heatmaps + tmp_out = self._modules['l' + str(i)](ll) + outputs.append(tmp_out) + + if i < self.num_modules - 1: + ll = self._modules['bl' + str(i)](ll) + tmp_out_ = self._modules['al' + str(i)](tmp_out) + previous = previous + ll + tmp_out_ + + if self.return_features: + return outputs, features + else: + return outputs + + +def init_weights(net, init_type='normal', gain=0.02): + def init_func(m): + classname = m.__class__.__name__ + if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): + if init_type == 'normal': + weight_init.normal_(m.weight.data, 0.0, gain) + elif init_type == 'xavier': + weight_init.xavier_normal_(m.weight.data, gain=gain) + elif init_type == 'kaiming': + weight_init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + elif init_type == 'orthogonal': + weight_init.orthogonal_(m.weight.data, gain=gain) + else: + raise NotImplementedError('initialization method [%s] is not implemented' % init_type) + if hasattr(m, 'bias') and m.bias is not None: + weight_init.constant_(m.bias.data, 0.0) + elif classname.find('BatchNorm2d') != -1: + weight_init.normal_(m.weight.data, 1.0, gain) + weight_init.constant_(m.bias.data, 0.0) + + net.apply(init_func) + + + +class FANAU(nn.Module): + def __init__(self, num_modules=1, num_features = 128, n_points=66, block=ConvBlock): + super(FANAU, self).__init__() + self.num_modules = 1 + self.num_features = num_features + self.fan = QFAN(num_modules = self.num_modules, return_features=True) + block = eval(block) if isinstance(block,str) else block + + # input features + self.conv1 = nn.Sequential(nn.Conv2d(68, self.num_features, 1, 1), nn.BatchNorm2d(self.num_features), nn.ReLU6()) + self.conv2 = nn.Sequential(nn.Conv2d(self.num_features, self.num_features, 1, 1), + nn.BatchNorm2d(self.num_features), nn.ReLU6()) + + self.net = HourGlass(1,4, self.num_features, lightweight=True) + self.conv_last = nn.Sequential(nn.Conv2d(self.num_features, self.num_features, 1, 1), + nn.BatchNorm2d(self.num_features), nn.ReLU6()) + self.l = nn.Conv2d(self.num_features, n_points, 1, 1) + + init_weights(self) + + def forward(self, x): + self.fan.eval() + # with torch.no_grad(): + output, features = self.fan(x) + # print(len(output), len(features)) + # print(output[0].shape, features[0].shape) + + out = output[-1] + x = self.conv1(out) + self.conv2(features[0]) + x = self.net(x) + x = 
self.conv_last(x) + x = self.l(x) + # print(x.shape) + # quit() + return x diff --git a/ContraCLIP/lib/evaluation/celeba_attributes/attributes_5.json b/ContraCLIP/lib/evaluation/celeba_attributes/attributes_5.json new file mode 100644 index 0000000000000000000000000000000000000000..362fd206b4537ff188bcd5838eb559624853b490 --- /dev/null +++ b/ContraCLIP/lib/evaluation/celeba_attributes/attributes_5.json @@ -0,0 +1,63 @@ + +{ + "attr_info":{ + "6": { + "name": "Bangs", + "value":[0, 1, 2, 3, 4, 5], + "idx_scale": 1, + "idx_bias": 0 + }, + "16": { + "name": "Eyeglasses", + "value":[0, 1, 2, 3, 4, 5], + "idx_scale": 1, + "idx_bias": 0 + }, + "25": { + "name": "No_Beard", + "value":[0, 1, 2, 3, 4, 5], + "idx_scale": -1, + "idx_bias": 5 + }, + "32": { + "name": "Smiling", + "value":[0, 1, 2, 3, 4, 5], + "idx_scale": 1, + "idx_bias": 0 + }, + "40": { + "name": "Young", + "value":[0, 1, 2, 3, 4, 5], + "idx_scale": -1, + "idx_bias": 5 + } + }, + "newIdx_to_attrIdx":{ + "0": "6", + "1": "16", + "2": "25", + "3": "32", + "4": "40" + }, + "newIdx_to_attrName":{ + "0": "Bangs", + "1": "Eyeglasses", + "2": "No_Beard", + "3": "Smiling", + "4": "Young" + }, + "attrName_to_newIdx":{ + "Bangs": "0", + "Eyeglasses": "1", + "No_Beard": "2", + "Smiling": "3", + "Young": "4" + }, + "attrIdx_to_newIdx":{ + "6": 0, + "16": 1, + "25": 2, + "32": 3, + "40": 4 + } +} \ No newline at end of file diff --git a/ContraCLIP/lib/evaluation/celeba_attributes/celeba_attr_predictor.py b/ContraCLIP/lib/evaluation/celeba_attributes/celeba_attr_predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..8d31345d5b84a4a39f0183b4fbd720864d54c8a3 --- /dev/null +++ b/ContraCLIP/lib/evaluation/celeba_attributes/celeba_attr_predictor.py @@ -0,0 +1,206 @@ +import json +import torch +import torch.nn as nn +import torch.utils.model_zoo as model_zoo + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=(3, 3), stride=(stride, stride), padding=1, bias=False) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=(1, 1), stride=(stride, stride), bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = conv1x1(inplanes, planes) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = conv3x3(planes, planes, stride) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = conv1x1(planes, planes * self.expansion) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = 
self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class fc_block(nn.Module): + def __init__(self, inplanes, planes, drop_rate=0.15): + super(fc_block, self).__init__() + self.fc = nn.Linear(inplanes, planes) + self.bn = nn.BatchNorm1d(planes) + if drop_rate > 0: + self.dropout = nn.Dropout(drop_rate) + self.relu = nn.ReLU(inplace=True) + self.drop_rate = drop_rate + + def forward(self, x): + x = self.fc(x) + x = self.bn(x) + if self.drop_rate > 0: + x = self.dropout(x) + x = self.relu(x) + return x + + +class ResNet(nn.Module): + def __init__(self, + block, + layers, + attr_file, + zero_init_residual=False, + dropout_rate=0): + super(ResNet, self).__init__() + self.inplanes = 64 + self.conv1 = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.stem = fc_block(512 * block.expansion, 512, dropout_rate) + + # Construct classifier heads according to the number of values of each attribute + self.attr_file = attr_file + with open(self.attr_file, 'r') as f: + attr_f = json.load(f) + self.attr_info = attr_f['attr_info'] + for idx, (key, val) in enumerate(self.attr_info.items()): + num_val = int(len(val["value"])) + setattr(self, 'classifier' + str(key).zfill(2) + val["name"], + nn.Sequential(fc_block(512, 256, dropout_rate), nn.Linear(256, num_val))) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, so that the residual branch starts with zeros, and each + # residual block behaves like an identity. 
+ # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential(conv1x1(self.inplanes, planes * block.expansion, stride), + nn.BatchNorm2d(planes * block.expansion)) + + layers = [block(self.inplanes, planes, stride, downsample)] + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.stem(x) + + predictions = {} + for idx, (key, val) in enumerate(self.attr_info.items()): + classifier = getattr(self, 'classifier' + str(key).zfill(2) + val["name"]) + predictions.update({val["name"]: classifier(x)}) + + return predictions + + +def celeba_attr_predictor(attr_file, pretrained='models/pretrained/celeba_attributes/predictor_1024.pth.tar'): + model = ResNet(Bottleneck, [3, 4, 6, 3], attr_file=attr_file) + init_pretrained_weights(model, 'https://download.pytorch.org/models/resnet50-19c8e357.pth') + model.load_state_dict(torch.load(pretrained)['state_dict'], strict=True) + return model + + +def init_pretrained_weights(model, model_url): + """Initialize model with pretrained weights. Layers that don't match with pretrained layers in name or size are kept + unchanged. 
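+    With the ResNet-50 URL used by `celeba_attr_predictor` above, this means that only the backbone parameters
+    are copied from the torchvision model zoo; the attribute-specific classifier heads keep their own
+    initialization until the CelebA predictor checkpoint is loaded on top.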
+ """ + pretrain_dict = model_zoo.load_url(model_url) + model_dict = model.state_dict() + pretrain_dict = { + k: v + for k, v in pretrain_dict.items() + if k in model_dict and model_dict[k].size() == v.size() + } + model_dict.update(pretrain_dict) + model.load_state_dict(model_dict) diff --git a/ContraCLIP/lib/evaluation/data.py b/ContraCLIP/lib/evaluation/data.py new file mode 100644 index 0000000000000000000000000000000000000000..18dad70f079cb041cce53961eeea2938e12a65fe --- /dev/null +++ b/ContraCLIP/lib/evaluation/data.py @@ -0,0 +1,25 @@ +import torch +import os.path as osp +import glob +import cv2 +import numpy as np +from torch.utils import data + + +class PathImages(data.Dataset): + def __init__(self, root_path): + self.images_files = glob.glob(osp.join(root_path, '*.jpg')) + self.images_files.sort() + + def __len__(self): + return len(self.images_files) + + def __getitem__(self, index): + return self.image2tensor(self.images_files[index]) + + @staticmethod + def image2tensor(image_file): + # Open image in BGR order and convert to RBG order + img = cv2.imread(image_file, cv2.IMREAD_COLOR) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype('uint8') + return torch.tensor(np.transpose(img, (2, 0, 1))).float() diff --git a/ContraCLIP/lib/evaluation/sfd/__init__.py b/ContraCLIP/lib/evaluation/sfd/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fef5689f34b51c4073987b123b182025d233c995 --- /dev/null +++ b/ContraCLIP/lib/evaluation/sfd/__init__.py @@ -0,0 +1 @@ +from .sfd_detector import SFDDetector as FaceDetector diff --git a/ContraCLIP/lib/evaluation/sfd/bbox.py b/ContraCLIP/lib/evaluation/sfd/bbox.py new file mode 100644 index 0000000000000000000000000000000000000000..45cd438bbcc3ad5047b14e141b23f76cf2624e83 --- /dev/null +++ b/ContraCLIP/lib/evaluation/sfd/bbox.py @@ -0,0 +1,111 @@ +from __future__ import print_function +import os +import sys +import cv2 +import random +import datetime +import time +import math +import argparse +import numpy as np +import torch + +try: + from iou import IOU +except BaseException: + # IOU cython speedup 10x + def IOU(ax1, ay1, ax2, ay2, bx1, by1, bx2, by2): + sa = abs((ax2 - ax1) * (ay2 - ay1)) + sb = abs((bx2 - bx1) * (by2 - by1)) + x1, y1 = max(ax1, bx1), max(ay1, by1) + x2, y2 = min(ax2, bx2), min(ay2, by2) + w = x2 - x1 + h = y2 - y1 + if w < 0 or h < 0: + return 0.0 + else: + return 1.0 * w * h / (sa + sb - w * h) + + +def bboxlog(x1, y1, x2, y2, axc, ayc, aww, ahh): + xc, yc, ww, hh = (x2 + x1) / 2, (y2 + y1) / 2, x2 - x1, y2 - y1 + dx, dy = (xc - axc) / aww, (yc - ayc) / ahh + dw, dh = math.log(ww / aww), math.log(hh / ahh) + return dx, dy, dw, dh + + +def bboxloginv(dx, dy, dw, dh, axc, ayc, aww, ahh): + xc, yc = dx * aww + axc, dy * ahh + ayc + ww, hh = math.exp(dw) * aww, math.exp(dh) * ahh + x1, x2, y1, y2 = xc - ww / 2, xc + ww / 2, yc - hh / 2, yc + hh / 2 + return x1, y1, x2, y2 + + +def nms(dets, thresh): + # print(dets) + if 0 == len(dets): + return [] + x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4] + # print(x1,x2,y1,y2) + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1, yy1 = np.maximum(x1[i], x1[order[1:]]), np.maximum(y1[i], y1[order[1:]]) + xx2, yy2 = np.minimum(x2[i], x2[order[1:]]), np.minimum(y2[i], y2[order[1:]]) + + w, h = np.maximum(0.0, xx2 - xx1 + 1), np.maximum(0.0, yy2 - yy1 + 1) + ovr = w * h / (areas[i] + areas[order[1:]] - w * h) + + inds = 
np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + + +def encode(matched, priors, variances): + """Encode the variances from the priorbox layers into the ground truth boxes + we have matched (based on jaccard overlap) with the prior boxes. + Args: + matched: (tensor) Coords of ground truth for each prior in point-form + Shape: [num_priors, 4]. + priors: (tensor) Prior boxes in center-offset form + Shape: [num_priors,4]. + variances: (list[float]) Variances of priorboxes + Return: + encoded boxes (tensor), Shape: [num_priors, 4] + """ + + # dist b/t match center and prior's center + g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2] + # encode variance + g_cxcy /= (variances[0] * priors[:, 2:]) + # match wh / prior wh + g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] + g_wh = torch.log(g_wh) / variances[1] + # return target for smooth_l1_loss + return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] + + +def decode(loc, priors, variances): + """Decode locations from predictions using priors to undo + the encoding we did for offset regression at train time. + Args: + loc (tensor): location predictions for loc layers, + Shape: [num_priors,4] + priors (tensor): Prior boxes in center-offset form. + Shape: [num_priors,4]. + variances: (list[float]) Variances of priorboxes + Return: + decoded bounding box predictions + """ + + boxes = torch.cat(( + priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], + priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) + boxes[:, :2] -= boxes[:, 2:] / 2 + boxes[:, 2:] += boxes[:, :2] + return boxes diff --git a/ContraCLIP/lib/evaluation/sfd/core.py b/ContraCLIP/lib/evaluation/sfd/core.py new file mode 100644 index 0000000000000000000000000000000000000000..608af18a3b44cd7801881f7d2bff594eb06f52d4 --- /dev/null +++ b/ContraCLIP/lib/evaluation/sfd/core.py @@ -0,0 +1,152 @@ +import logging +import glob +from tqdm import tqdm +import numpy as np +import torch +import cv2 +from skimage import io + + +class FaceDetector(object): + """An abstract class representing a face detector. + + Any other face detection implementation must subclass it. All subclasses + must implement ``detect_from_image``, that return a list of detected + bounding boxes. Optionally, for speed considerations detect from path is + recommended. + """ + + def __init__(self, device, verbose): + self.device = device + self.verbose = verbose + + # if verbose: + # if 'cpu' in device: + # logger = logging.getLogger(__name__) + # logger.warning("Detection running on CPU, this may be potentially slow.") + + # if 'cpu' not in device and 'cuda' not in device: + # if verbose: + # logger.error("Expected values for device are: {cpu, cuda} but got: %s", device) + # raise ValueError + + def detect_from_image(self, tensor_or_path): + """Detects faces in a given image. + + This function detects the faces present in a provided BGR(usually) + image. The input can be either the image itself or the path to it. + + Arguments: + tensor_or_path {numpy.ndarray, torch.tensor or string} -- the path + to an image or the image itself. + + Example:: + + >>> path_to_image = 'data/image_01.jpg' + ... detected_faces = detect_from_image(path_to_image) + [A list of bounding boxes (x1, y1, x2, y2)] + >>> image = cv2.imread(path_to_image) + ... detected_faces = detect_from_image(image) + [A list of bounding boxes (x1, y1, x2, y2)] + + """ + raise NotImplementedError + + def detect_from_batch(self, tensor): + """Detects faces in a given image. 
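+        Unlike `detect_from_image`, implementations of this method are expected to operate on a batch tensor of
+        shape (B, C, H, W) and to return one list of detected bounding boxes per image in the batch.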
+ + This function detects the faces present in a provided BGR(usually) + image. The input can be either the image itself or the path to it. + + Arguments: + tensor {torch.tensor} -- image batch tensor. + + Example:: + + >>> path_to_image = 'data/image_01.jpg' + ... detected_faces = detect_from_image(path_to_image) + [A list of bounding boxes (x1, y1, x2, y2)] + >>> image = cv2.imread(path_to_image) + ... detected_faces = detect_from_image(image) + [A list of bounding boxes (x1, y1, x2, y2)] + + """ + raise NotImplementedError + + def detect_from_directory(self, path, extensions=['.jpg', '.png'], recursive=False, show_progress_bar=True): + """Detects faces from all the images present in a given directory. + + Arguments: + path {string} -- a string containing a path that points to the folder containing the images + + Keyword Arguments: + extensions {list} -- list of string containing the extensions to be + consider in the following format: ``.extension_name`` (default: + {['.jpg', '.png']}) recursive {bool} -- option wherever to scan the + folder recursively (default: {False}) show_progress_bar {bool} -- + display a progressbar (default: {True}) + + Example: + >>> directory = 'data' + ... detected_faces = detect_from_directory(directory) + {A dictionary of [lists containing bounding boxes(x1, y1, x2, y2)]} + + """ + if self.verbose: + logger = logging.getLogger(__name__) + + if len(extensions) == 0: + if self.verbose: + logger.error("Expected at list one extension, but none was received.") + raise ValueError + + if self.verbose: + logger.info("Constructing the list of images.") + additional_pattern = '/**/*' if recursive else '/*' + files = [] + for extension in extensions: + files.extend(glob.glob(path + additional_pattern + extension, recursive=recursive)) + + if self.verbose: + logger.info("Finished searching for images. 
%s images found", len(files)) + logger.info("Preparing to run the detection.") + + predictions = {} + for image_path in tqdm(files, disable=not show_progress_bar): + if self.verbose: + logger.info("Running the face detector on image: %s", image_path) + predictions[image_path] = self.detect_from_image(image_path) + + if self.verbose: + logger.info("The detector was successfully run on all %s images", len(files)) + + return predictions + + @property + def reference_scale(self): + raise NotImplementedError + + @property + def reference_x_shift(self): + raise NotImplementedError + + @property + def reference_y_shift(self): + raise NotImplementedError + + @staticmethod + def tensor_or_path_to_ndarray(tensor_or_path, rgb=True): + """Convert path (represented as a string) or torch.tensor to a numpy.ndarray + + Arguments: + tensor_or_path {numpy.ndarray, torch.tensor or string} -- path to the image, or the image itself + """ + if isinstance(tensor_or_path, str): + return cv2.imread(tensor_or_path) if not rgb else io.imread(tensor_or_path) + elif torch.is_tensor(tensor_or_path): + # Call cpu in case its coming from cuda + return tensor_or_path.cpu().numpy()[..., ::-1].copy() if not rgb else tensor_or_path.cpu().numpy() + elif isinstance(tensor_or_path, np.ndarray): + return tensor_or_path[..., ::-1].copy() if not rgb else tensor_or_path + else: + raise TypeError diff --git a/ContraCLIP/lib/evaluation/sfd/detect.py b/ContraCLIP/lib/evaluation/sfd/detect.py new file mode 100644 index 0000000000000000000000000000000000000000..c39e5805e6c34580f7375d2f780dbfb9f84c7f27 --- /dev/null +++ b/ContraCLIP/lib/evaluation/sfd/detect.py @@ -0,0 +1,94 @@ +import torch +import torch.nn.functional as F + +import os +import sys +import cv2 +import random +import datetime +import math +import argparse +import numpy as np + +import scipy.io as sio +import zipfile +from .net_s3fd import s3fd +from .bbox import * +import matplotlib.pyplot as plt + + +def detect(net, img, device): + img = img - np.array([104, 117, 123]) + img = img.transpose(2, 0, 1) + img = img.reshape((1,) + img.shape) + + # if torch.cuda.current_device() == 0: + # torch.backends.cudnn.benchmark = True + + img = torch.from_numpy(img).float().to(device) + + return batch_detect(net, img, device) + + +def batch_detect(net, img_batch, device): + """ + Inputs: + - img_batch: a torch.Tensor of shape (Batch size, Channels, Height, Width) + """ + # if torch.cuda.current_device() == 0: + # torch.backends.cudnn.benchmark = True + + BB, CC, HH, WW = img_batch.size() + with torch.no_grad(): + olist = net(img_batch.float()) # patched uint8_t overflow error + + for i in range(len(olist) // 2): + olist[i * 2] = F.softmax(olist[i * 2], dim=1) + + bboxlists = [] + olist = [oelem.data.cpu() for oelem in olist] + for j in range(BB): + bboxlist = [] + for i in range(len(olist) // 2): + ocls, oreg = olist[i * 2], olist[i * 2 + 1] + FB, FC, FH, FW = ocls.size() # feature map size + stride = 2**(i + 2) # 4,8,16,32,64,128 + anchor = stride * 4 + poss = zip(*np.where(ocls[:, 1, :, :] > 0.05)) + + for Iindex, hindex, windex in poss: + axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride + score = ocls[j, 1, hindex, windex] + loc = oreg[j, :, hindex, windex].contiguous().view(1, 4) + priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]]) + variances = [0.1, 0.2] + box = decode(loc, priors, variances) + x1, y1, x2, y2 = box[0] * 1.0 + bboxlist.append([x1, y1, x2, y2, score]) + bboxlists.append(bboxlist) + + bboxlists = 
np.array(bboxlists) + + if 0 == len(bboxlists): + bboxlists = np.zeros((1, 1, 5)) + + return bboxlists + + +def flip_detect(net, img, device): + img = cv2.flip(img, 1) + b = detect(net, img, device) + + bboxlist = np.zeros(b.shape) + bboxlist[:, 0] = img.shape[1] - b[:, 2] + bboxlist[:, 1] = b[:, 1] + bboxlist[:, 2] = img.shape[1] - b[:, 0] + bboxlist[:, 3] = b[:, 3] + bboxlist[:, 4] = b[:, 4] + return bboxlist + + +def pts_to_bb(pts): + min_x, min_y = np.min(pts, axis=0) + max_x, max_y = np.max(pts, axis=0) + return np.array([min_x, min_y, max_x, max_y]) diff --git a/ContraCLIP/lib/evaluation/sfd/net_s3fd.py b/ContraCLIP/lib/evaluation/sfd/net_s3fd.py new file mode 100644 index 0000000000000000000000000000000000000000..152575b18fd90fb2ee8aff7e9e6bd7afbd1e4082 --- /dev/null +++ b/ContraCLIP/lib/evaluation/sfd/net_s3fd.py @@ -0,0 +1,129 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class L2Norm(nn.Module): + def __init__(self, n_channels, scale=1.0): + super(L2Norm, self).__init__() + self.n_channels = n_channels + self.scale = scale + self.eps = 1e-10 + self.weight = nn.Parameter(torch.Tensor(self.n_channels)) + self.weight.data *= 0.0 + self.weight.data += self.scale + + def forward(self, x): + norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps + x = x / norm * self.weight.view(1, -1, 1, 1) + return x + + +class s3fd(nn.Module): + def __init__(self): + super(s3fd, self).__init__() + self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1) + self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1) + + self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1) + self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) + + self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1) + self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) + self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) + + self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1) + self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) + self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) + + self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) + self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) + self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) + + self.fc6 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=3) + self.fc7 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) + + self.conv6_1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) + self.conv6_2 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1) + + self.conv7_1 = nn.Conv2d(512, 128, kernel_size=1, stride=1, padding=0) + self.conv7_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1) + + self.conv3_3_norm = L2Norm(256, scale=10) + self.conv4_3_norm = L2Norm(512, scale=8) + self.conv5_3_norm = L2Norm(512, scale=5) + + self.conv3_3_norm_mbox_conf = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1) + self.conv3_3_norm_mbox_loc = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1) + self.conv4_3_norm_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1) + self.conv4_3_norm_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1) + self.conv5_3_norm_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1) + self.conv5_3_norm_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1) + + self.fc7_mbox_conf = nn.Conv2d(1024, 2, kernel_size=3, stride=1, 
padding=1) + self.fc7_mbox_loc = nn.Conv2d(1024, 4, kernel_size=3, stride=1, padding=1) + self.conv6_2_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1) + self.conv6_2_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1) + self.conv7_2_mbox_conf = nn.Conv2d(256, 2, kernel_size=3, stride=1, padding=1) + self.conv7_2_mbox_loc = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1) + + def forward(self, x): + h = F.relu(self.conv1_1(x)) + h = F.relu(self.conv1_2(h)) + h = F.max_pool2d(h, 2, 2) + + h = F.relu(self.conv2_1(h)) + h = F.relu(self.conv2_2(h)) + h = F.max_pool2d(h, 2, 2) + + h = F.relu(self.conv3_1(h)) + h = F.relu(self.conv3_2(h)) + h = F.relu(self.conv3_3(h)) + f3_3 = h + h = F.max_pool2d(h, 2, 2) + + h = F.relu(self.conv4_1(h)) + h = F.relu(self.conv4_2(h)) + h = F.relu(self.conv4_3(h)) + f4_3 = h + h = F.max_pool2d(h, 2, 2) + + h = F.relu(self.conv5_1(h)) + h = F.relu(self.conv5_2(h)) + h = F.relu(self.conv5_3(h)) + f5_3 = h + h = F.max_pool2d(h, 2, 2) + + h = F.relu(self.fc6(h)) + h = F.relu(self.fc7(h)) + ffc7 = h + h = F.relu(self.conv6_1(h)) + h = F.relu(self.conv6_2(h)) + f6_2 = h + h = F.relu(self.conv7_1(h)) + h = F.relu(self.conv7_2(h)) + f7_2 = h + + f3_3 = self.conv3_3_norm(f3_3) + f4_3 = self.conv4_3_norm(f4_3) + f5_3 = self.conv5_3_norm(f5_3) + + cls1 = self.conv3_3_norm_mbox_conf(f3_3) + reg1 = self.conv3_3_norm_mbox_loc(f3_3) + cls2 = self.conv4_3_norm_mbox_conf(f4_3) + reg2 = self.conv4_3_norm_mbox_loc(f4_3) + cls3 = self.conv5_3_norm_mbox_conf(f5_3) + reg3 = self.conv5_3_norm_mbox_loc(f5_3) + cls4 = self.fc7_mbox_conf(ffc7) + reg4 = self.fc7_mbox_loc(ffc7) + cls5 = self.conv6_2_mbox_conf(f6_2) + reg5 = self.conv6_2_mbox_loc(f6_2) + cls6 = self.conv7_2_mbox_conf(f7_2) + reg6 = self.conv7_2_mbox_loc(f7_2) + + # max-out background label + chunk = torch.chunk(cls1, 4, 1) + bmax = torch.max(torch.max(chunk[0], chunk[1]), chunk[2]) + cls1 = torch.cat([bmax, chunk[3]], dim=1) + + return [cls1, reg1, cls2, reg2, cls3, reg3, cls4, reg4, cls5, reg5, cls6, reg6] diff --git a/ContraCLIP/lib/evaluation/sfd/sfd_detector.py b/ContraCLIP/lib/evaluation/sfd/sfd_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..9848b8fe4f5b587c5a909b10dd0afb64bcb51534 --- /dev/null +++ b/ContraCLIP/lib/evaluation/sfd/sfd_detector.py @@ -0,0 +1,53 @@ +from .core import FaceDetector +from .detect import * + + +class SFDDetector(FaceDetector): + def __init__(self, path_to_detector=None, device="cuda", verbose=False): + super(SFDDetector, self).__init__(device, verbose) + self.device = device + self.face_detector = s3fd() + self.face_detector.load_state_dict(torch.load(path_to_detector)) + self.face_detector.eval() + if self.device == "cuda": + self.face_detector.cuda() + + def detect_from_image(self, tensor_or_path): + image = self.tensor_or_path_to_ndarray(tensor_or_path) + bboxlist = detect(self.face_detector, image, device=self.device)[0] + keep = nms(bboxlist, 0.3) + bboxlist = bboxlist[keep, :] + bboxlist = [x for x in bboxlist if x[-1] > 0.5] + + return bboxlist + + def detect_from_batch(self, tensor): + bboxlists = batch_detect(self.face_detector, tensor, device=self.device) + error = False + new_bboxlists = [] + error_index = -1 + for i in range(bboxlists.shape[0]): + bboxlist = bboxlists[i] + keep = nms(bboxlist, 0.3) + if len(keep) > 0: + bboxlist = bboxlist[keep, :] + bboxlist = [x for x in bboxlist if x[-1] > 0.5] + new_bboxlists.append(bboxlist) + else: + error = True + error_index = i + new_bboxlists.append([]) + + return 
new_bboxlists, error, error_index + + @property + def reference_scale(self): + return 195 + + @property + def reference_x_shift(self): + return 0 + + @property + def reference_y_shift(self): + return 0 diff --git a/ContraCLIP/lib/support_sets.py b/ContraCLIP/lib/support_sets.py new file mode 100644 index 0000000000000000000000000000000000000000..d9a7743b212b82ddd04756ac140dec61ed548e14 --- /dev/null +++ b/ContraCLIP/lib/support_sets.py @@ -0,0 +1,152 @@ +import torch +from torch import nn +import numpy as np + + +class SupportSets(nn.Module): + def __init__(self, prompt_features=None, num_support_sets=None, num_support_dipoles=None, support_vectors_dim=None, + lss_beta=0.5, css_beta=0.5, jung_radius=None): + """SupportSets class constructor. + + Args: + prompt_features (torch.Tensor) : CLIP text feature statistics of prompts from the given corpus + num_support_sets (int) : number of support sets (each one defining a warping function) + num_support_dipoles (int) : number of support dipoles per support set (per warping function) + support_vectors_dim (int) : dimensionality of support vectors (latent space dimensionality, z_dim) + lss_beta (float) : set beta parameter for initializing latent space RBFs' gamma parameters + (0.25 < lss_beta < 1.0) + css_beta (float) : set beta parameter for fixing CLIP space RBFs' gamma parameters + (0.25 <= css_beta < 1.0) + jung_radius (float) : radius of the minimum enclosing ball of a set of a set of 10K latent codes + """ + super(SupportSets, self).__init__() + self.prompt_features = prompt_features + + ################################################################################################################ + ## ## + ## [ Corpus Support Sets (CSS) ] ## + ## ## + ################################################################################################################ + if self.prompt_features is not None: + # Initialization + self.num_support_sets = self.prompt_features.shape[0] + self.num_support_dipoles = 1 + self.support_vectors_dim = self.prompt_features.shape[2] + self.css_beta = css_beta + + ############################################################################################################ + ## [ SUPPORT_SETS: (K, N, d) ] ## + ############################################################################################################ + self.SUPPORT_SETS = nn.Parameter(data=torch.ones(self.num_support_sets, + 2 * self.num_support_dipoles * self.support_vectors_dim), + requires_grad=False) + self.SUPPORT_SETS.data = self.prompt_features.reshape(self.prompt_features.shape[0], + self.prompt_features.shape[1] * + self.prompt_features.shape[2]).clone() + + ############################################################################################################ + ## [ ALPHAS: (K, N) ] ## + ############################################################################################################ + # Define alphas as pairs of [-1, 1] for each dipole + self.ALPHAS = torch.zeros(self.num_support_sets, 2 * self.num_support_dipoles) + for k in range(self.num_support_sets): + a = [] + for _ in range(self.num_support_dipoles): + a.extend([1, -1]) + self.ALPHAS[k] = torch.Tensor(a) + + ############################################################################################################ + ## [ GAMMAS: (K, N) ] ## + ############################################################################################################ + # Define RBF loggammas + self.LOGGAMMA = nn.Parameter(data=torch.ones(self.num_support_sets, 1), 
requires_grad=False) + for k in range(self.num_support_sets): + g = -np.log(self.css_beta) / (self.prompt_features[k, 1] - self.prompt_features[k, 0]).norm() ** 2 + self.LOGGAMMA.data[k] = torch.log(torch.Tensor([g])) + + ################################################################################################################ + ## ## + ## [ Latent Support Sets (LSS) ] ## + ## ## + ################################################################################################################ + else: + # Initialization + if num_support_sets is None: + raise ValueError("Number of latent support sets not defined.") + else: + self.num_support_sets = num_support_sets + if num_support_dipoles is None: + raise ValueError("Number of latent support dipoles not defined.") + else: + self.num_support_dipoles = num_support_dipoles + if support_vectors_dim is None: + raise ValueError("Latent support vector dimensionality not defined.") + else: + self.support_vectors_dim = support_vectors_dim + if jung_radius is None: + raise ValueError("Jung radius not given.") + else: + self.jung_radius = jung_radius + self.lss_beta = lss_beta + + ############################################################################################################ + ## [ SUPPORT_SETS: (K, N, d) ] ## + ############################################################################################################ + # Choose r_min and r_max based on the Jung radius + self.r_min = 0.90 * self.jung_radius + self.r_max = 1.25 * self.jung_radius + self.radii = torch.arange(self.r_min, self.r_max, (self.r_max - self.r_min) / self.num_support_sets) + self.SUPPORT_SETS = nn.Parameter(data=torch.ones(self.num_support_sets, + 2 * self.num_support_dipoles * self.support_vectors_dim)) + SUPPORT_SETS = torch.zeros(self.num_support_sets, 2 * self.num_support_dipoles, self.support_vectors_dim) + for k in range(self.num_support_sets): + SV_set = [] + for i in range(self.num_support_dipoles): + SV = torch.randn(1, self.support_vectors_dim) + SV_set.extend([SV, -SV]) + SV_set = torch.cat(SV_set) + SV_set = self.radii[k] * SV_set / torch.norm(SV_set, dim=1, keepdim=True) + SUPPORT_SETS[k, :] = SV_set + + # Reshape support sets tensor into a matrix and initialize support sets matrix + self.SUPPORT_SETS.data = SUPPORT_SETS.reshape( + self.num_support_sets, 2 * self.num_support_dipoles * self.support_vectors_dim).clone() + + ############################################################################################################ + ## [ ALPHAS: (K, N) ] ## + ############################################################################################################ + # Define alphas as pairs of [-1, 1] for each dipole + self.ALPHAS = torch.zeros(self.num_support_sets, 2 * self.num_support_dipoles) + for k in range(self.num_support_sets): + a = [] + for _ in range(self.num_support_dipoles): + a.extend([1, -1]) + self.ALPHAS.data[k] = torch.Tensor(a) + + ############################################################################################################ + ## [ GAMMAS: (K, N) ] ## + ############################################################################################################ + # Define RBF loggammas + self.LOGGAMMA = nn.Parameter(data=torch.ones(self.num_support_sets, 1)) + for k in range(self.num_support_sets): + g = -np.log(self.lss_beta) / ((2 * self.radii[k]) ** 2) + self.LOGGAMMA.data[k] = torch.log(torch.Tensor([g])) + + def forward(self, support_sets_mask, z): + # Get RBF support sets batch + support_sets_batch = 
torch.matmul(support_sets_mask, self.SUPPORT_SETS) + support_sets_batch = support_sets_batch.reshape(-1, 2 * self.num_support_dipoles, self.support_vectors_dim) + + # Get batch of RBF alpha parameters + alphas_batch = torch.matmul(support_sets_mask, self.ALPHAS).unsqueeze(dim=2) + + # Get batch of RBF gamma/log(gamma) parameters + gammas_batch = torch.exp(torch.matmul(support_sets_mask, self.LOGGAMMA).unsqueeze(dim=2)) + + # Calculate grad of f at z + D = z.unsqueeze(dim=1).repeat(1, 2 * self.num_support_dipoles, 1) - support_sets_batch + + grad_f = -2 * (alphas_batch * gammas_batch * + torch.exp(-gammas_batch * (torch.norm(D, dim=2) ** 2).unsqueeze(dim=2)) * D).sum(dim=1) + + return grad_f / torch.norm(grad_f, dim=1, keepdim=True) diff --git a/ContraCLIP/lib/trainer.py b/ContraCLIP/lib/trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..dd44075a0b2c63ce9850322a8d8ab2fed14c03dd --- /dev/null +++ b/ContraCLIP/lib/trainer.py @@ -0,0 +1,419 @@ +import sys +import os +import os.path as osp +import clip +import json +import torch +from torch import nn +import torch.nn.functional as F +from torch.optim.lr_scheduler import StepLR +from torchvision import transforms +import numpy as np +import time +import shutil +from .aux import TrainingStatTracker, update_progress, update_stdout, sec2dhms +from .config import SEMANTIC_DIPOLES_CORPORA, STYLEGAN_LAYERS + + +class DataParallelPassthrough(nn.DataParallel): + def __getattr__(self, name): + try: + return super(DataParallelPassthrough, self).__getattr__(name) + except AttributeError: + return getattr(self.module, name) + + +class Trainer(object): + def __init__(self, params=None, exp_dir=None, use_cuda=False, multi_gpu=False): + if params is None: + raise ValueError("Cannot build a Trainer instance with empty params: params={}".format(params)) + else: + self.params = params + self.use_cuda = use_cuda + self.multi_gpu = multi_gpu + + # Set output directory for current experiment (wip) + self.wip_dir = osp.join("experiments", "wip", exp_dir) + + # Set directory for completed experiment + self.complete_dir = osp.join("experiments", "complete", exp_dir) + + # Create log subdirectory and define stat.json file + self.stats_json = osp.join(self.wip_dir, 'stats.json') + if not osp.isfile(self.stats_json): + with open(self.stats_json, 'w') as out: + json.dump({}, out) + + # Create models sub-directory + self.models_dir = osp.join(self.wip_dir, 'models') + os.makedirs(self.models_dir, exist_ok=True) + # Define checkpoint model file + self.checkpoint = osp.join(self.models_dir, 'checkpoint.pt') + + # Array of iteration times + self.iter_times = np.array([]) + + # Set up training statistics tracker + self.stat_tracker = TrainingStatTracker() + + # Define cosine similarity loss + self.cosine_embedding_loss = nn.CosineEmbeddingLoss() + + # Define cross entropy loss + self.cross_entropy_loss = nn.CrossEntropyLoss() + + # Define transform of CLIP image encoder + self.clip_img_transform = transforms.Compose([transforms.Resize(224), + transforms.CenterCrop(224), + transforms.Normalize((0.48145466, 0.4578275, 0.40821073), + (0.26862954, 0.26130258, 0.27577711))]) + + def contrastive_loss(self, img_batch, txt_batch): + n_img, d_img = img_batch.shape + n_txt, d_txt = txt_batch.shape + + # TODO: assert that dimensions are the same? 
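+        # One minimal way to address the TODO above -- assuming the CLIP image and
+        # text features are meant to live in the same embedding space -- would be:
+        # assert d_img == d_txt, "Image/text feature dimensionality mismatch."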
+ + # Normalise image and text batches + img_batch_l2 = F.normalize(img_batch, p=2, dim=-1) + txt_batch_l2 = F.normalize(txt_batch, p=2, dim=-1) + + # Calculate inner product similarity matrix + similarity_matrix = torch.matmul(img_batch_l2, txt_batch_l2.T) + labels = torch.arange(n_img) + + return self.cross_entropy_loss(similarity_matrix / self.params.temperature, labels) + + def get_starting_iteration(self, latent_support_sets): + """Check if checkpoint file exists (under `self.models_dir`) and set starting iteration at the checkpoint + iteration; also load checkpoint weights to `latent_support_sets`. Otherwise, set starting iteration to 1 in + order to train from scratch. + + Returns: + starting_iter (int): starting iteration + + """ + starting_iter = 1 + if osp.isfile(self.checkpoint): + checkpoint_dict = torch.load(self.checkpoint) + starting_iter = checkpoint_dict['iter'] + latent_support_sets.load_state_dict(checkpoint_dict['latent_support_sets']) + + return starting_iter + + def log_progress(self, iteration, mean_iter_time, elapsed_time, eta): + """Log progress (loss + ETA). + + Args: + iteration (int) : current iteration + mean_iter_time (float) : mean iteration time + elapsed_time (float) : elapsed time until current iteration + eta (float) : estimated time of experiment completion + """ + # Get current training stats (for the previous `self.params.log_freq` steps) and flush them + stats = self.stat_tracker.get_means() + + # Update training statistics json file + with open(self.stats_json) as f: + stats_dict = json.load(f) + stats_dict.update({iteration: stats}) + with open(self.stats_json, 'w') as out: + json.dump(stats_dict, out) + + # Flush training statistics tracker + self.stat_tracker.flush() + + update_progress(" \\__.Training [bs: {}] [iter: {:06d}/{:06d}] ".format( + self.params.batch_size, iteration, self.params.max_iter), self.params.max_iter, iteration + 1) + if iteration < self.params.max_iter - 1: + print() + print(" ===================================================================") + print(" \\__Loss : {:.08f}".format(stats['loss'])) + print(" ===================================================================") + print(" \\__Mean iter time : {:.3f} sec".format(mean_iter_time)) + print(" \\__Elapsed time : {}".format(sec2dhms(elapsed_time))) + print(" \\__ETA : {}".format(sec2dhms(eta))) + print(" ===================================================================") + update_stdout(8) + + def train(self, generator, latent_support_sets, corpus_support_sets, clip_model): + """GANxPlainer training function. + + Args: + generator : non-trainable (pre-trained) GAN generator + latent_support_sets : trainable LSS model -- interpretable latent paths model + corpus_support_sets : non-trainable CSS model -- non-linear paths in the CLIP space + clip_model : non-trainable (pre-trained) CLIP model + + """ + # Save initial `latent_support_sets` model as `latent_support_sets_init.pt` + torch.save(latent_support_sets.state_dict(), osp.join(self.models_dir, 'latent_support_sets_init.pt')) + + # Save initial `corpus_support_sets` model as `corpus_support_sets_init.pt` + torch.save(corpus_support_sets.state_dict(), osp.join(self.models_dir, 'corpus_support_sets_init.pt')) + + # Save prompt corpus list to json + with open(osp.join(self.models_dir, 'semantic_dipoles.json'), 'w') as json_f: + json.dump(SEMANTIC_DIPOLES_CORPORA[self.params.corpus], json_f) + + # Upload models to GPU if `self.use_cuda` is set (i.e., if args.cuda and torch.cuda.is_available is True). 
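+        # Note that only the latent support sets (LSS) are put in training mode below;
+        # the generator, the CLIP model, and the corpus support sets (CSS) stay frozen
+        # (eval mode / non-trainable parameters) and are moved to the GPU for inference only.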
+ if self.use_cuda: + generator.cuda().eval() + clip_model.cuda().eval() + corpus_support_sets.cuda() + latent_support_sets.cuda().train() + else: + generator.eval() + clip_model.eval() + latent_support_sets.train() + + # Set latent support sets (LSS) optimizer + latent_support_sets_optim = torch.optim.Adam(latent_support_sets.parameters(), lr=self.params.lr) + + # Set learning rate scheduler -- reduce lr after 90% of the total number of training iterations + latent_support_sets_lr_scheduler = StepLR(optimizer=latent_support_sets_optim, + step_size=int(0.9 * self.params.max_iter), + gamma=0.1) + + # Get starting iteration + starting_iter = self.get_starting_iteration(latent_support_sets) + + # Parallelize models into multiple GPUs, if available and `multi_gpu=True`. + if self.multi_gpu: + print("#. Parallelize G and CLIP over {} GPUs...".format(torch.cuda.device_count())) + # Parallelize generator G + generator = DataParallelPassthrough(generator) + # Parallelize CLIP model + clip_model = DataParallelPassthrough(clip_model) + + # Check starting iteration + if starting_iter == self.params.max_iter: + print("#. This experiment has already been completed and can be found @ {}".format(self.wip_dir)) + print("#. Copy {} to {}...".format(self.wip_dir, self.complete_dir)) + try: + shutil.copytree(src=self.wip_dir, dst=self.complete_dir, ignore=shutil.ignore_patterns('checkpoint.pt')) + print(" \\__Done!") + except IOError as e: + print(" \\__Already exists -- {}".format(e)) + sys.exit() + print("#. Start training from iteration {}".format(starting_iter)) + + # Get experiment's start time + t0 = time.time() + + # Start training + for iteration in range(starting_iter, self.params.max_iter + 1): + + # Get current iteration's start time + iter_t0 = time.time() + + # Set gradients to zero + generator.zero_grad() + latent_support_sets.zero_grad() + clip_model.zero_grad() + + # Sample latent codes from standard Gaussian + z = torch.randn(self.params.batch_size, generator.dim_z) + if self.use_cuda: + z = z.cuda() + + # Generate images for the given latent codes + latent_code = z + if 'stylegan' in self.params.gan: + if self.params.stylegan_space == 'W': + latent_code = generator.get_w(z, truncation=self.params.truncation)[:, 0, :] + elif self.params.stylegan_space == 'W+': + latent_code = generator.get_w(z, truncation=self.params.truncation) + img = generator(latent_code) + + # Sample indices of shift vectors (`self.params.batch_size` out of `self.params.num_support_sets`) + # target_support_sets_indices = torch.randint(0, self.params.num_support_sets, [self.params.batch_size]) + target_support_sets_indices = torch.randint(0, latent_support_sets.num_support_sets, + [self.params.batch_size]) + if self.use_cuda: + target_support_sets_indices = target_support_sets_indices.cuda() + + # Sample shift magnitudes from uniform distributions + # U[self.params.min_shift_magnitude, self.params.max_shift_magnitude], and + # U[-self.params.max_shift_magnitude, self.params.min_shift_magnitude] + # Create a pool of shift magnitudes of 2 * `self.params.batch_size` shifts (half negative, half positive) + # and sample `self.params.batch_size` of them + shift_magnitudes_pos = (self.params.min_shift_magnitude - self.params.max_shift_magnitude) * \ + torch.rand(target_support_sets_indices.size()) + self.params.max_shift_magnitude + shift_magnitudes_neg = (self.params.min_shift_magnitude - self.params.max_shift_magnitude) * \ + torch.rand(target_support_sets_indices.size()) - self.params.min_shift_magnitude + 
shift_magnitudes_pool = torch.cat((shift_magnitudes_neg, shift_magnitudes_pos)) + + shift_magnitudes_ids = torch.arange(len(shift_magnitudes_pool), dtype=torch.float) + target_shift_magnitudes = shift_magnitudes_pool[torch.multinomial(input=shift_magnitudes_ids, + num_samples=self.params.batch_size, + replacement=False)] + if self.use_cuda: + target_shift_magnitudes = target_shift_magnitudes.cuda() + + # Create support sets mask of size (batch_size, num_support_sets) in the form: + # support_sets_mask[i] = [0, ..., 0, 1, 0, ..., 0] + support_sets_mask = torch.zeros([self.params.batch_size, latent_support_sets.num_support_sets]) + prompt_mask = torch.zeros([self.params.batch_size, 2]) + prompt_sign = torch.zeros([self.params.batch_size, 1]) + if self.use_cuda: + support_sets_mask = support_sets_mask.cuda() + prompt_mask = prompt_mask.cuda() + prompt_sign = prompt_sign.cuda() + for i, (index, val) in enumerate(zip(target_support_sets_indices, target_shift_magnitudes)): + support_sets_mask[i][index] += 1.0 + if val >= 0: + prompt_mask[i, 0] = 1.0 + prompt_sign[i] = +1.0 + else: + prompt_mask[i, 1] = 1.0 + prompt_sign[i] = -1.0 + prompt_mask = prompt_mask.unsqueeze(1) + + # Calculate shift vectors for the given latent codes -- in the case of StyleGAN, shifts live in the + # self.params.stylegan_space, i.e., in Z-, W-, or W+-space. In the Z-/W-space the dimensionality of the + # latent space is 512. In the case of W+-space, the dimensionality is 512 * (self.params.stylegan_layer + 1) + if ('stylegan' in self.params.gan) and (self.params.stylegan_space == 'W+'): + shift = target_shift_magnitudes.reshape(-1, 1) * latent_support_sets( + support_sets_mask, latent_code[:, :self.params.stylegan_layer + 1, :].reshape(latent_code.shape[0], + -1)) + else: + shift = target_shift_magnitudes.reshape(-1, 1) * latent_support_sets(support_sets_mask, latent_code) + + # Generate images the shifted latent codes + if ('stylegan' in self.params.gan) and (self.params.stylegan_space == 'W+'): + latent_code_reshaped = latent_code.reshape(latent_code.shape[0], -1) + shift = F.pad(input=shift, + pad=(0, (STYLEGAN_LAYERS[self.params.gan] - 1 - self.params.stylegan_layer) * 512), + mode='constant', + value=0) + latent_code_shifted = latent_code_reshaped + shift + latent_code_shifted_reshaped = latent_code_shifted.reshape_as(latent_code) + img_shifted = generator(latent_code_shifted_reshaped) + else: + img_shifted = generator(latent_code + shift) + + # TODO: add comment + img_pairs = torch.cat([self.clip_img_transform(img), self.clip_img_transform(img_shifted)], dim=0) + clip_img_pairs_features = clip_model.encode_image(img_pairs) + clip_img_features, clip_img_shifted_features = torch.split(clip_img_pairs_features, img.shape[0], dim=0) + clip_img_diff_features = clip_img_shifted_features - clip_img_features + + ############################################################################################################ + ## ## + ## Linear Text Paths (StyleCLIP approach) ## + ## ## + ############################################################################################################ + if self.params.styleclip: + corpus_text_features_batch = torch.matmul(support_sets_mask, corpus_support_sets.SUPPORT_SETS).reshape( + -1, 2 * corpus_support_sets.num_support_dipoles, corpus_support_sets.support_vectors_dim) + corpus_text_features_batch = torch.matmul(prompt_mask, corpus_text_features_batch).squeeze(1) + + # Calculate cosine similarity loss + if self.params.loss == 'cossim': + loss = 
self.cosine_embedding_loss(clip_img_shifted_features, corpus_text_features_batch, + torch.ones(corpus_text_features_batch.shape[0]).to( + 'cuda' if self.use_cuda else 'cpu')) + # Calculate contrastive loss + elif self.params.loss == 'contrastive': + loss = self.contrastive_loss(clip_img_shifted_features.float(), corpus_text_features_batch) + + ############################################################################################################ + ## ## + ## Linear Text Paths ## + ## ## + ############################################################################################################ + elif self.params.linear: + corpus_text_features_batch = torch.matmul(support_sets_mask, corpus_support_sets.SUPPORT_SETS).reshape( + -1, 2 * corpus_support_sets.num_support_dipoles, corpus_support_sets.support_vectors_dim) + + # Calculate cosine similarity loss + if self.params.loss == 'cossim': + loss = self.cosine_embedding_loss(clip_img_diff_features, prompt_sign * ( + corpus_text_features_batch[:, 0, :] - corpus_text_features_batch[:, 1, :]) - + clip_img_features, + torch.ones(corpus_text_features_batch.shape[0]).to( + 'cuda' if self.use_cuda else 'cpu')) + # Calculate contrastive loss + elif self.params.loss == 'contrastive': + loss = self.contrastive_loss(clip_img_diff_features.float(), prompt_sign * ( + corpus_text_features_batch[:, 0, :] - corpus_text_features_batch[:, 1, :]) - + clip_img_features) + + ############################################################################################################ + ## ## + ## Non-linear Text Paths ## + ## ## + ############################################################################################################ + else: + # Calculate local text direction using CSS + local_text_directions = target_shift_magnitudes.reshape(-1, 1) * corpus_support_sets(support_sets_mask, + clip_img_features) + # Calculate cosine similarity loss + if self.params.loss == 'cossim': + loss = self.cosine_embedding_loss(clip_img_diff_features, local_text_directions, + torch.ones(local_text_directions.shape[0]).to( + 'cuda' if self.use_cuda else 'cpu')) + # Calculate contrastive loss + elif self.params.loss == 'contrastive': + loss = self.contrastive_loss(img_batch=clip_img_diff_features.float(), + txt_batch=local_text_directions) + + # Back-propagate! 
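+            # The update step below temporarily casts CLIP to full precision via
+            # clip_model.float() and then re-converts its weights to fp16 with
+            # clip.model.convert_weights(), so CLIP keeps running in half precision
+            # between iterations.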
+ loss.backward() + + # Update weights + clip_model.float() + latent_support_sets_optim.step() + latent_support_sets_lr_scheduler.step() + clip.model.convert_weights(clip_model) + + # Update statistics tracker + self.stat_tracker.update(loss=loss.item()) + + # Get time of completion of current iteration + iter_t = time.time() + + # Compute elapsed time for current iteration and append to `iter_times` + self.iter_times = np.append(self.iter_times, iter_t - iter_t0) + + # Compute elapsed time so far + elapsed_time = iter_t - t0 + + # Compute rolling mean iteration time + mean_iter_time = self.iter_times.mean() + + # Compute estimated time of experiment completion + eta = elapsed_time * ((self.params.max_iter - iteration) / (iteration - starting_iter + 1)) + + # Log progress in stdout + if iteration % self.params.log_freq == 0: + self.log_progress(iteration, mean_iter_time, elapsed_time, eta) + + # Save checkpoint model file and latent support_sets model state dicts after current iteration + if iteration % self.params.ckp_freq == 0: + # Build checkpoint dict + checkpoint_dict = { + 'iter': iteration, + 'latent_support_sets': latent_support_sets.state_dict(), + } + torch.save(checkpoint_dict, self.checkpoint) + # === End of training loop === + + # Get experiment's total elapsed time + elapsed_time = time.time() - t0 + + # Save final latent support sets (LSS) model + latent_support_sets_model_filename = osp.join(self.models_dir, 'latent_support_sets.pt') + torch.save(latent_support_sets.state_dict(), latent_support_sets_model_filename) + + for _ in range(10): + print() + print("#.Training completed -- Total elapsed time: {}.".format(sec2dhms(elapsed_time))) + + print("#. Copy {} to {}...".format(self.wip_dir, self.complete_dir)) + try: + shutil.copytree(src=self.wip_dir, dst=self.complete_dir) + print(" \\__Done!") + except IOError as e: + print(" \\__Already exists -- {}".format(e)) diff --git a/ContraCLIP/models/genforce/.gitignore b/ContraCLIP/models/genforce/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..6e297da256f6fdad0888f7d182292d7e34a1ac5e --- /dev/null +++ b/ContraCLIP/models/genforce/.gitignore @@ -0,0 +1,29 @@ +__pycache__/ +*.py[cod] + +/.vscode/ +/.idea/ +*.sw[pon] + +/data/ +/work_dirs/ +*.jpg +*.png +*.jpeg +*.gif +*.avi +*.mp4 + +*.npy +*.txt +*.json +*.log +*.html +*.tar +*.zip +events.* + +*.pth +*.pkl +*.h5 +*.dat diff --git a/ContraCLIP/models/genforce/LICENSE b/ContraCLIP/models/genforce/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..22ebce6242d646adc157c623e1b0f1c0e07b9497 --- /dev/null +++ b/ContraCLIP/models/genforce/LICENSE @@ -0,0 +1,18 @@ +Copyright (c) 2020 GenForce + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/ContraCLIP/models/genforce/MODEL_ZOO.md b/ContraCLIP/models/genforce/MODEL_ZOO.md new file mode 100644 index 0000000000000000000000000000000000000000..f30aa7dc6dbaeaeb7c70148d053eb77c04e6a939 --- /dev/null +++ b/ContraCLIP/models/genforce/MODEL_ZOO.md @@ -0,0 +1,131 @@ +# Model Zoo + +## Pre-trained Models + +First of all, we thank the following repositories for their work on high-quality image synthesis + +- [PGGAN](https://github.com/tkarras/progressive_growing_of_gans) +- [StyleGAN](https://github.com/NVlabs/stylegan) +- [StyleGAN2](https://github.com/NVlabs/stylegan2) + +Please download the models you need and save them to `checkpoints/`. + +| PGGAN Official | | | | +| :-- | :-- | :-- | :-- | +| *Face* +| [celebahq-1024x1024](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EW_3jQ6E7xlKvCSHYrbmkQQBAB8tgIv5W5evdT6-GuXiWw?e=gRifVa&download=1) +| *Indoor Scene* +| [bedroom-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EUZQWGz2GT5Bh_GJLalP63IBvCsXDTOxDFIC_ZBsmoEacA?e=VNXiDb&download=1) | [livingroom-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Efzh6qQv6QtCm0YN1lulH-YByqdE3AqlI-E6US_hXMuiig?e=ppdyB2&download=1) | [diningroom-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EcLb3_hGUkdClompZo27xk0BNmotgbFqdIeu-ZOGJsBMRg?e=xjYpN3&download=1) | [kitchen-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ESCyg6hpNn1LlHVX_un1wLsBZAORUNkW9MO2kU1X5kafAQ?e=09TbGC&download=1) +| *Outdoor Scene* +| [churchoutdoor-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EQ8cKujs2TVGjCL_j6bsnk8BqD9REF2ME2lBnpbTPsqIvA?e=zH55fT&download=1) | [tower-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EeyBJvgRVGJClKr1KKYDF_cBT1FDepRU1-GLqYNh8W9-fQ?e=nrpa5N&download=1) | [bridge-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EZ2QScfPy19PiDERLJQ3gPMBP4WmvZHwhNFLzfaP2YD8hQ?e=bef1U9&download=1) +| *Other Scene* +| [restaurant-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ERvJ4pz8jgtMrcuJXUfcOQEBDugZ099_TetCQs-9-ILCVg?e=qYsVdQ&download=1) | [classroom-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EUU9SCOPUxhMoUS4Ceo9kl0BQkVK7d69lA-JeOP-zOWvXw?e=YIB4no&download=1) | [conferenceroom-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EX8AF0_6NoJAl5vKFewHWnsBk0r4PK4WsqsMrJyj84TrqQ?e=oNQIZS&download=1) +| *Animal* +| [person-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EWu4SqR42YpCoqsVJOcM2cMBcdfXA0j5wZ2hno9X0R9ydQ?e=KuDRns&download=1) | [cat-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EQdveyUNOMtAue52n6BxoHoB6Yup5-PTvBDmyfUn7Un4Hw?e=7acGbT&download=1) | [dog-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ESaKyXA5fGlOvXJYDDFbT2kB9c0HlXh9n_wnyhiP05nhow?e=d4aKDV&download=1) | [bird-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ef2p4Pd3AKVCmSm00YikCIABhylh2dLPaFjPfPVn3RiTXA?e=9bRitp&download=1) +| 
[horse-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EXwCPdv6XqJFtuvFFoswRScBmLJbhKzaC5D_iovl1GFOTw?e=WDdD77&download=1) | [sheep-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ER6J5EKjAUNFtm9VwLf-uUsBZ5dnqxeKsPxY9ijiPtMhcQ?e=OKtfva&download=1) | [cow-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ERZLxw7N7xJPm72FyePTbpcByzrr0pH-Fg7qyLt5tYGXwQ?e=ovIPCl&download=1) +| *Transportation* +| [car-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EfGc2we47aFDtAY1548pRvsByIju-uXRbkZEFpJotuPKZw?e=DQqVj8&download=1) | [bicycle-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ed1dN_FgwmdBgeNWhaRUry8BgwT88-n2ppicSDPx-f7f_Q?e=bxTxnf&download=1) | [motorbike-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EV3yQdeJXIdPjZbMO0mp2-MBJbKuuBdypzBL4gnedO57Dw?e=tXdvtD&download=1) | [bus-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ed7-OYLnq0RCqRlM8qK8wZ8B87dz_NUxIKBrvyFUwRCEbg?e=VP5bmX&download=1) +| [train-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EedE2cozKOVAkhvbdLd4SfwBknFW8vWZnKiqgeIBbAvCCA?e=BrLpTl&download=1) | [boat-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Eb39waqQFr9Bp4wO0rC5NHwB0Vz2NGCuqbRPucguBIkDrg?e=lddSyL&download=1) | [airplane-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ee6FzIx3KjNDhxrS5mDvpCEB3iQ7TgErmKhbwbV-eF07iw?e=xflPXa&download=1) +| *Furniture* +| [bottle-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EWhoy2AFCTZGtEG1UoayWjcB9Kdc_wreJ8p4RlBB93nbNg?e=DMZceU&download=1) | [chair-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EbQRTfwdostBhXG30Uacn7ABsEUFa-tEW3oxiM5zDYQbRw?e=FkB7T0&download=1) | [pottedplant-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EWg7hnoGATBOuJvXWr4m7CQBJL9o7nqnD6nOMRhtH2SKXg?e=Zi3hjD&download=1) | [tvmonitor-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EVXwttoJVtBMuhHNDdK3cMwBdMiZARJV38PMTsL6whnFlA?e=RbG0ru&download=1) +| [diningtable-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EXVzBkbmTCVImMtuHLCTBeMBXZmv0RWyx5KXQQAe7-7D5w?e=6RYSnm&download=1) | [sofa-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EaADQYDXwY9NrzbiUFcRYRgBOu1GdJMG8YgNZZmbNjbn-Q?e=DqKrXG&download=1) + +| StyleGAN Official | | | | +| :-- | :--: | :--: | :--: | +| Model (Dataset) | Training Samples | Training Duration (K Images) | FID +| [ffhq-1024x1024](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EdfMxgb0hU9BoXwiR3dqYDEBowCSEF1IcsW3n4kwfoZ9OQ?e=VwIV58&download=1) | 70,000 | 25,000 | 4.40 | +| [celebahq-1024x1024](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EcCdXHddE7FOvyfmqeOyc9ABqVuWh8PQYFnV6JM1CXvFig?e=1nUYZ5&download=1) | 30,000 | 25,000 | 5.06 | +| [bedroom-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ea6RBPddjcRNoFMXm8AyEBcBUHdlRNtjtclNKFe89amjBw?e=Og8Vff&download=1) | 3,033,042 | 70,000 | 2.65 | +| [cat-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EVjX8u9HuehLip3z0hRfIHcB7QtoFkTB7NiRDb8nrKOl2w?e=lHcp1B&download=1) | 1,657,266 | 70,000 | 8.53 | 
+| [car-512x384](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EcRJNNzzUzJGjI2X53S9HjkBhXkKT5JRd6Q3IIhCY1AyRw?e=FvMRNj&download=1) | 5,520,756 | 46,000 | 3.27 | + +| StyleGAN Ours | | | | +| :-- | :--: | :--: | :--: | +| Model (Dataset) | Training Samples | Training Duration (K Images) | FID +| *Face ("partial" means faces are not fully aligned to center)* +| [celeba_partial-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ET2etKNzMS9JmHj5j60fqMcBRJfQfYNvqUrujaIXxCvKDQ?e=QReLE6&download=1) | 103,706 | 50,000 | 7.03 | +| [ffhq-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ES-NAUCC2qdHg87BftvlBiQBVpbJ8-005Q4TNr5KrOxQEw?e=00AnWt&download=1) | 70,000 | 25,000 | 5.70 | +| [ffhq-512x512](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EZYrrwOiEgVOg-PfGv7QTegBzFQ9yq2v7o1WxNq5JJ9KNA?e=SZU8PI&download=1) | 70,000 | 25,000 | 5.15 | +| *LSUN Indoor Scene* +| [livingroom-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EfFCYLHjqbFDmjOvCCFJgDcBZ1QYgETfZJxp4ZTHjLxZBg?e=InVd0n&download=1) | 1,315,802 | 30,000 | 5.16 | +| [diningroom-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ERsUza_hSFRIm4iZCag7P0kBQ9EIdfQKByw4QYt_ay97lg?e=Cimh7S&download=1) | 657,571 | 25,000 | 4.13 | +| [kitchen-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ERcYvoingQNKix35lUs0vUkBQQkAZMp1rtDxjwNlOJAoaA?e=a1Tcwr&download=1) | 1,000,000 | 30,000 | 5.06 | +| *LSUN Indoor Scene Mixture* +| [apartment-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EfurPNSB2BRFtXdqGkmDD6YBwyKN8YK2v7nKwnJQdsbf6A?e=w3oYa4&download=1) | 4 * 200,000 | 60,000 | 4.18 | +| *LSUN Outdoor Scene* +| [church-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ETMgG1_d06tAlbUkJD1qA9IBaLZ9zJKPkG2kO-4jxhVV5w?e=Dbkb7o&download=1) | 126,227 | 30,000 | 4.82 | +| [tower-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ebm9QMgqB2VDqyIE5rFhreEBgZ_RyKcRf8bQ333K453u3w?e=if8sDj&download=1) | 708,264 | 30,000 | 5.99 | +| [bridge-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ed9QM6OP9sVHnazSp4cqPSEBb-ALfBPXRxP1hD7FsTYh8w?e=3vv06p&download=1) | 818,687 | 25,000 | 6.42 | +| *LSUN Other Scene* +| [restaurant-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ESDhYr01WtlEvBNFrVpFezcB2l9lF1rBYuHFoeNpBr5B7A?e=uFWFNh&download=1) | 626,331 | 50,000 | 4.03 | +| [classroom-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EbWnI3oto9NPk-lxwZlWqPQB2atWpGiTWMIT59MzF9ij9Q?e=KvcNBg&download=1) | 168,103 | 50,000 | 10.10 | +| [conferenceroom-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Eb1gVi3pGa9PgJ4XYYu_6yABQZ0ZcGDak4FEHaTHaeYFzw?e=0BeE8t&download=1) | 229,069 | 50,000 | 6.20 | + +| StyleGAN Third-Party | | +| :-- | :--: | +| Model (Dataset) | Source | +| [animeface-512x512](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EWDWflY6lBpGgX0CGQpd2Z4B5wTEVamTOA9JRYne7zdCvA?e=tOzgYA&download=1) | [link](https://www.gwern.net/Faces#portrait-results) +| [animeportrait-512x512](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EXBvhTBi-v5NsnQtrxhFEKsBin4xg-Dud9Jr62AEwFTIxg?e=bMGK7r&download=1) | [link](https://www.gwern.net/Faces#portrait-results) +| 
[artface-512x512](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Eca0OiGqhyZMmoPbKahSBWQBWvcAH4q2CE3zdZJflp2jkQ?e=h4rWAm&download=1) | [link](https://github.com/ak9250/stylegan-art) + +| StyleGAN2 Official | | | | +| :-- | :--: | :--: | :--: | +| Model (Dataset) | Training Samples | Training Duration (K Images) | FID +| [ffhq-1024x1024](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EX0DNWiBvl5FuOQTF4oMPBYBNSalcxTK0AbLwBn9Y3vfgg?e=Q0sZit&download=1) | 70,000 | 25,000 | 2.84 | +| [church-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EQzDtJUdQ4ROunMGn2sZouEBmNeFX4QWvxjermVE5cZvNA?e=tQ7r9r&download=1) | 126,227 | 48,000 | 3.86 | +| [cat-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EUKXeBwUUbZJr6kup7PW4ekBx2-vmTp8FjcGb10v8bgJxQ?e=nkerMF&download=1) | 1,657,266 | 88,000 | 6.93 | +| [horse-256x256](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EconoT6tb69OuAIqfXRtGlsBZz4vBx01UmmFO-JAS356Jg?e=bcSCC4&download=1) | 2,000,340 | 100,000 | 3.43 | +| [car-512x384](https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EYSnUsxU8KJFuMHhZm-JLWoB0nHxdlbrLHNZ_Qkoe3b9LA?e=Ycjp5A&download=1) | 5,520,756 | 57,000 | 2.32 | + +## Training Datasets + +- [MNIST](http://yann.lecun.com/exdb/mnist/) (60,000 training samples and 10,000 test samples on 10 digital numbers) +- [SVHN](http://ufldl.stanford.edu/housenumbers/) (73,257 training samples, 26,032 testing samples, and 531,131 additional samples on 10 digital numbers) +- [CIFAR10](https://www.cs.toronto.edu/~kriz/cifar.html) (50,000 training samples and 10,000 test samples on 10 classes) +- [CIFAR100](https://www.cs.toronto.edu/~kriz/cifar.html) (50,000 training samples and 10,000 test samples on 100 classes) +- [ImageNet](http://www.image-net.org/) (1,281,167 training samples, 50,000 validation samples, and 100,100 testing samples on 1000 classes) +- [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) (202,599 samples from 10,177 identities, with 5 landmarks and 40 binary facial attributes) +- [CelebA-HQ](https://github.com/tkarras/progressive_growing_of_gans) (30,000 samples) +- [FF-HQ](https://github.com/NVlabs/ffhq-dataset) (70,000 samples) +- [LSUN](https://github.com/fyu/lsun) (see statistical information below) +- [Places](http://places2.csail.mit.edu/) (around 1.8M training samples covering 365 classes) +- [Cityscapes](https://www.cityscapes-dataset.com/) (2,975 training samples, 19998 extra training samples (one broken), 500 validation samples, and 1,525 test samples) +- [Streetscapes](http://streetscore.media.mit.edu/data.html) + +Statistical information of [LSUN](https://github.com/fyu/lsun) dataset is summarized as follows: + +| LSUN Datasets Stats | | | +| :-- | :--: | :--: | +| Name | Number of Samples | Size | +| *Scenes* +| bedroom (train) | 3,033,042 | 43G | +| bridge (train) | 818,687 | 15G | +| churchoutdoor (train) | 126,227 | 2G | +| classroom (train) | 168,103 | 3G | +| conferenceroom (train) | 229,069 | 4G | +| diningroom (train) | 657,571 | 11G | +| kitchen (train) | 2,212,277 | 33G | +| livingroom (train) | 1,315,802 | 21G | +| restaurant (train) | 626,331 | 13G | +| tower (train) | 708,264 | 11G | +| *Objects* +| airplane | 1,530,696 | 34G | +| bicycle | 3,347,211 | 129G | +| bird | 2,310,362 | 65G | +| boat | 2,651,165 | 86G | +| bottle | 3,202,760 | 64G | +| bus | 695,891 | 24G | +| car | 5,520,756 | 173G | +| cat | 1,657,266 | 42G | +| chair | 
5,037,807 | 116G | +| cow | 377,379 | 15G | +| diningtable | 1,537,123 | 48G | +| dog | 5,054,817 | 145G | +| horse | 2,000,340 | 69G | +| motorbike | 1,194,101 | 42G | +| person | 18,890,816 | 477G | +| pottedplant | 1,104,859 | 43G | +| sheep | 418,983 | 18G | +| sofa | 2,365,870 | 56G | +| train | 1,148,020 | 43G | +| tvmonitor | 2,463,284 | 46G | diff --git a/ContraCLIP/models/genforce/README.md b/ContraCLIP/models/genforce/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9100a9d0c6c694a013e5eede5f8f932221a03946 --- /dev/null +++ b/ContraCLIP/models/genforce/README.md @@ -0,0 +1,169 @@ +# GenForce Lib for Generative Modeling + +An efficient PyTorch library for deep generative modeling. May the Generative Force (GenForce) be with You. + +![image](./teaser.gif) + +## Updates + +- **Encoder Training:** We support training encoders on top of pre-trained GANs for GAN inversion. +- **Model Converters:** You can easily migrate your already started projects to this repository. Please check [here](./converters/README.md) for more details. + +## Highlights + +- **Distributed** training framework. +- **Fast** training speed. +- **Modular** design for prototyping new models. +- **Model zoo** containing a rich set of pretrained GAN models, with [Colab live demo](https://colab.research.google.com/github/genforce/genforce/blob/master/docs/synthesize_demo.ipynb) to play. + +## Installation + +1. Create a virtual environment via `conda`. + + ```shell + conda create -n genforce python=3.7 + conda activate genforce + ``` + +2. Install `cuda` and `cudnn`. (We use `CUDA 10.0` in case you would like to use `TensorFlow 1.15` for model conversion.) + + ```shell + conda install cudatoolkit=10.0 cudnn=7.6.5 + ``` + +3. Install `torch` and `torchvision`. + + ```shell + pip install torch==1.7 torchvision==0.8 + ``` + +4. Install requirements + + ```shell + pip install -r requirements.txt + ``` + +## Quick Demo + +We provide a quick training demo, `scripts/stylegan_training_demo.py`, which allows to train StyleGAN on a toy dataset (500 animeface images with 64 x 64 resolution). Try it via + +```shell +./scripts/stylegan_training_demo.sh +``` + +We also provide an inference demo, `synthesize.py`, which allows to synthesize images with pre-trained models. Generated images can be found at `work_dirs/synthesis_results/`. Try it via + +```shell +python synthesize.py stylegan_ffhq1024 +``` + +You can also play the demo at [Colab](https://colab.research.google.com/github/genforce/genforce/blob/master/docs/synthesize_demo.ipynb). + +## Play with GANs + +### Test + +Pre-trained models can be found at [model zoo](MODEL_ZOO.md). + +- On local machine: + + ```shell + GPUS=8 + CONFIG=configs/stylegan_ffhq256_val.py + WORK_DIR=work_dirs/stylegan_ffhq256_val + CHECKPOINT=checkpoints/stylegan_ffhq256.pth + ./scripts/dist_test.sh ${GPUS} ${CONFIG} ${WORK_DIR} ${CHECKPOINT} + ``` + +- Using `slurm`: + + ```shell + CONFIG=configs/stylegan_ffhq256_val.py + WORK_DIR=work_dirs/stylegan_ffhq256_val + CHECKPOINT=checkpoints/stylegan_ffhq256.pth + GPUS=8 ./scripts/slurm_test.sh ${PARTITION} ${JOB_NAME} \ + ${CONFIG} ${WORK_DIR} ${CHECKPOINT} + ``` + +### Train + +All log files in the training process, such as log message, checkpoints, synthesis snapshots, etc, will be saved to the work directory. 
+ +- On local machine: + + ```shell + GPUS=8 + CONFIG=configs/stylegan_ffhq256.py + WORK_DIR=work_dirs/stylegan_ffhq256_train + ./scripts/dist_train.sh ${GPUS} ${CONFIG} ${WORK_DIR} \ + [--options additional_arguments] + ``` + +- Using `slurm`: + + ```shell + CONFIG=configs/stylegan_ffhq256.py + WORK_DIR=work_dirs/stylegan_ffhq256_train + GPUS=8 ./scripts/slurm_train.sh ${PARTITION} ${JOB_NAME} \ + ${CONFIG} ${WORK_DIR} \ + [--options additional_arguments] + ``` + +## Play with Encoders for GAN Inversion + +### Train + +- On local machine: + + ```shell + GPUS=8 + CONFIG=configs/stylegan_ffhq256_encoder_y.py + WORK_DIR=work_dirs/stylegan_ffhq256_encoder_y + ./scripts/dist_train.sh ${GPUS} ${CONFIG} ${WORK_DIR} \ + [--options additional_arguments] + ``` + + +- Using `slurm`: + + ```shell + CONFIG=configs/stylegan_ffhq256_encoder_y.py + WORK_DIR=work_dirs/stylegan_ffhq256_encoder_y + GPUS=8 ./scripts/slurm_train.sh ${PARTITION} ${JOB_NAME} \ + ${CONFIG} ${WORK_DIR} \ + [--options additional_arguments] + ``` +## Contributors + +| Member | Module | +| :-- | :-- | +|[Yujun Shen](http://shenyujun.github.io/) | models and running controllers +|[Yinghao Xu](https://justimyhxu.github.io/) | runner and loss functions +|[Ceyuan Yang](http://ceyuan.me/) | data loader +|[Jiapeng Zhu](https://zhujiapeng.github.io/) | evaluation metrics +|[Bolei Zhou](http://bzhou.ie.cuhk.edu.hk/) | cheerleader + +**NOTE:** The above form only lists the person in charge for each module. We help each other a lot and develop as a **TEAM**. + +*We welcome external contributors to join us for improving this library.* + +## License + +The project is under the [MIT License](./LICENSE). + +## Acknowledgement + +We thank [PGGAN](https://github.com/tkarras/progressive_growing_of_gans), [StyleGAN](https://github.com/NVlabs/stylegan), [StyleGAN2](https://github.com/NVlabs/stylegan2), [StyleGAN2-ADA](https://github.com/NVlabs/stylegan2-ada) for their work on high-quality image synthesis. We thank [IDInvert](https://github.com/genforce/idinvert) and [GHFeat](https://github.com/genforce/ghfeat) for their contribution to GAN inversion. We also thank [MMCV](https://github.com/open-mmlab/mmcv) for the inspiration on the design of controllers. + +## BibTex + +We open source this library to the community to facilitate the research of generative modeling. If you do like our work and use the codebase or models for your research, please cite our work as follows. + +```bibtex +@misc{genforce2020, + title = {GenForce}, + author = {Shen, Yujun and Xu, Yinghao and Yang, Ceyuan and Zhu, Jiapeng and Zhou, Bolei}, + howpublished = {\url{https://github.com/genforce/genforce}}, + year = {2020} +} +``` diff --git a/ContraCLIP/models/genforce/__init__.py b/ContraCLIP/models/genforce/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ContraCLIP/models/genforce/configs/stylegan_demo.py b/ContraCLIP/models/genforce/configs/stylegan_demo.py new file mode 100644 index 0000000000000000000000000000000000000000..b2d268d7e00be48ba84b015d1751666adb39403c --- /dev/null +++ b/ContraCLIP/models/genforce/configs/stylegan_demo.py @@ -0,0 +1,61 @@ +# python3.7 +"""Configuration for StyleGAN training demo. + +All settings are particularly used for one replica (GPU), such as `batch_size` +and `num_workers`. 
+""" + +runner_type = 'StyleGANRunner' +gan_type = 'stylegan' +resolution = 64 +batch_size = 4 +val_batch_size = 32 +total_img = 100_000 + +# Training dataset is repeated at the beginning to avoid loading dataset +# repeatedly at the end of each epoch. This can save some I/O time. +data = dict( + num_workers=4, + repeat=500, + train=dict(root_dir='data/demo.zip', data_format='zip', + resolution=resolution, mirror=0.5), + val=dict(root_dir='data/demo.zip', data_format='zip', + resolution=resolution), +) + +controllers = dict( + RunningLogger=dict(every_n_iters=10), + ProgressScheduler=dict( + every_n_iters=1, init_res=8, minibatch_repeats=4, + lod_training_img=5_000, lod_transition_img=5_000, + batch_size_schedule=dict(res4=64, res8=32, res16=16, res32=8), + ), + Snapshoter=dict(every_n_iters=500, first_iter=True, num=200), + FIDEvaluator=dict(every_n_iters=5000, first_iter=True, num=50000), + Checkpointer=dict(every_n_iters=5000, first_iter=True), +) + +modules = dict( + discriminator=dict( + model=dict(gan_type=gan_type, resolution=resolution), + lr=dict(lr_type='FIXED'), + opt=dict(opt_type='Adam', base_lr=1e-3, betas=(0.0, 0.99)), + kwargs_train=dict(), + kwargs_val=dict(), + ), + generator=dict( + model=dict(gan_type=gan_type, resolution=resolution), + lr=dict(lr_type='FIXED'), + opt=dict(opt_type='Adam', base_lr=1e-3, betas=(0.0, 0.99)), + kwargs_train=dict(w_moving_decay=0.995, style_mixing_prob=0.9, + trunc_psi=1.0, trunc_layers=0, randomize_noise=True), + kwargs_val=dict(trunc_psi=1.0, trunc_layers=0, randomize_noise=False), + g_smooth_img=10000, + ) +) + +loss = dict( + type='LogisticGANLoss', + d_loss_kwargs=dict(r1_gamma=10.0), + g_loss_kwargs=dict(), +) diff --git a/ContraCLIP/models/genforce/configs/stylegan_ffhq1024.py b/ContraCLIP/models/genforce/configs/stylegan_ffhq1024.py new file mode 100644 index 0000000000000000000000000000000000000000..0da23abd89966c69d292334979d0ef6cbff1ab69 --- /dev/null +++ b/ContraCLIP/models/genforce/configs/stylegan_ffhq1024.py @@ -0,0 +1,63 @@ +# python3.7 +"""Configuration for training StyleGAN on FF-HQ (1024) dataset. + +All settings are particularly used for one replica (GPU), such as `batch_size` +and `num_workers`. +""" + +runner_type = 'StyleGANRunner' +gan_type = 'stylegan' +resolution = 1024 +batch_size = 4 +val_batch_size = 16 +total_img = 25000_000 + +# Training dataset is repeated at the beginning to avoid loading dataset +# repeatedly at the end of each epoch. This can save some I/O time. 
+data = dict( + num_workers=4, + repeat=500, + # train=dict(root_dir='data/ffhq', resolution=resolution, mirror=0.5), + # val=dict(root_dir='data/ffhq', resolution=resolution), + train=dict(root_dir='data/ffhq.zip', data_format='zip', + resolution=resolution, mirror=0.5), + val=dict(root_dir='data/ffhq.zip', data_format='zip', + resolution=resolution), +) + +controllers = dict( + RunningLogger=dict(every_n_iters=10), + ProgressScheduler=dict( + every_n_iters=1, init_res=8, minibatch_repeats=4, + lod_training_img=600_000, lod_transition_img=600_000, + batch_size_schedule=dict(res4=64, res8=32, res16=16, res32=8), + ), + Snapshoter=dict(every_n_iters=500, first_iter=True, num=200), + FIDEvaluator=dict(every_n_iters=5000, first_iter=True, num=50000), + Checkpointer=dict(every_n_iters=5000, first_iter=True), +) + +modules = dict( + discriminator=dict( + model=dict(gan_type=gan_type, resolution=resolution), + lr=dict(lr_type='FIXED'), + opt=dict(opt_type='Adam', base_lr=1e-3, betas=(0.0, 0.99)), + kwargs_train=dict(), + kwargs_val=dict(), + ), + generator=dict( + model=dict(gan_type=gan_type, resolution=resolution), + lr=dict(lr_type='FIXED'), + opt=dict(opt_type='Adam', base_lr=1e-3, betas=(0.0, 0.99)), + kwargs_train=dict(w_moving_decay=0.995, style_mixing_prob=0.9, + trunc_psi=1.0, trunc_layers=0, randomize_noise=True), + kwargs_val=dict(trunc_psi=1.0, trunc_layers=0, randomize_noise=False), + g_smooth_img=10_000, + ) +) + +loss = dict( + type='LogisticGANLoss', + d_loss_kwargs=dict(r1_gamma=10.0), + g_loss_kwargs=dict(), +) diff --git a/ContraCLIP/models/genforce/configs/stylegan_ffhq1024_val.py b/ContraCLIP/models/genforce/configs/stylegan_ffhq1024_val.py new file mode 100644 index 0000000000000000000000000000000000000000..33850aeafdf0fa68d8904c6ac0f1dd89be6bc977 --- /dev/null +++ b/ContraCLIP/models/genforce/configs/stylegan_ffhq1024_val.py @@ -0,0 +1,29 @@ +# python3.7 +"""Configuration for testing StyleGAN on FF-HQ (1024) dataset. + +All settings are particularly used for one replica (GPU), such as `batch_size` +and `num_workers`. +""" + +runner_type = 'StyleGANRunner' +gan_type = 'stylegan' +resolution = 1024 +batch_size = 16 + +data = dict( + num_workers=4, + # val=dict(root_dir='data/ffhq', resolution=resolution), + val=dict(root_dir='data/ffhq.zip', data_format='zip', + resolution=resolution), +) + +modules = dict( + discriminator=dict( + model=dict(gan_type=gan_type, resolution=resolution), + kwargs_val=dict(), + ), + generator=dict( + model=dict(gan_type=gan_type, resolution=resolution), + kwargs_val=dict(trunc_psi=0.7, trunc_layers=8, randomize_noise=False), + ) +) diff --git a/ContraCLIP/models/genforce/configs/stylegan_ffhq256.py b/ContraCLIP/models/genforce/configs/stylegan_ffhq256.py new file mode 100644 index 0000000000000000000000000000000000000000..fcbedef8a87d9fea54750f9a38ca7aeb9de73c82 --- /dev/null +++ b/ContraCLIP/models/genforce/configs/stylegan_ffhq256.py @@ -0,0 +1,63 @@ +# python3.7 +"""Configuration for training StyleGAN on FF-HQ (256) dataset. + +All settings are particularly used for one replica (GPU), such as `batch_size` +and `num_workers`. +""" + +runner_type = 'StyleGANRunner' +gan_type = 'stylegan' +resolution = 256 +batch_size = 4 +val_batch_size = 64 +total_img = 25000_000 + +# Training dataset is repeated at the beginning to avoid loading dataset +# repeatedly at the end of each epoch. This can save some I/O time. 
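+# The active `train`/`val` entries below read FF-HQ from a single zip archive
+# (`data/ffhq.zip`); the commented-out entries illustrate the equivalent
+# folder-based setup.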
+data = dict( + num_workers=4, + repeat=500, + # train=dict(root_dir='data/ffhq', resolution=resolution, mirror=0.5), + # val=dict(root_dir='data/ffhq', resolution=resolution), + train=dict(root_dir='data/ffhq.zip', data_format='zip', + resolution=resolution, mirror=0.5), + val=dict(root_dir='data/ffhq.zip', data_format='zip', + resolution=resolution), +) + +controllers = dict( + RunningLogger=dict(every_n_iters=10), + ProgressScheduler=dict( + every_n_iters=1, init_res=8, minibatch_repeats=4, + lod_training_img=600_000, lod_transition_img=600_000, + batch_size_schedule=dict(res4=64, res8=32, res16=16, res32=8), + ), + Snapshoter=dict(every_n_iters=500, first_iter=True, num=200), + FIDEvaluator=dict(every_n_iters=5000, first_iter=True, num=50000), + Checkpointer=dict(every_n_iters=5000, first_iter=True), +) + +modules = dict( + discriminator=dict( + model=dict(gan_type=gan_type, resolution=resolution), + lr=dict(lr_type='FIXED'), + opt=dict(opt_type='Adam', base_lr=1e-3, betas=(0.0, 0.99)), + kwargs_train=dict(), + kwargs_val=dict(), + ), + generator=dict( + model=dict(gan_type=gan_type, resolution=resolution), + lr=dict(lr_type='FIXED'), + opt=dict(opt_type='Adam', base_lr=1e-3, betas=(0.0, 0.99)), + kwargs_train=dict(w_moving_decay=0.995, style_mixing_prob=0.9, + trunc_psi=1.0, trunc_layers=0, randomize_noise=True), + kwargs_val=dict(trunc_psi=1.0, trunc_layers=0, randomize_noise=False), + g_smooth_img=10_000, + ) +) + +loss = dict( + type='LogisticGANLoss', + d_loss_kwargs=dict(r1_gamma=10.0), + g_loss_kwargs=dict(), +) diff --git a/ContraCLIP/models/genforce/configs/stylegan_ffhq256_encoder_y.py b/ContraCLIP/models/genforce/configs/stylegan_ffhq256_encoder_y.py new file mode 100644 index 0000000000000000000000000000000000000000..1d8e26a91f46b0d3bab1288ec359a7818295b36a --- /dev/null +++ b/ContraCLIP/models/genforce/configs/stylegan_ffhq256_encoder_y.py @@ -0,0 +1,73 @@ +# python3.7 +"""Configuration for training StyleGAN Encoder on FF-HQ (256) dataset. + +All settings are particularly used for one replica (GPU), such as `batch_size` +and `num_workers`. +""" + +gan_model_path = 'checkpoints/stylegan_ffhq256.pth' +perceptual_model_path = 'checkpoints/vgg16.pth' + +runner_type = 'EncoderRunner' +gan_type = 'stylegan' +resolution = 256 +batch_size = 12 +val_batch_size = 25 +total_img = 14000_000 +space_of_latent = 'y' + +# Training dataset is repeated at the beginning to avoid loading dataset +# repeatedly at the end of each epoch. This can save some I/O time. 
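+# Unlike the generator configs, this encoder config reads FF-HQ through train/val
+# image list files (`image_list_path` below) instead of a zip archive.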
+data = dict( + num_workers=4, + repeat=500, + # train=dict(root_dir='data/ffhq', resolution=resolution, mirror=0.5), + # val=dict(root_dir='data/ffhq', resolution=resolution), + train=dict(root_dir='data/', data_format='list', + image_list_path='data/ffhq/ffhq_train_list.txt', + resolution=resolution, mirror=0.5), + val=dict(root_dir='data/', data_format='list', + image_list_path='./data/ffhq/ffhq_val_list.txt', + resolution=resolution), +) + +controllers = dict( + RunningLogger=dict(every_n_iters=50), + Snapshoter=dict(every_n_iters=10000, first_iter=True, num=200), + Checkpointer=dict(every_n_iters=10000, first_iter=False), +) + +modules = dict( + discriminator=dict( + model=dict(gan_type=gan_type, resolution=resolution), + lr=dict(lr_type='ExpSTEP', decay_factor=0.8, decay_step=36458 // 2), + opt=dict(opt_type='Adam', base_lr=1e-4, betas=(0.9, 0.99)), + kwargs_train=dict(), + kwargs_val=dict(), + ), + generator=dict( + model=dict(gan_type=gan_type, resolution=resolution, repeat_w=True), + kwargs_val=dict(randomize_noise=False), + ), + encoder=dict( + model=dict(gan_type=gan_type, resolution=resolution, network_depth=18, + latent_dim = [1024] * 8 + [512, 512, 256, 256, 128, 128], + num_latents_per_head=[4, 4, 6], + use_fpn=True, + fpn_channels=512, + use_sam=True, + sam_channels=512), + lr=dict(lr_type='ExpSTEP', decay_factor=0.8, decay_step=36458 // 2), + opt=dict(opt_type='Adam', base_lr=1e-4, betas=(0.9, 0.99)), + kwargs_train=dict(), + kwargs_val=dict(), + ), +) + +loss = dict( + type='EncoderLoss', + d_loss_kwargs=dict(r1_gamma=10.0), + e_loss_kwargs=dict(adv_lw=0.08, perceptual_lw=5e-5), + perceptual_kwargs=dict(output_layer_idx=23, + pretrained_weight_path=perceptual_model_path), +) diff --git a/ContraCLIP/models/genforce/configs/stylegan_ffhq256_val.py b/ContraCLIP/models/genforce/configs/stylegan_ffhq256_val.py new file mode 100644 index 0000000000000000000000000000000000000000..092f0d1926e0e35d2035bda1ef8a83cc4d8f1fbd --- /dev/null +++ b/ContraCLIP/models/genforce/configs/stylegan_ffhq256_val.py @@ -0,0 +1,29 @@ +# python3.7 +"""Configuration for testing StyleGAN on FF-HQ (256) dataset. + +All settings are particularly used for one replica (GPU), such as `batch_size` +and `num_workers`. 
+""" + +runner_type = 'StyleGANRunner' +gan_type = 'stylegan' +resolution = 256 +batch_size = 64 + +data = dict( + num_workers=4, + # val=dict(root_dir='data/ffhq', resolution=resolution), + val=dict(root_dir='data/ffhq.zip', data_format='zip', + resolution=resolution), +) + +modules = dict( + discriminator=dict( + model=dict(gan_type=gan_type, resolution=resolution), + kwargs_val=dict(), + ), + generator=dict( + model=dict(gan_type=gan_type, resolution=resolution), + kwargs_val=dict(trunc_psi=0.7, trunc_layers=8, randomize_noise=False), + ) +) diff --git a/ContraCLIP/models/genforce/convert_model.py b/ContraCLIP/models/genforce/convert_model.py new file mode 100644 index 0000000000000000000000000000000000000000..3827a158a09a10caec33582f7e7308fac36385d8 --- /dev/null +++ b/ContraCLIP/models/genforce/convert_model.py @@ -0,0 +1,77 @@ +"""Script to convert officially released models to match this repository.""" + +import argparse + +from converters import convert_pggan_weight +from converters import convert_stylegan_weight +from converters import convert_stylegan2_weight +from converters import convert_stylegan2ada_tf_weight +from converters import convert_stylegan2ada_pth_weight + + +def parse_args(): + """Parses arguments.""" + parser = argparse.ArgumentParser(description='Convert pre-trained models.') + parser.add_argument('model_type', type=str, + choices=['pggan', 'stylegan', 'stylegan2', + 'stylegan2ada_tf', 'stylegan2ada_pth'], + help='Type of the model to convert') + parser.add_argument('--source_model_path', type=str, required=True, + help='Path to load the model for conversion.') + parser.add_argument('--target_model_path', type=str, default=None, + help='Path to save the converted model. If not ' + 'specified, the model will be saved to the same ' + 'directory of the source model.') + parser.add_argument('--test_num', type=int, default=10, + help='Number of test samples used to check the ' + 'precision of the converted model. (default: 10)') + parser.add_argument('--save_test_image', action='store_true', + help='Whether to save the test image. (default: False)') + parser.add_argument('--verbose_log', action='store_true', + help='Whether to print verbose log. 
(default: False)') + return parser.parse_args() + + +def main(): + """Main function.""" + args = parse_args() + if args.target_model_path is None: + args.target_model_path = args.source_model_path.replace('.pkl', '.pth') + + if args.model_type == 'pggan': + convert_pggan_weight(tf_weight_path=args.source_model_path, + pth_weight_path=args.target_model_path, + test_num=args.test_num, + save_test_image=args.save_test_image, + verbose=args.verbose_log) + elif args.model_type == 'stylegan': + convert_stylegan_weight(tf_weight_path=args.source_model_path, + pth_weight_path=args.target_model_path, + test_num=args.test_num, + save_test_image=args.save_test_image, + verbose=args.verbose_log) + elif args.model_type == 'stylegan2': + convert_stylegan2_weight(tf_weight_path=args.source_model_path, + pth_weight_path=args.target_model_path, + test_num=args.test_num, + save_test_image=args.save_test_image, + verbose=args.verbose_log) + elif args.model_type == 'stylegan2ada_tf': + convert_stylegan2ada_tf_weight(tf_weight_path=args.source_model_path, + pth_weight_path=args.target_model_path, + test_num=args.test_num, + save_test_image=args.save_test_image, + verbose=args.verbose_log) + elif args.model_type == 'stylegan2ada_pth': + convert_stylegan2ada_pth_weight(src_weight_path=args.source_model_path, + dst_weight_path=args.target_model_path, + test_num=args.test_num, + save_test_image=args.save_test_image, + verbose=args.verbose_log) + else: + raise NotImplementedError(f'Model type `{args.model_type}` is not ' + f'supported!') + + +if __name__ == '__main__': + main() diff --git a/ContraCLIP/models/genforce/converters/README.md b/ContraCLIP/models/genforce/converters/README.md new file mode 100644 index 0000000000000000000000000000000000000000..83ec80ff409d519fb10c707d29f2d31f0fc05b88 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/README.md @@ -0,0 +1,37 @@ +# Model Converters + +## Introduction + +Besides training, we also support converting pre-trained model weights from officially released models and using them for inference. So, if you have already trained some models with the officially open-sourced codebase, don't worry, we have already made sure that they well match our codebase! We now support models trained with following repositories: + +- [PGGAN](https://github.com/tkarras/progressive_growing_of_gans) (TensorFlow) +- [StyleGAN](https://github.com/NVlabs/stylegan) (TensorFlow) +- [StyleGAN2](https://github.com/NVlabs/stylegan2) (TensorFlow) +- [StyleGAN2-ADA](https://github.com/NVlabs/stylegan2-ada) (TensorFlow) +- [StyleGAN2-ADA-PyTorch](https://github.com/NVlabs/stylegan2-ada-pytorch) (PyTorch) + +**NOTE:** Our codebase is completely built on PyTorch. But, if you want to convert the official TensorFlow model, you need to setup the TensorFlow environment. This can be easily done with `pip install tensorflow-gpu==1.15`. + +We also mirror the officially open-sourced codes in this folder, which are relied on by `pickle.load()`. Specifically, we have + +- `pggan_official/`: [PGGAN](https://github.com/tkarras/progressive_growing_of_gans) +- `stylegan_official/`: [StyleGAN](https://github.com/NVlabs/stylegan) +- `stylegan2_official/`: [StyleGAN2](https://github.com/NVlabs/stylegan2) +- `stylegan2ada_tf_official/`: [StyleGAN2-ADA](https://github.com/NVlabs/stylegan2-ada) +- `stylegan2ada_pth_official/`: [StyleGAN2-ADA-PyTorch](https://github.com/NVlabs/stylegan2-ada-pytorch) + +**NOTE:** These codes will ONLY be used for model conversion. 
After that, all codes within this folder will not be used anymore. + +## Usage + +The script to convert a model is provided as `../convert_model.py`. For example, to convert a pre-trained StyleGAN2 model (officially TensorFlow version), just run + +```shell +cd .. +python convert_model.py stylegan2 \ + --source_model_path ${SOURCE_MODEL_PATH} \ + --test_num 10 \ + --save_test_image +``` + +The above command will execute the conversion and then test the conversion precision. diff --git a/ContraCLIP/models/genforce/converters/__init__.py b/ContraCLIP/models/genforce/converters/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c8de688f221dbabb74038bad6937d16a579ec3a8 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/__init__.py @@ -0,0 +1,14 @@ +# python3.7 +"""Collects all model converters.""" + +from .pggan_converter import convert_pggan_weight +from .stylegan_converter import convert_stylegan_weight +from .stylegan2_converter import convert_stylegan2_weight +from .stylegan2ada_tf_converter import convert_stylegan2ada_tf_weight +from .stylegan2ada_pth_converter import convert_stylegan2ada_pth_weight + +__all__ = [ + 'convert_pggan_weight', 'convert_stylegan_weight', + 'convert_stylegan2_weight', 'convert_stylegan2ada_tf_weight', + 'convert_stylegan2ada_pth_weight' +] diff --git a/ContraCLIP/models/genforce/converters/pggan_converter.py b/ContraCLIP/models/genforce/converters/pggan_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..b458174fad011f79b4d789d4705e9c6ae5ff4d97 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_converter.py @@ -0,0 +1,225 @@ +# python3.7 +"""Converts PGGAN model weights from TensorFlow to PyTorch. + +The models can be trained through OR released by the repository: + +https://github.com/tkarras/progressive_growing_of_gans +""" + +import os +import sys +import pickle +import warnings +warnings.filterwarnings('ignore', category=FutureWarning) + +# pylint: disable=wrong-import-position +from tqdm import tqdm +import numpy as np +import tensorflow as tf +import torch +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +from models import build_model +from utils.visualizer import HtmlPageVisualizer +from utils.visualizer import postprocess_image +# pylint: enable=wrong-import-position + +__all__ = ['convert_pggan_weight'] + +GAN_TPYE = 'pggan' +OFFICIAL_CODE_DIR = 'pggan_official' +BASE_DIR = os.path.dirname(os.path.relpath(__file__)) +CODE_PATH = os.path.join(BASE_DIR, OFFICIAL_CODE_DIR) + + +def convert_pggan_weight(tf_weight_path, + pth_weight_path, + test_num=10, + save_test_image=False, + verbose=False): + """Converts the pre-trained PGGAN weights. + + Args: + tf_weight_path: Path to the TensorFlow model to load weights from. + pth_weight_path: Path to the PyTorch model to save converted weights. + test_num: Number of samples used to test the conversion. (default: 10) + save_test_image: Whether to save the test images. (default: False) + verbose: Whether to print verbose log message. 
(default: False) + """ + sess = tf.compat.v1.InteractiveSession() + + print(f'========================================') + print(f'Loading TensorFlow weights from `{tf_weight_path}` ...') + sys.path.insert(0, CODE_PATH) + with open(tf_weight_path, 'rb') as f: + G, D, Gs = pickle.load(f) + sys.path.pop(0) + print(f'Successfully loaded!') + print(f'--------------------') + + z_space_dim = G.input_shapes[0][1] + label_size = G.input_shapes[1][1] + image_channels = G.output_shape[1] + resolution = G.output_shape[2] + + print(f'Converting TensorFlow weights (G) to PyTorch version ...') + G_vars = dict(G.__getstate__()['variables']) + G_pth = build_model(gan_type=GAN_TPYE, + module='generator', + resolution=resolution, + z_space_dim=z_space_dim, + label_size=label_size, + image_channels=image_channels) + G_state_dict = G_pth.state_dict() + for pth_var_name, tf_var_name in G_pth.pth_to_tf_var_mapping.items(): + assert tf_var_name in G_vars + assert pth_var_name in G_state_dict + if verbose: + print(f' Converting `{tf_var_name}` to `{pth_var_name}`.') + var = torch.from_numpy(np.array(G_vars[tf_var_name])) + if 'weight' in tf_var_name: + if 'Dense' in tf_var_name: + var = var.view(var.shape[0], -1, G_pth.init_res, G_pth.init_res) + var = var.permute(1, 0, 2, 3).flip(2, 3) + else: + var = var.permute(3, 2, 0, 1) + G_state_dict[pth_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Converting TensorFlow weights (Gs) to PyTorch version ...') + Gs_vars = dict(Gs.__getstate__()['variables']) + Gs_pth = build_model(gan_type=GAN_TPYE, + module='generator', + resolution=resolution, + z_space_dim=z_space_dim, + label_size=label_size, + image_channels=image_channels) + Gs_state_dict = Gs_pth.state_dict() + for pth_var_name, tf_var_name in Gs_pth.pth_to_tf_var_mapping.items(): + assert tf_var_name in Gs_vars + assert pth_var_name in Gs_state_dict + if verbose: + print(f' Converting `{tf_var_name}` to `{pth_var_name}`.') + var = torch.from_numpy(np.array(Gs_vars[tf_var_name])) + if 'weight' in tf_var_name: + if 'Dense' in tf_var_name: + var = var.view( + var.shape[0], -1, Gs_pth.init_res, Gs_pth.init_res) + var = var.permute(1, 0, 2, 3).flip(2, 3) + else: + var = var.permute(3, 2, 0, 1) + Gs_state_dict[pth_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Converting TensorFlow weights (D) to PyTorch version ...') + D_vars = dict(D.__getstate__()['variables']) + D_pth = build_model(gan_type=GAN_TPYE, + module='discriminator', + resolution=resolution, + label_size=label_size, + image_channels=image_channels) + D_state_dict = D_pth.state_dict() + for pth_var_name, tf_var_name in D_pth.pth_to_tf_var_mapping.items(): + assert tf_var_name in D_vars + assert pth_var_name in D_state_dict + if verbose: + print(f' Converting `{tf_var_name}` to `{pth_var_name}`.') + var = torch.from_numpy(np.array(D_vars[tf_var_name])) + if 'weight' in tf_var_name: + if 'Dense' in tf_var_name: + var = var.permute(1, 0) + else: + var = var.permute(3, 2, 0, 1) + D_state_dict[pth_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Saving PyTorch weights to `{pth_weight_path}` ...') + state_dict = { + 'generator': G_state_dict, + 'discriminator': D_state_dict, + 'generator_smooth': Gs_state_dict, + } + torch.save(state_dict, pth_weight_path) + print(f'Successfully saved!') + print(f'--------------------') + + # Start testing if needed. 
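+    # Conversion sanity check: feed identical random inputs to the original
+    # TensorFlow networks and the converted PyTorch ones, and report the mean
+    # absolute difference of the Gs(z) and D(G(z)) outputs. The check is
+    # skipped when `test_num` is non-positive or TensorFlow lacks CUDA
+    # support, since the PyTorch models are evaluated on GPU.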
+ if test_num <= 0 or not tf.test.is_built_with_cuda(): + warnings.warn(f'Skip testing the converted weights!') + sess.close() + return + + if save_test_image: + html = HtmlPageVisualizer(num_rows=test_num, num_cols=3) + html.set_headers(['Index', 'Before Conversion', 'After Conversion']) + for i in range(test_num): + html.set_cell(i, 0, text=f'{i}') + + print(f'Testing conversion results ...') + G_pth.load_state_dict(G_state_dict) + D_pth.load_state_dict(D_state_dict) + Gs_pth.load_state_dict(Gs_state_dict) + G_pth.eval().cuda() + D_pth.eval().cuda() + Gs_pth.eval().cuda() + + gs_distance = 0.0 + dg_distance = 0.0 + for i in tqdm(range(test_num)): + # Test Gs(z). + code = np.random.randn(1, z_space_dim) + pth_code = torch.from_numpy(code).type(torch.FloatTensor).cuda() + label = np.zeros((1, label_size), np.float32) + if label_size: + label_id = np.random.randint(label_size) + label[0, label_id] = 1.0 + pth_label = torch.from_numpy(label).type(torch.FloatTensor).cuda() + else: + label_id = 0 + pth_label = None + tf_output = Gs.run(code, label) + pth_output = Gs_pth(pth_code, label=pth_label)['image'] + pth_output = pth_output.detach().cpu().numpy() + distance = np.average(np.abs(tf_output - pth_output)) + if verbose: + print(f' Test {i:03d}: Gs distance {distance:.6e}.') + gs_distance += distance + + if save_test_image: + html.set_cell(i, 1, image=postprocess_image(tf_output)[0]) + html.set_cell(i, 2, image=postprocess_image(pth_output)[0]) + + # Test D(G(z)). + code = np.random.randn(1, z_space_dim) + pth_code = torch.from_numpy(code).type(torch.FloatTensor).cuda() + label = np.zeros((1, label_size), np.float32) + if label_size: + label_id = np.random.randint(label_size) + label[0, label_id] = 1.0 + pth_label = torch.from_numpy(label).type(torch.FloatTensor).cuda() + else: + label_id = 0 + pth_label = None + tf_image = G.run(code, label) + tf_output = D.run(tf_image) + pth_image = G_pth(pth_code, label=pth_label)['image'] + pth_output = D_pth(pth_image) + pth_output = pth_output.detach().cpu().numpy() + distance = np.average(np.abs(tf_output[0] - pth_output[:, :1])) + if label_size: + distance += np.average(np.abs(tf_output[1] - pth_output[:, 1:])) + if verbose: + print(f' Test {i:03d}: D(G) distance {distance:.6e}.') + dg_distance += distance + + print(f'Average Gs distance is {gs_distance / test_num:.6e}.') + print(f'Average D(G) distance is {dg_distance / test_num:.6e}.') + print(f'========================================') + + if save_test_image: + html.save(f'{pth_weight_path}.conversion_test.html') + + sess.close() diff --git a/ContraCLIP/models/genforce/converters/pggan_official/README.md b/ContraCLIP/models/genforce/converters/pggan_official/README.md new file mode 100644 index 0000000000000000000000000000000000000000..49b05b69c57884d1cbe022d3c4dd674973b8fbca --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/README.md @@ -0,0 +1,174 @@ +## Progressive Growing of GANs for Improved Quality, Stability, and Variation
– Official TensorFlow implementation of the ICLR 2018 paper + +**Tero Karras** (NVIDIA), **Timo Aila** (NVIDIA), **Samuli Laine** (NVIDIA), **Jaakko Lehtinen** (NVIDIA and Aalto University) + +* For business inquiries, please contact **[researchinquiries@nvidia.com](mailto:researchinquiries@nvidia.com)** +* For press and other inquiries, please contact Hector Marinez at **[hmarinez@nvidia.com](mailto:hmarinez@nvidia.com)** + +![Representative image](https://raw.githubusercontent.com/tkarras/progressive_growing_of_gans/master/representative_image_512x256.png)
+**Picture:** Two imaginary celebrities that were dreamed up by a random number generator. + +**Abstract:**
+*We describe a new training methodology for generative adversarial networks. The key idea is to grow both the generator and discriminator progressively: starting from a low resolution, we add new layers that model increasingly fine details as training progresses. This both speeds the training up and greatly stabilizes it, allowing us to produce images of unprecedented quality, e.g., CelebA images at 1024². We also propose a simple way to increase the variation in generated images, and achieve a record inception score of 8.80 in unsupervised CIFAR10. Additionally, we describe several implementation details that are important for discouraging unhealthy competition between the generator and discriminator. Finally, we suggest a new metric for evaluating GAN results, both in terms of image quality and variation. As an additional contribution, we construct a higher-quality version of the CelebA dataset.* + +## Resources + +* [Paper (NVIDIA research)](http://research.nvidia.com/publication/2017-10_Progressive-Growing-of) +* [Paper (arXiv)](http://arxiv.org/abs/1710.10196) +* [Result video (YouTube)](https://youtu.be/G06dEcZ-QTg) +* [Additional material (Google Drive)](https://drive.google.com/open?id=0B4qLcYyJmiz0NHFULTdYc05lX0U) + * [ICLR 2018 poster (`karras2018iclr-poster.pdf`)](https://drive.google.com/open?id=1ilUVoIejsvG04G0PzFNVn3U3TjSSyHGu) + * [ICLR 2018 slides (`karras2018iclr-slides.pptx`)](https://drive.google.com/open?id=1jYlrX4DgTs2VAfRcyl3pcNI4ONkBg3-g) + * [Representative images (`images/representative-images`)](https://drive.google.com/open?id=0B4qLcYyJmiz0UE9zVHduWFVORlk) + * [High-quality video clips (`videos/high-quality-video-clips`)](https://drive.google.com/open?id=1gQu3O8ZhC-nko8wLFgcNqcwMnRYL_z85) + * [Huge collection of non-curated images for each dataset (`images/100k-generated-images`)](https://drive.google.com/open?id=1j6uZ_a6zci0HyKZdpDq9kSa8VihtEPCp) + * [Extensive video of random interpolations for each dataset (`videos/one-hour-of-random-interpolations`)](https://drive.google.com/open?id=1gAb3oqpaQFHZTwPUXHPIfBIP8eIeWNrI) + * [Pre-trained networks (`networks/tensorflow-version`)](https://drive.google.com/open?id=15hvzxt_XxuokSmj0uO4xxMTMWVc0cIMU) + * [Minimal example script for importing the pre-trained networks (`networks/tensorflow-version/example_import_script`)](https://drive.google.com/open?id=1A79qKDTFp6pExe4gTSgBsEOkxwa2oes_) + * [Data files needed to reconstruct the CelebA-HQ dataset (`datasets/celeba-hq-deltas`)](https://drive.google.com/open?id=0B4qLcYyJmiz0TXY1NG02bzZVRGs) + * [Example training logs and progress snapshots (`networks/tensorflow-version/example_training_runs`)](https://drive.google.com/open?id=1A9SKoQ7Xu2fqK22GHdMw8LZTh6qLvR7H) + +All the material, including source code, is made freely available for non-commercial use under the Creative Commons [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/legalcode) license. Feel free to use any of the material in your own work, as long as you give us appropriate credit by mentioning the title and author list of our paper. + +## Versions + +There are two different versions of the source code. The *TensorFlow version* is newer and more polished, and we generally recommend it as a starting point if you are looking to experiment with our technique, build upon it, or apply it to novel datasets. The *original Theano version*, on the other hand, is what we used to produce all the results shown in our paper. 
We recommend using it if – and only if – you are looking to reproduce our exact results for benchmark datasets like CIFAR-10, MNIST-RGB, and CelebA. + +The main differences are summarized in the following table: + +| Feature | TensorFlow version | Original Theano version | +| :-------------------------------- | :-------------------------------------------: | :-----------------------: | +| Branch | [master](https://github.com/tkarras/progressive_growing_of_gans/tree/master) (this branch) | [original-theano-version](https://github.com/tkarras/progressive_growing_of_gans/tree/original-theano-version) | +| Multi-GPU support | Yes | No | +| FP16 mixed-precision support | Yes | No | +| Performance | High | Low | +| Training time for CelebA-HQ | 2 days (8 GPUs)
2 weeks (1 GPU) | 1–2 months | +| Repro CelebA-HQ results | Yes – very close | Yes – identical | +| Repro LSUN results | Yes – very close | Yes – identical | +| Repro CIFAR-10 results | No | Yes – identical | +| Repro MNIST mode recovery | No | Yes – identical | +| Repro ablation study (Table 1) | No | Yes – identical | +| Dataset format | TFRecords | HDF5 | +| Backwards compatibility | Can import networks
trained with Theano | N/A | +| Code quality | Reasonable | Somewhat messy | +| Code status | In active use | No longer maintained | + +## System requirements + +* Both Linux and Windows are supported, but we strongly recommend Linux for performance and compatibility reasons. +* 64-bit Python 3.6 installation with numpy 1.13.3 or newer. We recommend Anaconda3. +* One or more high-end NVIDIA Pascal or Volta GPUs with 16GB of DRAM. We recommend NVIDIA DGX-1 with 8 Tesla V100 GPUs. +* NVIDIA driver 391.25 or newer, CUDA toolkit 9.0 or newer, cuDNN 7.1.2 or newer. +* Additional Python packages listed in `requirements-pip.txt` + +## Importing and using pre-trained networks + +All pre-trained networks found on Google Drive, as well as ones produced by the training script, are stored as Python PKL files. They can be imported using the standard `pickle` mechanism as long as two conditions are met: (1) The directory containing the Progressive GAN code repository must be included in the PYTHONPATH environment variable, and (2) a `tf.Session()` object must have been created beforehand and set as default. Each PKL file contains 3 instances of `tfutil.Network`: + +``` +# Import official CelebA-HQ networks. +with open('karras2018iclr-celebahq-1024x1024.pkl', 'rb') as file: + G, D, Gs = pickle.load(file) + # G = Instantaneous snapshot of the generator, mainly useful for resuming a previous training run. + # D = Instantaneous snapshot of the discriminator, mainly useful for resuming a previous training run. + # Gs = Long-term average of the generator, yielding higher-quality results than the instantaneous snapshot. +``` + +It is also possible to import networks that were produced using the Theano implementation, as long as they do not employ any features that are not natively supported by the TensorFlow version (minibatch discrimination, batch normalization, etc.). To enable Theano network import, however, you must use `misc.load_pkl()` in place of `pickle.load()`: + +``` +# Import Theano versions of the official CelebA-HQ networks. +import misc +G, D, Gs = misc.load_pkl('200-celebahq-1024x1024/network-final.pkl') +``` + +Once you have imported the networks, you can call `Gs.run()` to produce a set of images for given latent vectors, or `Gs.get_output_for()` to include the generator network in a larger TensorFlow expression. For further details, please consult the example script found on Google Drive. Instructions: + +1. Pull the Progressive GAN code repository and add it to your PYTHONPATH environment variable. +2. Install the required Python packages with `pip install -r requirements-pip.txt` +2. Download [`import_example.py`](https://drive.google.com/open?id=1xZul7DwqqJoe5OCuKHw6fQVeQZNIMSuF) from [`networks/tensorflow-version/example_import_script`](https://drive.google.com/open?id=1A79qKDTFp6pExe4gTSgBsEOkxwa2oes_) +3. Download [`karras2018iclr-celebahq-1024x1024.pkl`](https://drive.google.com/open?id=188K19ucknC6wg1R6jbuPEhTq9zoufOx4) from [`networks/tensorflow-version`](https://drive.google.com/open?id=15hvzxt_XxuokSmj0uO4xxMTMWVc0cIMU) and place it in the same directory as the script. +5. Run the script with `python import_example.py` +6. If everything goes well, the script should generate 10 PNG images (`img0.png` – `img9.png`) that match the ones found in [`networks/tensorflow-version/example_import_script`](https://drive.google.com/open?id=1A79qKDTFp6pExe4gTSgBsEOkxwa2oes_) exactly. 
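+
+For orientation, a minimal sketch of such a script is shown below (modelled on the official `import_example.py`; the latent count, the [-1, 1] → [0, 255] post-processing, and the `img%d.png` naming are illustrative rather than authoritative):
+
+```
+# Minimal sketch: generate a few images with the imported Gs network.
+import pickle
+import numpy as np
+import tensorflow as tf
+import PIL.Image
+
+tf.InteractiveSession()  # tfutil.Network requires a default TF session.
+with open('karras2018iclr-celebahq-1024x1024.pkl', 'rb') as file:
+    G, D, Gs = pickle.load(file)
+
+latents = np.random.RandomState(1000).randn(10, *Gs.input_shapes[0][1:])  # 10 random latents.
+labels = np.zeros([latents.shape[0]] + Gs.input_shapes[1][1:])            # Dummy (empty) labels.
+images = Gs.run(latents, labels)                                          # NCHW float, roughly [-1, 1].
+
+images = np.clip(np.rint((images + 1.0) / 2.0 * 255.0), 0.0, 255.0).astype(np.uint8)
+images = images.transpose(0, 2, 3, 1)                                     # NCHW => NHWC.
+for idx in range(images.shape[0]):
+    PIL.Image.fromarray(images[idx], 'RGB').save('img%d.png' % idx)
+```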
+ +## Preparing datasets for training + +The Progressive GAN code repository contains a command-line tool for recreating bit-exact replicas of the datasets that we used in the paper. The tool also provides various utilities for operating on the datasets: + +``` +usage: dataset_tool.py [-h] ... + + display Display images in dataset. + extract Extract images from dataset. + compare Compare two datasets. + create_mnist Create dataset for MNIST. + create_mnistrgb Create dataset for MNIST-RGB. + create_cifar10 Create dataset for CIFAR-10. + create_cifar100 Create dataset for CIFAR-100. + create_svhn Create dataset for SVHN. + create_lsun Create dataset for single LSUN category. + create_celeba Create dataset for CelebA. + create_celebahq Create dataset for CelebA-HQ. + create_from_images Create dataset from a directory full of images. + create_from_hdf5 Create dataset from legacy HDF5 archive. + +Type "dataset_tool.py -h" for more information. +``` + +The datasets are represented by directories containing the same image data in several resolutions to enable efficient streaming. There is a separate `*.tfrecords` file for each resolution, and if the dataset contains labels, they are stored in a separate file as well: + +``` +> python dataset_tool.py create_cifar10 datasets/cifar10 ~/downloads/cifar10 +> ls -la datasets/cifar10 +drwxr-xr-x 2 user user 7 Feb 21 10:07 . +drwxrwxr-x 10 user user 62 Apr 3 15:10 .. +-rw-r--r-- 1 user user 4900000 Feb 19 13:17 cifar10-r02.tfrecords +-rw-r--r-- 1 user user 12350000 Feb 19 13:17 cifar10-r03.tfrecords +-rw-r--r-- 1 user user 41150000 Feb 19 13:17 cifar10-r04.tfrecords +-rw-r--r-- 1 user user 156350000 Feb 19 13:17 cifar10-r05.tfrecords +-rw-r--r-- 1 user user 2000080 Feb 19 13:17 cifar10-rxx.labels +``` + +The ```create_*``` commands take the standard version of a given dataset as input and produce the corresponding `*.tfrecords` files as output. Additionally, the ```create_celebahq``` command requires a set of data files representing deltas with respect to the original CelebA dataset. These deltas (27.6GB) can be downloaded from [`datasets/celeba-hq-deltas`](https://drive.google.com/open?id=0B4qLcYyJmiz0TXY1NG02bzZVRGs). + +**Note about module versions**: Some of the dataset commands require specific versions of Python modules and system libraries (e.g. pillow, libjpeg), and they will give an error if the versions do not match. Please heed the error messages – there is **no way** to get the commands to work other than installing these specific versions. + +## Training networks + +Once the necessary datasets are set up, you can proceed to train your own networks. The general procedure is as follows: + +1. Edit `config.py` to specify the dataset and training configuration by uncommenting/editing specific lines. +2. Run the training script with `python train.py`. +3. The results are written into a newly created subdirectory under `config.result_dir` +4. Wait several days (or weeks) for the training to converge, and analyze the results. + +By default, `config.py` is configured to train a 1024x1024 network for CelebA-HQ using a single-GPU. This is expected to take about two weeks even on the highest-end NVIDIA GPUs. The key to enabling faster training is to employ multiple GPUs and/or go for a lower-resolution dataset. To this end, `config.py` contains several examples for commonly used datasets, as well as a set of "configuration presets" for multi-GPU training. 
All of the presets are expected to yield roughly the same image quality for CelebA-HQ, but their total training time can vary considerably: + +* `preset-v1-1gpu`: Original config that was used to produce the CelebA-HQ and LSUN results shown in the paper. Expected to take about 1 month on NVIDIA Tesla V100. +* `preset-v2-1gpu`: Optimized config that converges considerably faster than the original one. Expected to take about 2 weeks on 1xV100. +* `preset-v2-2gpus`: Optimized config for 2 GPUs. Takes about 1 week on 2xV100. +* `preset-v2-4gpus`: Optimized config for 4 GPUs. Takes about 3 days on 4xV100. +* `preset-v2-8gpus`: Optimized config for 8 GPUs. Takes about 2 days on 8xV100. + +For reference, the expected output of each configuration preset for CelebA-HQ can be found in [`networks/tensorflow-version/example_training_runs`](https://drive.google.com/open?id=1A9SKoQ7Xu2fqK22GHdMw8LZTh6qLvR7H) + +Other noteworthy config options: + +* `fp16`: Enable [FP16 mixed-precision training](http://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html) to reduce the training times even further. The actual speedup is heavily dependent on GPU architecture and cuDNN version, and it can be expected to increase considerably in the future. +* `BENCHMARK`: Quickly iterate through the resolutions to measure the raw training performance. +* `BENCHMARK0`: Same as `BENCHMARK`, but only use the highest resolution. +* `syn1024rgb`: Synthetic 1024x1024 dataset consisting of just black images. Useful for benchmarking. +* `VERBOSE`: Save image and network snapshots very frequently to facilitate debugging. +* `GRAPH` and `HIST`: Include additional data in the TensorBoard report. + +## Analyzing results + +Training results can be analyzed in several ways: + +* **Manual inspection**: The training script saves a snapshot of randomly generated images at regular intervals in `fakes*.png` and reports the overall progress in `log.txt`. +* **TensorBoard**: The training script also exports various running statistics in a `*.tfevents` file that can be visualized in TensorBoard with `tensorboard --logdir `. +* **Generating images and videos**: At the end of `config.py`, there are several pre-defined configs to launch utility scripts (`generate_*`). For example: + * Suppose you have an ongoing training run titled `010-pgan-celebahq-preset-v1-1gpu-fp32`, and you want to generate a video of random interpolations for the latest snapshot. + * Uncomment the `generate_interpolation_video` line in `config.py`, replace `run_id=10`, and run `python train.py` + * The script will automatically locate the latest network snapshot and create a new result directory containing a single MP4 file. +* **Quality metrics**: Similar to the previous example, `config.py` also contains pre-defined configs to compute various quality metrics (Sliced Wasserstein distance, Fréchet inception distance, etc.) for an existing training run. The metrics are computed for each network snapshot in succession and stored in `metric-*.txt` in the original result directory. diff --git a/ContraCLIP/models/genforce/converters/pggan_official/config.py b/ContraCLIP/models/genforce/converters/pggan_official/config.py new file mode 100644 index 0000000000000000000000000000000000000000..10031ac8b2bcc317dcd21cf73bf6539f789f7b91 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/config.py @@ -0,0 +1,140 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
+# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +#---------------------------------------------------------------------------- +# Convenience class that behaves exactly like dict(), but allows accessing +# the keys and values using the attribute syntax, i.e., "mydict.key = value". + +class EasyDict(dict): + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + def __getattr__(self, name): return self[name] + def __setattr__(self, name, value): self[name] = value + def __delattr__(self, name): del self[name] + +#---------------------------------------------------------------------------- +# Paths. + +data_dir = 'datasets' +result_dir = 'results' + +#---------------------------------------------------------------------------- +# TensorFlow options. + +tf_config = EasyDict() # TensorFlow session config, set by tfutil.init_tf(). +env = EasyDict() # Environment variables, set by the main program in train.py. + +tf_config['graph_options.place_pruned_graph'] = True # False (default) = Check that all ops are available on the designated device. True = Skip the check for ops that are not used. +#tf_config['gpu_options.allow_growth'] = False # False (default) = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed. +#env.CUDA_VISIBLE_DEVICES = '0' # Unspecified (default) = Use all available GPUs. List of ints = CUDA device numbers to use. +env.TF_CPP_MIN_LOG_LEVEL = '1' # 0 (default) = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info. + +#---------------------------------------------------------------------------- +# Official training configs, targeted mainly for CelebA-HQ. +# To run, comment/uncomment the lines as appropriate and launch train.py. + +desc = 'pgan' # Description string included in result subdir name. +random_seed = 1000 # Global random seed. +dataset = EasyDict() # Options for dataset.load_dataset(). +train = EasyDict(func='train.train_progressive_gan') # Options for main training func. +G = EasyDict(func='networks.G_paper') # Options for generator network. +D = EasyDict(func='networks.D_paper') # Options for discriminator network. +G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for generator optimizer. +D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for discriminator optimizer. +G_loss = EasyDict(func='loss.G_wgan_acgan') # Options for generator loss. +D_loss = EasyDict(func='loss.D_wgangp_acgan') # Options for discriminator loss. +sched = EasyDict() # Options for train.TrainingSchedule. +grid = EasyDict(size='1080p', layout='random') # Options for train.setup_snapshot_image_grid(). + +# Dataset (choose one). 
+desc += '-celebahq'; dataset = EasyDict(tfrecord_dir='celebahq'); train.mirror_augment = True +#desc += '-celeba'; dataset = EasyDict(tfrecord_dir='celeba'); train.mirror_augment = True +#desc += '-cifar10'; dataset = EasyDict(tfrecord_dir='cifar10') +#desc += '-cifar100'; dataset = EasyDict(tfrecord_dir='cifar100') +#desc += '-svhn'; dataset = EasyDict(tfrecord_dir='svhn') +#desc += '-mnist'; dataset = EasyDict(tfrecord_dir='mnist') +#desc += '-mnistrgb'; dataset = EasyDict(tfrecord_dir='mnistrgb') +#desc += '-syn1024rgb'; dataset = EasyDict(class_name='dataset.SyntheticDataset', resolution=1024, num_channels=3) +#desc += '-lsun-airplane'; dataset = EasyDict(tfrecord_dir='lsun-airplane-100k'); train.mirror_augment = True +#desc += '-lsun-bedroom'; dataset = EasyDict(tfrecord_dir='lsun-bedroom-100k'); train.mirror_augment = True +#desc += '-lsun-bicycle'; dataset = EasyDict(tfrecord_dir='lsun-bicycle-100k'); train.mirror_augment = True +#desc += '-lsun-bird'; dataset = EasyDict(tfrecord_dir='lsun-bird-100k'); train.mirror_augment = True +#desc += '-lsun-boat'; dataset = EasyDict(tfrecord_dir='lsun-boat-100k'); train.mirror_augment = True +#desc += '-lsun-bottle'; dataset = EasyDict(tfrecord_dir='lsun-bottle-100k'); train.mirror_augment = True +#desc += '-lsun-bridge'; dataset = EasyDict(tfrecord_dir='lsun-bridge-100k'); train.mirror_augment = True +#desc += '-lsun-bus'; dataset = EasyDict(tfrecord_dir='lsun-bus-100k'); train.mirror_augment = True +#desc += '-lsun-car'; dataset = EasyDict(tfrecord_dir='lsun-car-100k'); train.mirror_augment = True +#desc += '-lsun-cat'; dataset = EasyDict(tfrecord_dir='lsun-cat-100k'); train.mirror_augment = True +#desc += '-lsun-chair'; dataset = EasyDict(tfrecord_dir='lsun-chair-100k'); train.mirror_augment = True +#desc += '-lsun-churchoutdoor'; dataset = EasyDict(tfrecord_dir='lsun-churchoutdoor-100k'); train.mirror_augment = True +#desc += '-lsun-classroom'; dataset = EasyDict(tfrecord_dir='lsun-classroom-100k'); train.mirror_augment = True +#desc += '-lsun-conferenceroom'; dataset = EasyDict(tfrecord_dir='lsun-conferenceroom-100k'); train.mirror_augment = True +#desc += '-lsun-cow'; dataset = EasyDict(tfrecord_dir='lsun-cow-100k'); train.mirror_augment = True +#desc += '-lsun-diningroom'; dataset = EasyDict(tfrecord_dir='lsun-diningroom-100k'); train.mirror_augment = True +#desc += '-lsun-diningtable'; dataset = EasyDict(tfrecord_dir='lsun-diningtable-100k'); train.mirror_augment = True +#desc += '-lsun-dog'; dataset = EasyDict(tfrecord_dir='lsun-dog-100k'); train.mirror_augment = True +#desc += '-lsun-horse'; dataset = EasyDict(tfrecord_dir='lsun-horse-100k'); train.mirror_augment = True +#desc += '-lsun-kitchen'; dataset = EasyDict(tfrecord_dir='lsun-kitchen-100k'); train.mirror_augment = True +#desc += '-lsun-livingroom'; dataset = EasyDict(tfrecord_dir='lsun-livingroom-100k'); train.mirror_augment = True +#desc += '-lsun-motorbike'; dataset = EasyDict(tfrecord_dir='lsun-motorbike-100k'); train.mirror_augment = True +#desc += '-lsun-person'; dataset = EasyDict(tfrecord_dir='lsun-person-100k'); train.mirror_augment = True +#desc += '-lsun-pottedplant'; dataset = EasyDict(tfrecord_dir='lsun-pottedplant-100k'); train.mirror_augment = True +#desc += '-lsun-restaurant'; dataset = EasyDict(tfrecord_dir='lsun-restaurant-100k'); train.mirror_augment = True +#desc += '-lsun-sheep'; dataset = EasyDict(tfrecord_dir='lsun-sheep-100k'); train.mirror_augment = True +#desc += '-lsun-sofa'; dataset = EasyDict(tfrecord_dir='lsun-sofa-100k'); train.mirror_augment = 
True +#desc += '-lsun-tower'; dataset = EasyDict(tfrecord_dir='lsun-tower-100k'); train.mirror_augment = True +#desc += '-lsun-train'; dataset = EasyDict(tfrecord_dir='lsun-train-100k'); train.mirror_augment = True +#desc += '-lsun-tvmonitor'; dataset = EasyDict(tfrecord_dir='lsun-tvmonitor-100k'); train.mirror_augment = True + +# Conditioning & snapshot options. +#desc += '-cond'; dataset.max_label_size = 'full' # conditioned on full label +#desc += '-cond1'; dataset.max_label_size = 1 # conditioned on first component of the label +#desc += '-g4k'; grid.size = '4k' +#desc += '-grpc'; grid.layout = 'row_per_class' + +# Config presets (choose one). +#desc += '-preset-v1-1gpu'; num_gpus = 1; D.mbstd_group_size = 16; sched.minibatch_base = 16; sched.minibatch_dict = {256: 14, 512: 6, 1024: 3}; sched.lod_training_kimg = 800; sched.lod_transition_kimg = 800; train.total_kimg = 19000 +desc += '-preset-v2-1gpu'; num_gpus = 1; sched.minibatch_base = 4; sched.minibatch_dict = {4: 128, 8: 128, 16: 128, 32: 64, 64: 32, 128: 16, 256: 8, 512: 4}; sched.G_lrate_dict = {1024: 0.0015}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000 +#desc += '-preset-v2-2gpus'; num_gpus = 2; sched.minibatch_base = 8; sched.minibatch_dict = {4: 256, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16, 256: 8}; sched.G_lrate_dict = {512: 0.0015, 1024: 0.002}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000 +#desc += '-preset-v2-4gpus'; num_gpus = 4; sched.minibatch_base = 16; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16}; sched.G_lrate_dict = {256: 0.0015, 512: 0.002, 1024: 0.003}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000 +#desc += '-preset-v2-8gpus'; num_gpus = 8; sched.minibatch_base = 32; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32}; sched.G_lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000 + +# Numerical precision (choose one). +desc += '-fp32'; sched.max_minibatch_per_gpu = {256: 16, 512: 8, 1024: 4} +#desc += '-fp16'; G.dtype = 'float16'; D.dtype = 'float16'; G.pixelnorm_epsilon=1e-4; G_opt.use_loss_scaling = True; D_opt.use_loss_scaling = True; sched.max_minibatch_per_gpu = {512: 16, 1024: 8} + +# Disable individual features. +#desc += '-nogrowing'; sched.lod_initial_resolution = 1024; sched.lod_training_kimg = 0; sched.lod_transition_kimg = 0; train.total_kimg = 10000 +#desc += '-nopixelnorm'; G.use_pixelnorm = False +#desc += '-nowscale'; G.use_wscale = False; D.use_wscale = False +#desc += '-noleakyrelu'; G.use_leakyrelu = False +#desc += '-nosmoothing'; train.G_smoothing = 0.0 +#desc += '-norepeat'; train.minibatch_repeats = 1 +#desc += '-noreset'; train.reset_opt_for_new_lod = False + +# Special modes. 
+#desc += '-BENCHMARK'; sched.lod_initial_resolution = 4; sched.lod_training_kimg = 3; sched.lod_transition_kimg = 3; train.total_kimg = (8*2+1)*3; sched.tick_kimg_base = 1; sched.tick_kimg_dict = {}; train.image_snapshot_ticks = 1000; train.network_snapshot_ticks = 1000 +#desc += '-BENCHMARK0'; sched.lod_initial_resolution = 1024; train.total_kimg = 10; sched.tick_kimg_base = 1; sched.tick_kimg_dict = {}; train.image_snapshot_ticks = 1000; train.network_snapshot_ticks = 1000 +#desc += '-VERBOSE'; sched.tick_kimg_base = 1; sched.tick_kimg_dict = {}; train.image_snapshot_ticks = 1; train.network_snapshot_ticks = 100 +#desc += '-GRAPH'; train.save_tf_graph = True +#desc += '-HIST'; train.save_weight_histograms = True + +#---------------------------------------------------------------------------- +# Utility scripts. +# To run, uncomment the appropriate line and launch train.py. + +#train = EasyDict(func='util_scripts.generate_fake_images', run_id=23, num_pngs=1000); num_gpus = 1; desc = 'fake-images-' + str(train.run_id) +#train = EasyDict(func='util_scripts.generate_fake_images', run_id=23, grid_size=[15,8], num_pngs=10, image_shrink=4); num_gpus = 1; desc = 'fake-grids-' + str(train.run_id) +#train = EasyDict(func='util_scripts.generate_interpolation_video', run_id=23, grid_size=[1,1], duration_sec=60.0, smoothing_sec=1.0); num_gpus = 1; desc = 'interpolation-video-' + str(train.run_id) +#train = EasyDict(func='util_scripts.generate_training_video', run_id=23, duration_sec=20.0); num_gpus = 1; desc = 'training-video-' + str(train.run_id) + +#train = EasyDict(func='util_scripts.evaluate_metrics', run_id=23, log='metric-swd-16k.txt', metrics=['swd'], num_images=16384, real_passes=2); num_gpus = 1; desc = train.log.split('.')[0] + '-' + str(train.run_id) +#train = EasyDict(func='util_scripts.evaluate_metrics', run_id=23, log='metric-fid-10k.txt', metrics=['fid'], num_images=10000, real_passes=1); num_gpus = 1; desc = train.log.split('.')[0] + '-' + str(train.run_id) +#train = EasyDict(func='util_scripts.evaluate_metrics', run_id=23, log='metric-fid-50k.txt', metrics=['fid'], num_images=50000, real_passes=1); num_gpus = 1; desc = train.log.split('.')[0] + '-' + str(train.run_id) +#train = EasyDict(func='util_scripts.evaluate_metrics', run_id=23, log='metric-is-50k.txt', metrics=['is'], num_images=50000, real_passes=1); num_gpus = 1; desc = train.log.split('.')[0] + '-' + str(train.run_id) +#train = EasyDict(func='util_scripts.evaluate_metrics', run_id=23, log='metric-msssim-20k.txt', metrics=['msssim'], num_images=20000, real_passes=1); num_gpus = 1; desc = train.log.split('.')[0] + '-' + str(train.run_id) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/pggan_official/dataset.py b/ContraCLIP/models/genforce/converters/pggan_official/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..a56c236a05faff1098aad2910e56decd4a63faf9 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/dataset.py @@ -0,0 +1,241 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 
+ +import os +import glob +import numpy as np +import tensorflow as tf +import tfutil + +#---------------------------------------------------------------------------- +# Parse individual image from a tfrecords file. + +def parse_tfrecord_tf(record): + features = tf.parse_single_example(record, features={ + 'shape': tf.FixedLenFeature([3], tf.int64), + 'data': tf.FixedLenFeature([], tf.string)}) + data = tf.decode_raw(features['data'], tf.uint8) + return tf.reshape(data, features['shape']) + +def parse_tfrecord_np(record): + ex = tf.train.Example() + ex.ParseFromString(record) + shape = ex.features.feature['shape'].int64_list.value + data = ex.features.feature['data'].bytes_list.value[0] + return np.fromstring(data, np.uint8).reshape(shape) + +#---------------------------------------------------------------------------- +# Dataset class that loads data from tfrecords files. + +class TFRecordDataset: + def __init__(self, + tfrecord_dir, # Directory containing a collection of tfrecords files. + resolution = None, # Dataset resolution, None = autodetect. + label_file = None, # Relative path of the labels file, None = autodetect. + max_label_size = 0, # 0 = no labels, 'full' = full labels, = N first label components. + repeat = True, # Repeat dataset indefinitely. + shuffle_mb = 4096, # Shuffle data within specified window (megabytes), 0 = disable shuffling. + prefetch_mb = 2048, # Amount of data to prefetch (megabytes), 0 = disable prefetching. + buffer_mb = 256, # Read buffer size (megabytes). + num_threads = 2): # Number of concurrent threads. + + self.tfrecord_dir = tfrecord_dir + self.resolution = None + self.resolution_log2 = None + self.shape = [] # [channel, height, width] + self.dtype = 'uint8' + self.dynamic_range = [0, 255] + self.label_file = label_file + self.label_size = None # [component] + self.label_dtype = None + self._np_labels = None + self._tf_minibatch_in = None + self._tf_labels_var = None + self._tf_labels_dataset = None + self._tf_datasets = dict() + self._tf_iterator = None + self._tf_init_ops = dict() + self._tf_minibatch_np = None + self._cur_minibatch = -1 + self._cur_lod = -1 + + # List tfrecords files and inspect their shapes. + assert os.path.isdir(self.tfrecord_dir) + tfr_files = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.tfrecords'))) + assert len(tfr_files) >= 1 + tfr_shapes = [] + for tfr_file in tfr_files: + tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE) + for record in tf.python_io.tf_record_iterator(tfr_file, tfr_opt): + tfr_shapes.append(parse_tfrecord_np(record).shape) + break + + # Autodetect label filename. + if self.label_file is None: + guess = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.labels'))) + if len(guess): + self.label_file = guess[0] + elif not os.path.isfile(self.label_file): + guess = os.path.join(self.tfrecord_dir, self.label_file) + if os.path.isfile(guess): + self.label_file = guess + + # Determine shape and resolution. 
+ max_shape = max(tfr_shapes, key=lambda shape: np.prod(shape)) + self.resolution = resolution if resolution is not None else max_shape[1] + self.resolution_log2 = int(np.log2(self.resolution)) + self.shape = [max_shape[0], self.resolution, self.resolution] + tfr_lods = [self.resolution_log2 - int(np.log2(shape[1])) for shape in tfr_shapes] + assert all(shape[0] == max_shape[0] for shape in tfr_shapes) + assert all(shape[1] == shape[2] for shape in tfr_shapes) + assert all(shape[1] == self.resolution // (2**lod) for shape, lod in zip(tfr_shapes, tfr_lods)) + assert all(lod in tfr_lods for lod in range(self.resolution_log2 - 1)) + + # Load labels. + assert max_label_size == 'full' or max_label_size >= 0 + self._np_labels = np.zeros([1<<20, 0], dtype=np.float32) + if self.label_file is not None and max_label_size != 0: + self._np_labels = np.load(self.label_file) + assert self._np_labels.ndim == 2 + if max_label_size != 'full' and self._np_labels.shape[1] > max_label_size: + self._np_labels = self._np_labels[:, :max_label_size] + self.label_size = self._np_labels.shape[1] + self.label_dtype = self._np_labels.dtype.name + + # Build TF expressions. + with tf.name_scope('Dataset'), tf.device('/cpu:0'): + self._tf_minibatch_in = tf.placeholder(tf.int64, name='minibatch_in', shape=[]) + tf_labels_init = tf.zeros(self._np_labels.shape, self._np_labels.dtype) + self._tf_labels_var = tf.Variable(tf_labels_init, name='labels_var') + tfutil.set_vars({self._tf_labels_var: self._np_labels}) + self._tf_labels_dataset = tf.data.Dataset.from_tensor_slices(self._tf_labels_var) + for tfr_file, tfr_shape, tfr_lod in zip(tfr_files, tfr_shapes, tfr_lods): + if tfr_lod < 0: + continue + dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_mb<<20) + dset = dset.map(parse_tfrecord_tf, num_parallel_calls=num_threads) + dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset)) + bytes_per_item = np.prod(tfr_shape) * np.dtype(self.dtype).itemsize + if shuffle_mb > 0: + dset = dset.shuffle(((shuffle_mb << 20) - 1) // bytes_per_item + 1) + if repeat: + dset = dset.repeat() + if prefetch_mb > 0: + dset = dset.prefetch(((prefetch_mb << 20) - 1) // bytes_per_item + 1) + dset = dset.batch(self._tf_minibatch_in) + self._tf_datasets[tfr_lod] = dset + self._tf_iterator = tf.data.Iterator.from_structure(self._tf_datasets[0].output_types, self._tf_datasets[0].output_shapes) + self._tf_init_ops = {lod: self._tf_iterator.make_initializer(dset) for lod, dset in self._tf_datasets.items()} + + # Use the given minibatch size and level-of-detail for the data returned by get_minibatch_tf(). + def configure(self, minibatch_size, lod=0): + lod = int(np.floor(lod)) + assert minibatch_size >= 1 and lod in self._tf_datasets + if self._cur_minibatch != minibatch_size or self._cur_lod != lod: + self._tf_init_ops[lod].run({self._tf_minibatch_in: minibatch_size}) + self._cur_minibatch = minibatch_size + self._cur_lod = lod + + # Get next minibatch as TensorFlow expressions. + def get_minibatch_tf(self): # => images, labels + return self._tf_iterator.get_next() + + # Get next minibatch as NumPy arrays. + def get_minibatch_np(self, minibatch_size, lod=0): # => images, labels + self.configure(minibatch_size, lod) + if self._tf_minibatch_np is None: + self._tf_minibatch_np = self.get_minibatch_tf() + return tfutil.run(self._tf_minibatch_np) + + # Get random labels as TensorFlow expression. 
+ def get_random_labels_tf(self, minibatch_size): # => labels + if self.label_size > 0: + return tf.gather(self._tf_labels_var, tf.random_uniform([minibatch_size], 0, self._np_labels.shape[0], dtype=tf.int32)) + else: + return tf.zeros([minibatch_size, 0], self.label_dtype) + + # Get random labels as NumPy array. + def get_random_labels_np(self, minibatch_size): # => labels + if self.label_size > 0: + return self._np_labels[np.random.randint(self._np_labels.shape[0], size=[minibatch_size])] + else: + return np.zeros([minibatch_size, 0], self.label_dtype) + +#---------------------------------------------------------------------------- +# Base class for datasets that are generated on the fly. + +class SyntheticDataset: + def __init__(self, resolution=1024, num_channels=3, dtype='uint8', dynamic_range=[0,255], label_size=0, label_dtype='float32'): + self.resolution = resolution + self.resolution_log2 = int(np.log2(resolution)) + self.shape = [num_channels, resolution, resolution] + self.dtype = dtype + self.dynamic_range = dynamic_range + self.label_size = label_size + self.label_dtype = label_dtype + self._tf_minibatch_var = None + self._tf_lod_var = None + self._tf_minibatch_np = None + self._tf_labels_np = None + + assert self.resolution == 2 ** self.resolution_log2 + with tf.name_scope('Dataset'): + self._tf_minibatch_var = tf.Variable(np.int32(0), name='minibatch_var') + self._tf_lod_var = tf.Variable(np.int32(0), name='lod_var') + + def configure(self, minibatch_size, lod=0): + lod = int(np.floor(lod)) + assert minibatch_size >= 1 and lod >= 0 and lod <= self.resolution_log2 + tfutil.set_vars({self._tf_minibatch_var: minibatch_size, self._tf_lod_var: lod}) + + def get_minibatch_tf(self): # => images, labels + with tf.name_scope('SyntheticDataset'): + shrink = tf.cast(2.0 ** tf.cast(self._tf_lod_var, tf.float32), tf.int32) + shape = [self.shape[0], self.shape[1] // shrink, self.shape[2] // shrink] + images = self._generate_images(self._tf_minibatch_var, self._tf_lod_var, shape) + labels = self._generate_labels(self._tf_minibatch_var) + return images, labels + + def get_minibatch_np(self, minibatch_size, lod=0): # => images, labels + self.configure(minibatch_size, lod) + if self._tf_minibatch_np is None: + self._tf_minibatch_np = self.get_minibatch_tf() + return tfutil.run(self._tf_minibatch_np) + + def get_random_labels_tf(self, minibatch_size): # => labels + with tf.name_scope('SyntheticDataset'): + return self._generate_labels(minibatch_size) + + def get_random_labels_np(self, minibatch_size): # => labels + self.configure(minibatch_size) + if self._tf_labels_np is None: + self._tf_labels_np = self.get_random_labels_tf() + return tfutil.run(self._tf_labels_np) + + def _generate_images(self, minibatch, lod, shape): # to be overridden by subclasses + return tf.zeros([minibatch] + shape, self.dtype) + + def _generate_labels(self, minibatch): # to be overridden by subclasses + return tf.zeros([minibatch, self.label_size], self.label_dtype) + +#---------------------------------------------------------------------------- +# Helper func for constructing a dataset object using the given options. + +def load_dataset(class_name='dataset.TFRecordDataset', data_dir=None, verbose=False, **kwargs): + adjusted_kwargs = dict(kwargs) + if 'tfrecord_dir' in adjusted_kwargs and data_dir is not None: + adjusted_kwargs['tfrecord_dir'] = os.path.join(data_dir, adjusted_kwargs['tfrecord_dir']) + if verbose: + print('Streaming data using %s...' 
% class_name) + dataset = tfutil.import_obj(class_name)(**adjusted_kwargs) + if verbose: + print('Dataset shape =', np.int32(dataset.shape).tolist()) + print('Dynamic range =', dataset.dynamic_range) + print('Label size =', dataset.label_size) + return dataset + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/pggan_official/dataset_tool.py b/ContraCLIP/models/genforce/converters/pggan_official/dataset_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..f7861cb79fab70fa8060554a17b8e1553310381e --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/dataset_tool.py @@ -0,0 +1,740 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +import os +import sys +import glob +import argparse +import threading +import six.moves.queue as Queue +import traceback +import numpy as np +import tensorflow as tf +import PIL.Image + +import tfutil +import dataset + +#---------------------------------------------------------------------------- + +def error(msg): + print('Error: ' + msg) + exit(1) + +#---------------------------------------------------------------------------- + +class TFRecordExporter: + def __init__(self, tfrecord_dir, expected_images, print_progress=True, progress_interval=10): + self.tfrecord_dir = tfrecord_dir + self.tfr_prefix = os.path.join(self.tfrecord_dir, os.path.basename(self.tfrecord_dir)) + self.expected_images = expected_images + self.cur_images = 0 + self.shape = None + self.resolution_log2 = None + self.tfr_writers = [] + self.print_progress = print_progress + self.progress_interval = progress_interval + if self.print_progress: + print('Creating dataset "%s"' % tfrecord_dir) + if not os.path.isdir(self.tfrecord_dir): + os.makedirs(self.tfrecord_dir) + assert(os.path.isdir(self.tfrecord_dir)) + + def close(self): + if self.print_progress: + print('%-40s\r' % 'Flushing data...', end='', flush=True) + for tfr_writer in self.tfr_writers: + tfr_writer.close() + self.tfr_writers = [] + if self.print_progress: + print('%-40s\r' % '', end='', flush=True) + print('Added %d images.' % self.cur_images) + + def choose_shuffled_order(self): # Note: Images and labels must be added in shuffled order. 
+ order = np.arange(self.expected_images) + np.random.RandomState(123).shuffle(order) + return order + + def add_image(self, img): + if self.print_progress and self.cur_images % self.progress_interval == 0: + print('%d / %d\r' % (self.cur_images, self.expected_images), end='', flush=True) + if self.shape is None: + self.shape = img.shape + self.resolution_log2 = int(np.log2(self.shape[1])) + assert self.shape[0] in [1, 3] + assert self.shape[1] == self.shape[2] + assert self.shape[1] == 2**self.resolution_log2 + tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE) + for lod in range(self.resolution_log2 - 1): + tfr_file = self.tfr_prefix + '-r%02d.tfrecords' % (self.resolution_log2 - lod) + self.tfr_writers.append(tf.python_io.TFRecordWriter(tfr_file, tfr_opt)) + assert img.shape == self.shape + for lod, tfr_writer in enumerate(self.tfr_writers): + if lod: + img = img.astype(np.float32) + img = (img[:, 0::2, 0::2] + img[:, 0::2, 1::2] + img[:, 1::2, 0::2] + img[:, 1::2, 1::2]) * 0.25 + quant = np.rint(img).clip(0, 255).astype(np.uint8) + ex = tf.train.Example(features=tf.train.Features(feature={ + 'shape': tf.train.Feature(int64_list=tf.train.Int64List(value=quant.shape)), + 'data': tf.train.Feature(bytes_list=tf.train.BytesList(value=[quant.tostring()]))})) + tfr_writer.write(ex.SerializeToString()) + self.cur_images += 1 + + def add_labels(self, labels): + if self.print_progress: + print('%-40s\r' % 'Saving labels...', end='', flush=True) + assert labels.shape[0] == self.cur_images + with open(self.tfr_prefix + '-rxx.labels', 'wb') as f: + np.save(f, labels.astype(np.float32)) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + +#---------------------------------------------------------------------------- + +class ExceptionInfo(object): + def __init__(self): + self.value = sys.exc_info()[1] + self.traceback = traceback.format_exc() + +#---------------------------------------------------------------------------- + +class WorkerThread(threading.Thread): + def __init__(self, task_queue): + threading.Thread.__init__(self) + self.task_queue = task_queue + + def run(self): + while True: + func, args, result_queue = self.task_queue.get() + if func is None: + break + try: + result = func(*args) + except: + result = ExceptionInfo() + result_queue.put((result, args)) + +#---------------------------------------------------------------------------- + +class ThreadPool(object): + def __init__(self, num_threads): + assert num_threads >= 1 + self.task_queue = Queue.Queue() + self.result_queues = dict() + self.num_threads = num_threads + for idx in range(self.num_threads): + thread = WorkerThread(self.task_queue) + thread.daemon = True + thread.start() + + def add_task(self, func, args=()): + assert hasattr(func, '__call__') # must be a function + if func not in self.result_queues: + self.result_queues[func] = Queue.Queue() + self.task_queue.put((func, args, self.result_queues[func])) + + def get_result(self, func): # returns (result, args) + result, args = self.result_queues[func].get() + if isinstance(result, ExceptionInfo): + print('\n\nWorker thread caught an exception:\n' + result.traceback) + raise result.value + return result, args + + def finish(self): + for idx in range(self.num_threads): + self.task_queue.put((None, (), None)) + + def __enter__(self): # for 'with' statement + return self + + def __exit__(self, *excinfo): + self.finish() + + def process_items_concurrently(self, item_iterator, process_func=lambda x: x, 
pre_func=lambda x: x, post_func=lambda x: x, max_items_in_flight=None): + if max_items_in_flight is None: max_items_in_flight = self.num_threads * 4 + assert max_items_in_flight >= 1 + results = [] + retire_idx = [0] + + def task_func(prepared, idx): + return process_func(prepared) + + def retire_result(): + processed, (prepared, idx) = self.get_result(task_func) + results[idx] = processed + while retire_idx[0] < len(results) and results[retire_idx[0]] is not None: + yield post_func(results[retire_idx[0]]) + results[retire_idx[0]] = None + retire_idx[0] += 1 + + for idx, item in enumerate(item_iterator): + prepared = pre_func(item) + results.append(None) + self.add_task(func=task_func, args=(prepared, idx)) + while retire_idx[0] < idx - max_items_in_flight + 2: + for res in retire_result(): yield res + while retire_idx[0] < len(results): + for res in retire_result(): yield res + +#---------------------------------------------------------------------------- + +def display(tfrecord_dir): + print('Loading dataset "%s"' % tfrecord_dir) + tfutil.init_tf({'gpu_options.allow_growth': True}) + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle_mb=0) + tfutil.init_uninited_vars() + + idx = 0 + while True: + try: + images, labels = dset.get_minibatch_np(1) + except tf.errors.OutOfRangeError: + break + if idx == 0: + print('Displaying images') + import cv2 # pip install opencv-python + cv2.namedWindow('dataset_tool') + print('Press SPACE or ENTER to advance, ESC to exit') + print('\nidx = %-8d\nlabel = %s' % (idx, labels[0].tolist())) + cv2.imshow('dataset_tool', images[0].transpose(1, 2, 0)[:, :, ::-1]) # CHW => HWC, RGB => BGR + idx += 1 + if cv2.waitKey() == 27: + break + print('\nDisplayed %d images.' % idx) + +#---------------------------------------------------------------------------- + +def extract(tfrecord_dir, output_dir): + print('Loading dataset "%s"' % tfrecord_dir) + tfutil.init_tf({'gpu_options.allow_growth': True}) + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size=0, repeat=False, shuffle_mb=0) + tfutil.init_uninited_vars() + + print('Extracting images to "%s"' % output_dir) + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + idx = 0 + while True: + if idx % 10 == 0: + print('%d\r' % idx, end='', flush=True) + try: + images, labels = dset.get_minibatch_np(1) + except tf.errors.OutOfRangeError: + break + if images.shape[1] == 1: + img = PIL.Image.fromarray(images[0][0], 'L') + else: + img = PIL.Image.fromarray(images[0].transpose(1, 2, 0), 'RGB') + img.save(os.path.join(output_dir, 'img%08d.png' % idx)) + idx += 1 + print('Extracted %d images.' 
% idx) + +#---------------------------------------------------------------------------- + +def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels): + max_label_size = 0 if ignore_labels else 'full' + print('Loading dataset "%s"' % tfrecord_dir_a) + tfutil.init_tf({'gpu_options.allow_growth': True}) + dset_a = dataset.TFRecordDataset(tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle_mb=0) + print('Loading dataset "%s"' % tfrecord_dir_b) + dset_b = dataset.TFRecordDataset(tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle_mb=0) + tfutil.init_uninited_vars() + + print('Comparing datasets') + idx = 0 + identical_images = 0 + identical_labels = 0 + while True: + if idx % 100 == 0: + print('%d\r' % idx, end='', flush=True) + try: + images_a, labels_a = dset_a.get_minibatch_np(1) + except tf.errors.OutOfRangeError: + images_a, labels_a = None, None + try: + images_b, labels_b = dset_b.get_minibatch_np(1) + except tf.errors.OutOfRangeError: + images_b, labels_b = None, None + if images_a is None or images_b is None: + if images_a is not None or images_b is not None: + print('Datasets contain different number of images') + break + if images_a.shape == images_b.shape and np.all(images_a == images_b): + identical_images += 1 + else: + print('Image %d is different' % idx) + if labels_a.shape == labels_b.shape and np.all(labels_a == labels_b): + identical_labels += 1 + else: + print('Label %d is different' % idx) + idx += 1 + print('Identical images: %d / %d' % (identical_images, idx)) + if not ignore_labels: + print('Identical labels: %d / %d' % (identical_labels, idx)) + +#---------------------------------------------------------------------------- + +def create_mnist(tfrecord_dir, mnist_dir): + print('Loading MNIST from "%s"' % mnist_dir) + import gzip + with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: + images = np.frombuffer(file.read(), np.uint8, offset=16) + with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file: + labels = np.frombuffer(file.read(), np.uint8, offset=8) + images = images.reshape(-1, 1, 28, 28) + images = np.pad(images, [(0,0), (0,0), (2,2), (2,2)], 'constant', constant_values=0) + assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (60000,) and labels.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123): + print('Loading MNIST from "%s"' % mnist_dir) + import gzip + with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: + images = np.frombuffer(file.read(), np.uint8, offset=16) + images = images.reshape(-1, 28, 28) + images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0) + assert images.shape == (60000, 32, 32) and images.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + + with TFRecordExporter(tfrecord_dir, num_images) as tfr: + rnd = np.random.RandomState(random_seed) + for idx in range(num_images): + 
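+            # Each composite sample stacks three randomly chosen MNIST digits as its
+            # R, G and B channels: rnd.randint(..., size=3) picks three indices, so
+            # images[...] is a single (3, 32, 32) uint8 image.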
tfr.add_image(images[rnd.randint(images.shape[0], size=3)]) + +#---------------------------------------------------------------------------- + +def create_cifar10(tfrecord_dir, cifar10_dir): + print('Loading CIFAR-10 from "%s"' % cifar10_dir) + import pickle + images = [] + labels = [] + for batch in range(1, 6): + with open(os.path.join(cifar10_dir, 'data_batch_%d' % batch), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images.append(data['data'].reshape(-1, 3, 32, 32)) + labels.append(data['labels']) + images = np.concatenate(images) + labels = np.concatenate(labels) + assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (50000,) and labels.dtype == np.int32 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_cifar100(tfrecord_dir, cifar100_dir): + print('Loading CIFAR-100 from "%s"' % cifar100_dir) + import pickle + with open(os.path.join(cifar100_dir, 'train'), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images = data['data'].reshape(-1, 3, 32, 32) + labels = np.array(data['fine_labels']) + assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (50000,) and labels.dtype == np.int32 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 99 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_svhn(tfrecord_dir, svhn_dir): + print('Loading SVHN from "%s"' % svhn_dir) + import pickle + images = [] + labels = [] + for batch in range(1, 4): + with open(os.path.join(svhn_dir, 'train_%d.pkl' % batch), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images.append(data[0]) + labels.append(data[1]) + images = np.concatenate(images) + labels = np.concatenate(labels) + assert images.shape == (73257, 3, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (73257,) and labels.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_lsun(tfrecord_dir, lmdb_dir, resolution=256, max_images=None): + print('Loading LSUN dataset from "%s"' % lmdb_dir) + import lmdb # pip install lmdb + import cv2 # pip install opencv-python + import io + with lmdb.open(lmdb_dir, readonly=True).begin(write=False) as txn: + 
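+        # For each LMDB entry: decode the stored image bytes with cv2 (falling back
+        # to PIL on failure), center-crop to a square, resize to resolution x
+        # resolution, and convert HWC -> CHW before handing the image to the exporter.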
total_images = txn.stat()['entries'] + if max_images is None: + max_images = total_images + with TFRecordExporter(tfrecord_dir, max_images) as tfr: + for idx, (key, value) in enumerate(txn.cursor()): + try: + try: + img = cv2.imdecode(np.fromstring(value, dtype=np.uint8), 1) + if img is None: + raise IOError('cv2.imdecode failed') + img = img[:, :, ::-1] # BGR => RGB + except IOError: + img = np.asarray(PIL.Image.open(io.BytesIO(value))) + crop = np.min(img.shape[:2]) + img = img[(img.shape[0] - crop) // 2 : (img.shape[0] + crop) // 2, (img.shape[1] - crop) // 2 : (img.shape[1] + crop) // 2] + img = PIL.Image.fromarray(img, 'RGB') + img = img.resize((resolution, resolution), PIL.Image.ANTIALIAS) + img = np.asarray(img) + img = img.transpose(2, 0, 1) # HWC => CHW + tfr.add_image(img) + except: + print(sys.exc_info()[1]) + if tfr.cur_images == max_images: + break + +#---------------------------------------------------------------------------- + +def create_celeba(tfrecord_dir, celeba_dir, cx=89, cy=121): + print('Loading CelebA from "%s"' % celeba_dir) + glob_pattern = os.path.join(celeba_dir, 'img_align_celeba_png', '*.png') + image_filenames = sorted(glob.glob(glob_pattern)) + expected_images = 202599 + if len(image_filenames) != expected_images: + error('Expected to find %d images' % expected_images) + + with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) + assert img.shape == (218, 178, 3) + img = img[cy - 64 : cy + 64, cx - 64 : cx + 64] + img = img.transpose(2, 0, 1) # HWC => CHW + tfr.add_image(img) + +#---------------------------------------------------------------------------- + +def create_celebahq(tfrecord_dir, celeba_dir, delta_dir, num_threads=4, num_tasks=100): + print('Loading CelebA from "%s"' % celeba_dir) + expected_images = 202599 + if len(glob.glob(os.path.join(celeba_dir, 'img_celeba', '*.jpg'))) != expected_images: + error('Expected to find %d images' % expected_images) + with open(os.path.join(celeba_dir, 'Anno', 'list_landmarks_celeba.txt'), 'rt') as file: + landmarks = [[float(value) for value in line.split()[1:]] for line in file.readlines()[2:]] + landmarks = np.float32(landmarks).reshape(-1, 5, 2) + + print('Loading CelebA-HQ deltas from "%s"' % delta_dir) + import scipy.ndimage + import hashlib + import bz2 + import zipfile + import base64 + import cryptography.hazmat.primitives.hashes + import cryptography.hazmat.backends + import cryptography.hazmat.primitives.kdf.pbkdf2 + import cryptography.fernet + expected_zips = 30 + if len(glob.glob(os.path.join(delta_dir, 'delta*.zip'))) != expected_zips: + error('Expected to find %d zips' % expected_zips) + with open(os.path.join(delta_dir, 'image_list.txt'), 'rt') as file: + lines = [line.split() for line in file] + fields = dict() + for idx, field in enumerate(lines[0]): + type = int if field.endswith('idx') else str + fields[field] = [type(line[idx]) for line in lines[1:]] + indices = np.array(fields['idx']) + + # Must use pillow version 3.1.1 for everything to work correctly. + if getattr(PIL, 'PILLOW_VERSION', '') != '3.1.1': + error('create_celebahq requires pillow version 3.1.1') # conda install pillow=3.1.1 + + # Must use libjpeg version 8d for everything to work correctly. 
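+    # These environment checks matter because CelebA-HQ is reconstructed by adding
+    # precomputed deltas to deterministically processed CelebA crops and verifying
+    # MD5 checksums at each stage; a different JPEG decoder or PIL resampling
+    # implementation would change the decoded pixels and break the checksums.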
+ img = np.array(PIL.Image.open(os.path.join(celeba_dir, 'img_celeba', '000001.jpg'))) + md5 = hashlib.md5() + md5.update(img.tobytes()) + if md5.hexdigest() != '9cad8178d6cb0196b36f7b34bc5eb6d3': + error('create_celebahq requires libjpeg version 8d') # conda install jpeg=8d + + def rot90(v): + return np.array([-v[1], v[0]]) + + def process_func(idx): + # Load original image. + orig_idx = fields['orig_idx'][idx] + orig_file = fields['orig_file'][idx] + orig_path = os.path.join(celeba_dir, 'img_celeba', orig_file) + img = PIL.Image.open(orig_path) + + # Choose oriented crop rectangle. + lm = landmarks[orig_idx] + eye_avg = (lm[0] + lm[1]) * 0.5 + 0.5 + mouth_avg = (lm[3] + lm[4]) * 0.5 + 0.5 + eye_to_eye = lm[1] - lm[0] + eye_to_mouth = mouth_avg - eye_avg + x = eye_to_eye - rot90(eye_to_mouth) + x /= np.hypot(*x) + x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8) + y = rot90(x) + c = eye_avg + eye_to_mouth * 0.1 + quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y]) + zoom = 1024 / (np.hypot(*x) * 2) + + # Shrink. + shrink = int(np.floor(0.5 / zoom)) + if shrink > 1: + size = (int(np.round(float(img.size[0]) / shrink)), int(np.round(float(img.size[1]) / shrink))) + img = img.resize(size, PIL.Image.ANTIALIAS) + quad /= shrink + zoom *= shrink + + # Crop. + border = max(int(np.round(1024 * 0.1 / zoom)), 3) + crop = (int(np.floor(min(quad[:,0]))), int(np.floor(min(quad[:,1]))), int(np.ceil(max(quad[:,0]))), int(np.ceil(max(quad[:,1])))) + crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), min(crop[3] + border, img.size[1])) + if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]: + img = img.crop(crop) + quad -= crop[0:2] + + # Simulate super-resolution. + superres = int(np.exp2(np.ceil(np.log2(zoom)))) + if superres > 1: + img = img.resize((img.size[0] * superres, img.size[1] * superres), PIL.Image.ANTIALIAS) + quad *= superres + zoom /= superres + + # Pad. + pad = (int(np.floor(min(quad[:,0]))), int(np.floor(min(quad[:,1]))), int(np.ceil(max(quad[:,0]))), int(np.ceil(max(quad[:,1])))) + pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0), max(pad[3] - img.size[1] + border, 0)) + if max(pad) > border - 4: + pad = np.maximum(pad, int(np.round(1024 * 0.3 / zoom))) + img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect') + h, w, _ = img.shape + y, x, _ = np.mgrid[:h, :w, :1] + mask = 1.0 - np.minimum(np.minimum(np.float32(x) / pad[0], np.float32(y) / pad[1]), np.minimum(np.float32(w-1-x) / pad[2], np.float32(h-1-y) / pad[3])) + blur = 1024 * 0.02 / zoom + img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0) + img += (np.median(img, axis=(0,1)) - img) * np.clip(mask, 0.0, 1.0) + img = PIL.Image.fromarray(np.uint8(np.clip(np.round(img), 0, 255)), 'RGB') + quad += pad[0:2] + + # Transform. + img = img.transform((4096, 4096), PIL.Image.QUAD, (quad + 0.5).flatten(), PIL.Image.BILINEAR) + img = img.resize((1024, 1024), PIL.Image.ANTIALIAS) + img = np.asarray(img).transpose(2, 0, 1) + + # Verify MD5. + md5 = hashlib.md5() + md5.update(img.tobytes()) + assert md5.hexdigest() == fields['proc_md5'][idx] + + # Load delta image and original JPG. 
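+        # Each delta lives in a zip of 1000 and is encrypted with Fernet; the key is
+        # derived from the raw bytes of the original JPEG via PBKDF2-HMAC-SHA256
+        # (salt = original file name, 100000 iterations), and the decrypted payload
+        # is bz2-compressed uint8 data of shape (3, 1024, 1024).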
+ with zipfile.ZipFile(os.path.join(delta_dir, 'deltas%05d.zip' % (idx - idx % 1000)), 'r') as zip: + delta_bytes = zip.read('delta%05d.dat' % idx) + with open(orig_path, 'rb') as file: + orig_bytes = file.read() + + # Decrypt delta image, using original JPG data as decryption key. + algorithm = cryptography.hazmat.primitives.hashes.SHA256() + backend = cryptography.hazmat.backends.default_backend() + salt = bytes(orig_file, 'ascii') + kdf = cryptography.hazmat.primitives.kdf.pbkdf2.PBKDF2HMAC(algorithm=algorithm, length=32, salt=salt, iterations=100000, backend=backend) + key = base64.urlsafe_b64encode(kdf.derive(orig_bytes)) + delta = np.frombuffer(bz2.decompress(cryptography.fernet.Fernet(key).decrypt(delta_bytes)), dtype=np.uint8).reshape(3, 1024, 1024) + + # Apply delta image. + img = img + delta + + # Verify MD5. + md5 = hashlib.md5() + md5.update(img.tobytes()) + assert md5.hexdigest() == fields['final_md5'][idx] + return img + + with TFRecordExporter(tfrecord_dir, indices.size) as tfr: + order = tfr.choose_shuffled_order() + with ThreadPool(num_threads) as pool: + for img in pool.process_items_concurrently(indices[order].tolist(), process_func=process_func, max_items_in_flight=num_tasks): + tfr.add_image(img) + +#---------------------------------------------------------------------------- + +def create_from_images(tfrecord_dir, image_dir, shuffle): + print('Loading images from "%s"' % image_dir) + image_filenames = sorted(glob.glob(os.path.join(image_dir, '*'))) + if len(image_filenames) == 0: + error('No input images found') + + img = np.asarray(PIL.Image.open(image_filenames[0])) + resolution = img.shape[0] + channels = img.shape[2] if img.ndim == 3 else 1 + if img.shape[1] != resolution: + error('Input images must have the same width and height') + if resolution != 2 ** int(np.floor(np.log2(resolution))): + error('Input image resolution must be a power-of-two') + if channels not in [1, 3]: + error('Input images must be stored as RGB or grayscale') + + with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: + order = tfr.choose_shuffled_order() if shuffle else np.arange(len(image_filenames)) + for idx in range(order.size): + img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) + if channels == 1: + img = img[np.newaxis, :, :] # HW => CHW + else: + img = img.transpose(2, 0, 1) # HWC => CHW + tfr.add_image(img) + +#---------------------------------------------------------------------------- + +def create_from_hdf5(tfrecord_dir, hdf5_filename, shuffle): + print('Loading HDF5 archive from "%s"' % hdf5_filename) + import h5py # conda install h5py + with h5py.File(hdf5_filename, 'r') as hdf5_file: + hdf5_data = max([value for key, value in hdf5_file.items() if key.startswith('data')], key=lambda lod: lod.shape[3]) + with TFRecordExporter(tfrecord_dir, hdf5_data.shape[0]) as tfr: + order = tfr.choose_shuffled_order() if shuffle else np.arange(hdf5_data.shape[0]) + for idx in range(order.size): + tfr.add_image(hdf5_data[order[idx]]) + npy_filename = os.path.splitext(hdf5_filename)[0] + '-labels.npy' + if os.path.isfile(npy_filename): + tfr.add_labels(np.load(npy_filename)[order]) + +#---------------------------------------------------------------------------- + +def execute_cmdline(argv): + prog = argv[0] + parser = argparse.ArgumentParser( + prog = prog, + description = 'Tool for creating, extracting, and visualizing Progressive GAN datasets.', + epilog = 'Type "%s -h" for more information.' 
% prog) + + subparsers = parser.add_subparsers(dest='command') + subparsers.required = True + def add_command(cmd, desc, example=None): + epilog = 'Example: %s %s' % (prog, example) if example is not None else None + return subparsers.add_parser(cmd, description=desc, help=desc, epilog=epilog) + + p = add_command( 'display', 'Display images in dataset.', + 'display datasets/mnist') + p.add_argument( 'tfrecord_dir', help='Directory containing dataset') + + p = add_command( 'extract', 'Extract images from dataset.', + 'extract datasets/mnist mnist-images') + p.add_argument( 'tfrecord_dir', help='Directory containing dataset') + p.add_argument( 'output_dir', help='Directory to extract the images into') + + p = add_command( 'compare', 'Compare two datasets.', + 'compare datasets/mydataset datasets/mnist') + p.add_argument( 'tfrecord_dir_a', help='Directory containing first dataset') + p.add_argument( 'tfrecord_dir_b', help='Directory containing second dataset') + p.add_argument( '--ignore_labels', help='Ignore labels (default: 0)', type=int, default=0) + + p = add_command( 'create_mnist', 'Create dataset for MNIST.', + 'create_mnist datasets/mnist ~/downloads/mnist') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'mnist_dir', help='Directory containing MNIST') + + p = add_command( 'create_mnistrgb', 'Create dataset for MNIST-RGB.', + 'create_mnistrgb datasets/mnistrgb ~/downloads/mnist') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'mnist_dir', help='Directory containing MNIST') + p.add_argument( '--num_images', help='Number of composite images to create (default: 1000000)', type=int, default=1000000) + p.add_argument( '--random_seed', help='Random seed (default: 123)', type=int, default=123) + + p = add_command( 'create_cifar10', 'Create dataset for CIFAR-10.', + 'create_cifar10 datasets/cifar10 ~/downloads/cifar10') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'cifar10_dir', help='Directory containing CIFAR-10') + + p = add_command( 'create_cifar100', 'Create dataset for CIFAR-100.', + 'create_cifar100 datasets/cifar100 ~/downloads/cifar100') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'cifar100_dir', help='Directory containing CIFAR-100') + + p = add_command( 'create_svhn', 'Create dataset for SVHN.', + 'create_svhn datasets/svhn ~/downloads/svhn') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'svhn_dir', help='Directory containing SVHN') + + p = add_command( 'create_lsun', 'Create dataset for single LSUN category.', + 'create_lsun datasets/lsun-car-100k ~/downloads/lsun/car_lmdb --resolution 256 --max_images 100000') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'lmdb_dir', help='Directory containing LMDB database') + p.add_argument( '--resolution', help='Output resolution (default: 256)', type=int, default=256) + p.add_argument( '--max_images', help='Maximum number of images (default: none)', type=int, default=None) + + p = add_command( 'create_celeba', 'Create dataset for CelebA.', + 'create_celeba datasets/celeba ~/downloads/celeba') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'celeba_dir', help='Directory containing CelebA') + p.add_argument( '--cx', help='Center X coordinate (default: 89)', type=int, default=89) + p.add_argument( 
'--cy', help='Center Y coordinate (default: 121)', type=int, default=121) + + p = add_command( 'create_celebahq', 'Create dataset for CelebA-HQ.', + 'create_celebahq datasets/celebahq ~/downloads/celeba ~/downloads/celeba-hq-deltas') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'celeba_dir', help='Directory containing CelebA') + p.add_argument( 'delta_dir', help='Directory containing CelebA-HQ deltas') + p.add_argument( '--num_threads', help='Number of concurrent threads (default: 4)', type=int, default=4) + p.add_argument( '--num_tasks', help='Number of concurrent processing tasks (default: 100)', type=int, default=100) + + p = add_command( 'create_from_images', 'Create dataset from a directory full of images.', + 'create_from_images datasets/mydataset myimagedir') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'image_dir', help='Directory containing the images') + p.add_argument( '--shuffle', help='Randomize image order (default: 1)', type=int, default=1) + + p = add_command( 'create_from_hdf5', 'Create dataset from legacy HDF5 archive.', + 'create_from_hdf5 datasets/celebahq ~/downloads/celeba-hq-1024x1024.h5') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'hdf5_filename', help='HDF5 archive containing the images') + p.add_argument( '--shuffle', help='Randomize image order (default: 1)', type=int, default=1) + + args = parser.parse_args(argv[1:] if len(argv) > 1 else ['-h']) + func = globals()[args.command] + del args.command + func(**vars(args)) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + execute_cmdline(sys.argv) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/pggan_official/legacy.py b/ContraCLIP/models/genforce/converters/pggan_official/legacy.py new file mode 100644 index 0000000000000000000000000000000000000000..ebce17987b5515fad02c310c0e1c7565942c80ea --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/legacy.py @@ -0,0 +1,117 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +import pickle +import inspect +import numpy as np + +import tfutil +import networks + +#---------------------------------------------------------------------------- +# Custom unpickler that is able to load network pickles produced by +# the old Theano implementation. + +class LegacyUnpickler(pickle.Unpickler): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def find_class(self, module, name): + if module == 'network' and name == 'Network': + return tfutil.Network + return super().find_class(module, name) + +#---------------------------------------------------------------------------- +# Import handler for tfutil.Network that silently converts networks produced +# by the old Theano implementation to a suitable format. 
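+#
+# Conversion sketch: a Theano-era layer trained with use_wscale=True stores raw
+# weights plus an explicit per-layer scale, whereas the TF implementation applies
+# He-style scaling (gain / sqrt(fan_in)) at runtime. The helpers below therefore
+# bake the stored scale into the weights and divide the runtime factor back out,
+# roughly
+#
+#     w_new = w_old * stored_scale / (gain / sqrt(prod(w_old.shape[:-1])))
+#
+# so both implementations end up with the same effective weights.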
+ +theano_gan_remap = { + 'G_paper': 'G_paper', + 'G_progressive_8': 'G_paper', + 'D_paper': 'D_paper', + 'D_progressive_8': 'D_paper'} + +def patch_theano_gan(state): + if 'version' in state or state['build_func_spec']['func'] not in theano_gan_remap: + return state + + spec = dict(state['build_func_spec']) + func = spec.pop('func') + resolution = spec.get('resolution', 32) + resolution_log2 = int(np.log2(resolution)) + use_wscale = spec.get('use_wscale', True) + + assert spec.pop('label_size', 0) == 0 + assert spec.pop('use_batchnorm', False) == False + assert spec.pop('tanh_at_end', None) is None + assert spec.pop('mbstat_func', 'Tstdeps') == 'Tstdeps' + assert spec.pop('mbstat_avg', 'all') == 'all' + assert spec.pop('mbdisc_kernels', None) is None + spec.pop( 'use_gdrop', True) # doesn't make a difference + assert spec.pop('use_layernorm', False) == False + spec[ 'fused_scale'] = False + spec[ 'mbstd_group_size'] = 16 + + vars = [] + param_iter = iter(state['param_values']) + relu = np.sqrt(2); linear = 1.0 + def flatten2(w): return w.reshape(w.shape[0], -1) + def he_std(gain, w): return gain / np.sqrt(np.prod(w.shape[:-1])) + def wscale(gain, w): return w * next(param_iter) / he_std(gain, w) if use_wscale else w + def layer(name, gain, w): return [(name + '/weight', wscale(gain, w)), (name + '/bias', next(param_iter))] + + if func.startswith('G'): + vars += layer('4x4/Dense', relu/4, flatten2(next(param_iter).transpose(1,0,2,3))) + vars += layer('4x4/Conv', relu, next(param_iter).transpose(2,3,1,0)[::-1,::-1]) + for res in range(3, resolution_log2 + 1): + vars += layer('%dx%d/Conv0' % (2**res, 2**res), relu, next(param_iter).transpose(2,3,1,0)[::-1,::-1]) + vars += layer('%dx%d/Conv1' % (2**res, 2**res), relu, next(param_iter).transpose(2,3,1,0)[::-1,::-1]) + for lod in range(0, resolution_log2 - 1): + vars += layer('ToRGB_lod%d' % lod, linear, next(param_iter)[np.newaxis, np.newaxis]) + + if func.startswith('D'): + vars += layer('FromRGB_lod0', relu, next(param_iter)[np.newaxis, np.newaxis]) + for res in range(resolution_log2, 2, -1): + vars += layer('%dx%d/Conv0' % (2**res, 2**res), relu, next(param_iter).transpose(2,3,1,0)[::-1,::-1]) + vars += layer('%dx%d/Conv1' % (2**res, 2**res), relu, next(param_iter).transpose(2,3,1,0)[::-1,::-1]) + vars += layer('FromRGB_lod%d' % (resolution_log2 - (res - 1)), relu, next(param_iter)[np.newaxis, np.newaxis]) + vars += layer('4x4/Conv', relu, next(param_iter).transpose(2,3,1,0)[::-1,::-1]) + vars += layer('4x4/Dense0', relu, flatten2(next(param_iter)[:,:,::-1,::-1]).transpose()) + vars += layer('4x4/Dense1', linear, next(param_iter)) + + vars += [('lod', state['toplevel_params']['cur_lod'])] + + return { + 'version': 2, + 'name': func, + 'build_module_src': inspect.getsource(networks), + 'build_func_name': theano_gan_remap[func], + 'static_kwargs': spec, + 'variables': vars} + +tfutil.network_import_handlers.append(patch_theano_gan) + +#---------------------------------------------------------------------------- +# Import handler for tfutil.Network that ignores unsupported/deprecated +# networks produced by older versions of the code. 
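+# Instead of failing, any such network is replaced below by a trivial 'dummy'
+# definition with no variables, so unpickling old snapshots still succeeds; the
+# actual G/D networks are handled by patch_theano_gan() above.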
+ +def ignore_unknown_theano_network(state): + if 'version' in state: + return state + + print('Ignoring unknown Theano network:', state['build_func_spec']['func']) + return { + 'version': 2, + 'name': 'Dummy', + 'build_module_src': 'def dummy(input, **kwargs): input.set_shape([None, 1]); return input', + 'build_func_name': 'dummy', + 'static_kwargs': {}, + 'variables': []} + +tfutil.network_import_handlers.append(ignore_unknown_theano_network) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/pggan_official/loss.py b/ContraCLIP/models/genforce/converters/pggan_official/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..b485d50954c01e99dd5568fe4b91aaca5599902a --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/loss.py @@ -0,0 +1,82 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +import numpy as np +import tensorflow as tf + +import tfutil + +#---------------------------------------------------------------------------- +# Convenience func that casts all of its arguments to tf.float32. + +def fp32(*values): + if len(values) == 1 and isinstance(values[0], tuple): + values = values[0] + values = tuple(tf.cast(v, tf.float32) for v in values) + return values if len(values) >= 2 else values[0] + +#---------------------------------------------------------------------------- +# Generator loss function used in the paper (WGAN + AC-GAN). + +def G_wgan_acgan(G, D, opt, training_set, minibatch_size, + cond_weight = 1.0): # Weight of the conditioning term. + + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + labels = training_set.get_random_labels_tf(minibatch_size) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + fake_scores_out, fake_labels_out = fp32(D.get_output_for(fake_images_out, is_training=True)) + loss = -fake_scores_out + + if D.output_shapes[1][1] > 0: + with tf.name_scope('LabelPenalty'): + label_penalty_fakes = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=fake_labels_out) + loss += label_penalty_fakes * cond_weight + return loss + +#---------------------------------------------------------------------------- +# Discriminator loss function used in the paper (WGAN-GP + AC-GAN). + +def D_wgangp_acgan(G, D, opt, training_set, minibatch_size, reals, labels, + wgan_lambda = 10.0, # Weight for the gradient penalty term. + wgan_epsilon = 0.001, # Weight for the epsilon term, \epsilon_{drift}. + wgan_target = 1.0, # Target value for gradient magnitudes. + cond_weight = 1.0): # Weight of the conditioning terms. 
+ + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + real_scores_out, real_labels_out = fp32(D.get_output_for(reals, is_training=True)) + fake_scores_out, fake_labels_out = fp32(D.get_output_for(fake_images_out, is_training=True)) + real_scores_out = tfutil.autosummary('Loss/real_scores', real_scores_out) + fake_scores_out = tfutil.autosummary('Loss/fake_scores', fake_scores_out) + loss = fake_scores_out - real_scores_out + + with tf.name_scope('GradientPenalty'): + mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fake_images_out.dtype) + mixed_images_out = tfutil.lerp(tf.cast(reals, fake_images_out.dtype), fake_images_out, mixing_factors) + mixed_scores_out, mixed_labels_out = fp32(D.get_output_for(mixed_images_out, is_training=True)) + mixed_scores_out = tfutil.autosummary('Loss/mixed_scores', mixed_scores_out) + mixed_loss = opt.apply_loss_scaling(tf.reduce_sum(mixed_scores_out)) + mixed_grads = opt.undo_loss_scaling(fp32(tf.gradients(mixed_loss, [mixed_images_out])[0])) + mixed_norms = tf.sqrt(tf.reduce_sum(tf.square(mixed_grads), axis=[1,2,3])) + mixed_norms = tfutil.autosummary('Loss/mixed_norms', mixed_norms) + gradient_penalty = tf.square(mixed_norms - wgan_target) + loss += gradient_penalty * (wgan_lambda / (wgan_target**2)) + + with tf.name_scope('EpsilonPenalty'): + epsilon_penalty = tfutil.autosummary('Loss/epsilon_penalty', tf.square(real_scores_out)) + loss += epsilon_penalty * wgan_epsilon + + if D.output_shapes[1][1] > 0: + with tf.name_scope('LabelPenalty'): + label_penalty_reals = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=real_labels_out) + label_penalty_fakes = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=fake_labels_out) + label_penalty_reals = tfutil.autosummary('Loss/label_penalty_reals', label_penalty_reals) + label_penalty_fakes = tfutil.autosummary('Loss/label_penalty_fakes', label_penalty_fakes) + loss += (label_penalty_reals + label_penalty_fakes) * cond_weight + return loss + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/pggan_official/metrics/__init__.py b/ContraCLIP/models/genforce/converters/pggan_official/metrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1bb8bf6d7fd4c8d09aea89b47de20fb8bbb61626 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/metrics/__init__.py @@ -0,0 +1 @@ +# empty diff --git a/ContraCLIP/models/genforce/converters/pggan_official/metrics/frechet_inception_distance.py b/ContraCLIP/models/genforce/converters/pggan_official/metrics/frechet_inception_distance.py new file mode 100644 index 0000000000000000000000000000000000000000..565bd36e8f587a5ceec441710f6fdae2ce14fe99 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/metrics/frechet_inception_distance.py @@ -0,0 +1,281 @@ +#!/usr/bin/env python3 +# +# Copyright 2017 Martin Heusel +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Adapted from the original implementation by Martin Heusel. +# Source https://github.com/bioinf-jku/TTUR/blob/master/fid.py + +''' Calculates the Frechet Inception Distance (FID) to evalulate GANs. + +The FID metric calculates the distance between two distributions of images. +Typically, we have summary statistics (mean & covariance matrix) of one +of these distributions, while the 2nd distribution is given by a GAN. + +When run as a stand-alone program, it compares the distribution of +images that are stored as PNG/JPEG at a specified location with a +distribution given by summary statistics (in pickle format). + +The FID is calculated by assuming that X_1 and X_2 are the activations of +the pool_3 layer of the inception net for generated samples and real world +samples respectivly. + +See --help to see further details. +''' + +from __future__ import absolute_import, division, print_function +import numpy as np +import scipy as sp +import os +import gzip, pickle +import tensorflow as tf +from scipy.misc import imread +import pathlib +import urllib + + +class InvalidFIDException(Exception): + pass + + +def create_inception_graph(pth): + """Creates a graph from saved GraphDef file.""" + # Creates graph from saved graph_def.pb. + with tf.gfile.FastGFile( pth, 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString( f.read()) + _ = tf.import_graph_def( graph_def, name='FID_Inception_Net') +#------------------------------------------------------------------------------- + + +# code for handling inception net derived from +# https://github.com/openai/improved-gan/blob/master/inception_score/model.py +def _get_inception_layer(sess): + """Prepares inception net for batched usage and returns pool_3 layer. """ + layername = 'FID_Inception_Net/pool_3:0' + pool3 = sess.graph.get_tensor_by_name(layername) + ops = pool3.graph.get_operations() + for op_idx, op in enumerate(ops): + for o in op.outputs: + shape = o.get_shape() + if shape._dims is not None: + shape = [s.value for s in shape] + new_shape = [] + for j, s in enumerate(shape): + if s == 1 and j == 0: + new_shape.append(None) + else: + new_shape.append(s) + try: + o._shape = tf.TensorShape(new_shape) + except ValueError: + o._shape_val = tf.TensorShape(new_shape) # EDIT: added for compatibility with tensorflow 1.6.0 + return pool3 +#------------------------------------------------------------------------------- + + +def get_activations(images, sess, batch_size=50, verbose=False): + """Calculates the activations of the pool_3 layer for all images. + + Params: + -- images : Numpy array of dimension (n_images, hi, wi, 3). The values + must lie between 0 and 256. + -- sess : current session + -- batch_size : the images numpy array is split into batches with batch size + batch_size. A reasonable batch size depends on the disposable hardware. + -- verbose : If set to True and parameter out_step is given, the number of calculated + batches is reported. + Returns: + -- A numpy array of dimension (num images, 2048) that contains the + activations of the given tensor when feeding inception with the query tensor. + """ + inception_layer = _get_inception_layer(sess) + d0 = images.shape[0] + if batch_size > d0: + print("warning: batch size is bigger than the data size. 
setting batch size to data size") + batch_size = d0 + n_batches = d0//batch_size + n_used_imgs = n_batches*batch_size + pred_arr = np.empty((n_used_imgs,2048)) + for i in range(n_batches): + if verbose: + print("\rPropagating batch %d/%d" % (i+1, n_batches), end="", flush=True) + start = i*batch_size + end = start + batch_size + batch = images[start:end] + pred = sess.run(inception_layer, {'FID_Inception_Net/ExpandDims:0': batch}) + pred_arr[start:end] = pred.reshape(batch_size,-1) + if verbose: + print(" done") + return pred_arr +#------------------------------------------------------------------------------- + + +def calculate_frechet_distance(mu1, sigma1, mu2, sigma2): + """Numpy implementation of the Frechet Distance. + The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1) + and X_2 ~ N(mu_2, C_2) is + d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)). + + Params: + -- mu1 : Numpy array containing the activations of the pool_3 layer of the + inception net ( like returned by the function 'get_predictions') + -- mu2 : The sample mean over activations of the pool_3 layer, precalcualted + on an representive data set. + -- sigma2: The covariance matrix over activations of the pool_3 layer, + precalcualted on an representive data set. + + Returns: + -- dist : The Frechet Distance. + + Raises: + -- InvalidFIDException if nan occures. + """ + m = np.square(mu1 - mu2).sum() + #s = sp.linalg.sqrtm(np.dot(sigma1, sigma2)) # EDIT: commented out + s, _ = sp.linalg.sqrtm(np.dot(sigma1, sigma2), disp=False) # EDIT: added + dist = m + np.trace(sigma1+sigma2 - 2*s) + #if np.isnan(dist): # EDIT: commented out + # raise InvalidFIDException("nan occured in distance calculation.") # EDIT: commented out + #return dist # EDIT: commented out + return np.real(dist) # EDIT: added +#------------------------------------------------------------------------------- + + +def calculate_activation_statistics(images, sess, batch_size=50, verbose=False): + """Calculation of the statistics used by the FID. + Params: + -- images : Numpy array of dimension (n_images, hi, wi, 3). The values + must lie between 0 and 255. + -- sess : current session + -- batch_size : the images numpy array is split into batches with batch size + batch_size. A reasonable batch size depends on the available hardware. + -- verbose : If set to True and parameter out_step is given, the number of calculated + batches is reported. + Returns: + -- mu : The mean over samples of the activations of the pool_3 layer of + the incption model. + -- sigma : The covariance matrix of the activations of the pool_3 layer of + the incption model. + """ + act = get_activations(images, sess, batch_size, verbose) + mu = np.mean(act, axis=0) + sigma = np.cov(act, rowvar=False) + return mu, sigma +#------------------------------------------------------------------------------- + + +#------------------------------------------------------------------------------- +# The following functions aren't needed for calculating the FID +# they're just here to make this module work as a stand-alone script +# for calculating FID scores +#------------------------------------------------------------------------------- +def check_or_download_inception(inception_path): + ''' Checks if the path to the inception file is valid, or downloads + the file if it is not present. 
''' + INCEPTION_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' + if inception_path is None: + inception_path = '/tmp' + inception_path = pathlib.Path(inception_path) + model_file = inception_path / 'classify_image_graph_def.pb' + if not model_file.exists(): + print("Downloading Inception model") + from urllib import request + import tarfile + fn, _ = request.urlretrieve(INCEPTION_URL) + with tarfile.open(fn, mode='r') as f: + f.extract('classify_image_graph_def.pb', str(model_file.parent)) + return str(model_file) + + +def _handle_path(path, sess): + if path.endswith('.npz'): + f = np.load(path) + m, s = f['mu'][:], f['sigma'][:] + f.close() + else: + path = pathlib.Path(path) + files = list(path.glob('*.jpg')) + list(path.glob('*.png')) + x = np.array([imread(str(fn)).astype(np.float32) for fn in files]) + m, s = calculate_activation_statistics(x, sess) + return m, s + + +def calculate_fid_given_paths(paths, inception_path): + ''' Calculates the FID of two paths. ''' + inception_path = check_or_download_inception(inception_path) + + for p in paths: + if not os.path.exists(p): + raise RuntimeError("Invalid path: %s" % p) + + create_inception_graph(str(inception_path)) + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + m1, s1 = _handle_path(paths[0], sess) + m2, s2 = _handle_path(paths[1], sess) + fid_value = calculate_frechet_distance(m1, s1, m2, s2) + return fid_value + + +if __name__ == "__main__": + from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter + parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) + parser.add_argument("path", type=str, nargs=2, + help='Path to the generated images or to .npz statistic files') + parser.add_argument("-i", "--inception", type=str, default=None, + help='Path to Inception model (will be downloaded if not provided)') + parser.add_argument("--gpu", default="", type=str, + help='GPU to use (leave blank for CPU only)') + args = parser.parse_args() + os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu + fid_value = calculate_fid_given_paths(args.path, args.inception) + print("FID: ", fid_value) + +#---------------------------------------------------------------------------- +# EDIT: added + +class API: + def __init__(self, num_images, image_shape, image_dtype, minibatch_size): + import config + self.network_dir = os.path.join(config.result_dir, '_inception_fid') + self.network_file = check_or_download_inception(self.network_dir) + self.sess = tf.get_default_session() + create_inception_graph(self.network_file) + + def get_metric_names(self): + return ['FID'] + + def get_metric_formatting(self): + return ['%-10.4f'] + + def begin(self, mode): + assert mode in ['warmup', 'reals', 'fakes'] + self.activations = [] + + def feed(self, mode, minibatch): + act = get_activations(minibatch.transpose(0,2,3,1), self.sess, batch_size=minibatch.shape[0]) + self.activations.append(act) + + def end(self, mode): + act = np.concatenate(self.activations) + mu = np.mean(act, axis=0) + sigma = np.cov(act, rowvar=False) + if mode in ['warmup', 'reals']: + self.mu_real = mu + self.sigma_real = sigma + fid = calculate_frechet_distance(mu, sigma, self.mu_real, self.sigma_real) + return [fid] + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/pggan_official/metrics/inception_score.py b/ContraCLIP/models/genforce/converters/pggan_official/metrics/inception_score.py new file mode 100644 index 
0000000000000000000000000000000000000000..c7ed7483072b7844a3779c965fa058ef75d06f5a --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/metrics/inception_score.py @@ -0,0 +1,147 @@ +# Copyright 2016 Wojciech Zaremba +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Adapted from the original implementation by Wojciech Zaremba. +# Source: https://github.com/openai/improved-gan/blob/master/inception_score/model.py + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os.path +import sys +import tarfile + +import numpy as np +from six.moves import urllib +import tensorflow as tf +import glob +import scipy.misc +import math +import sys + +MODEL_DIR = '/tmp/imagenet' + +DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' +softmax = None + +# Call this function with list of images. Each of elements should be a +# numpy array with values ranging from 0 to 255. +def get_inception_score(images, splits=10): + assert(type(images) == list) + assert(type(images[0]) == np.ndarray) + assert(len(images[0].shape) == 3) + #assert(np.max(images[0]) > 10) # EDIT: commented out + #assert(np.min(images[0]) >= 0.0) + inps = [] + for img in images: + img = img.astype(np.float32) + inps.append(np.expand_dims(img, 0)) + bs = 100 + with tf.Session() as sess: + preds = [] + n_batches = int(math.ceil(float(len(inps)) / float(bs))) + for i in range(n_batches): + #sys.stdout.write(".") # EDIT: commented out + #sys.stdout.flush() + inp = inps[(i * bs):min((i + 1) * bs, len(inps))] + inp = np.concatenate(inp, 0) + pred = sess.run(softmax, {'ExpandDims:0': inp}) + preds.append(pred) + preds = np.concatenate(preds, 0) + scores = [] + for i in range(splits): + part = preds[(i * preds.shape[0] // splits):((i + 1) * preds.shape[0] // splits), :] + kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0))) + kl = np.mean(np.sum(kl, 1)) + scores.append(np.exp(kl)) + return np.mean(scores), np.std(scores) + +# This function is called automatically. +def _init_inception(): + global softmax + if not os.path.exists(MODEL_DIR): + os.makedirs(MODEL_DIR) + filename = DATA_URL.split('/')[-1] + filepath = os.path.join(MODEL_DIR, filename) + if not os.path.exists(filepath): + def _progress(count, block_size, total_size): + sys.stdout.write('\r>> Downloading %s %.1f%%' % ( + filename, float(count * block_size) / float(total_size) * 100.0)) + sys.stdout.flush() + filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress) + print() + statinfo = os.stat(filepath) + print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.') + tarfile.open(filepath, 'r:gz').extractall(MODEL_DIR) # EDIT: increased indent + with tf.gfile.FastGFile(os.path.join( + MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + _ = tf.import_graph_def(graph_def, name='') + # Works with an arbitrary minibatch size. 
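+    # The pretrained Inception graph caches output shapes with batch size 1; the
+    # loop below rewrites the leading dimension to None (through the private
+    # _shape / _shape_val attributes) so that pool_3 activations can be evaluated
+    # for arbitrary minibatch sizes.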
+ with tf.Session() as sess: + pool3 = sess.graph.get_tensor_by_name('pool_3:0') + ops = pool3.graph.get_operations() + for op_idx, op in enumerate(ops): + for o in op.outputs: + shape = o.get_shape() + shape = [s.value for s in shape] + new_shape = [] + for j, s in enumerate(shape): + if s == 1 and j == 0: + new_shape.append(None) + else: + new_shape.append(s) + try: + o._shape = tf.TensorShape(new_shape) + except ValueError: + o._shape_val = tf.TensorShape(new_shape) # EDIT: added for compatibility with tensorflow 1.6.0 + w = sess.graph.get_operation_by_name("softmax/logits/MatMul").inputs[1] + logits = tf.matmul(tf.squeeze(pool3), w) + softmax = tf.nn.softmax(logits) + +#if softmax is None: # EDIT: commented out +# _init_inception() # EDIT: commented out + +#---------------------------------------------------------------------------- +# EDIT: added + +class API: + def __init__(self, num_images, image_shape, image_dtype, minibatch_size): + import config + globals()['MODEL_DIR'] = os.path.join(config.result_dir, '_inception') + self.sess = tf.get_default_session() + _init_inception() + + def get_metric_names(self): + return ['IS_mean', 'IS_std'] + + def get_metric_formatting(self): + return ['%-10.4f', '%-10.4f'] + + def begin(self, mode): + assert mode in ['warmup', 'reals', 'fakes'] + self.images = [] + + def feed(self, mode, minibatch): + self.images.append(minibatch.transpose(0, 2, 3, 1)) + + def end(self, mode): + images = list(np.concatenate(self.images)) + with self.sess.as_default(): + mean, std = get_inception_score(images) + return [mean, std] + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/pggan_official/metrics/ms_ssim.py b/ContraCLIP/models/genforce/converters/pggan_official/metrics/ms_ssim.py new file mode 100644 index 0000000000000000000000000000000000000000..1135f2a7788d4c6c68e22aeb2cdeaaeed780df75 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/metrics/ms_ssim.py @@ -0,0 +1,200 @@ +#!/usr/bin/python +# +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# Adapted from the original implementation by The TensorFlow Authors. 
+# Source: https://github.com/tensorflow/models/blob/master/research/compression/image_encoder/msssim.py + +import numpy as np +from scipy import signal +from scipy.ndimage.filters import convolve + +def _FSpecialGauss(size, sigma): + """Function to mimic the 'fspecial' gaussian MATLAB function.""" + radius = size // 2 + offset = 0.0 + start, stop = -radius, radius + 1 + if size % 2 == 0: + offset = 0.5 + stop -= 1 + x, y = np.mgrid[offset + start:stop, offset + start:stop] + assert len(x) == size + g = np.exp(-((x**2 + y**2)/(2.0 * sigma**2))) + return g / g.sum() + +def _SSIMForMultiScale(img1, img2, max_val=255, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03): + """Return the Structural Similarity Map between `img1` and `img2`. + + This function attempts to match the functionality of ssim_index_new.m by + Zhou Wang: http://www.cns.nyu.edu/~lcv/ssim/msssim.zip + + Arguments: + img1: Numpy array holding the first RGB image batch. + img2: Numpy array holding the second RGB image batch. + max_val: the dynamic range of the images (i.e., the difference between the + maximum the and minimum allowed values). + filter_size: Size of blur kernel to use (will be reduced for small images). + filter_sigma: Standard deviation for Gaussian blur kernel (will be reduced + for small images). + k1: Constant used to maintain stability in the SSIM calculation (0.01 in + the original paper). + k2: Constant used to maintain stability in the SSIM calculation (0.03 in + the original paper). + + Returns: + Pair containing the mean SSIM and contrast sensitivity between `img1` and + `img2`. + + Raises: + RuntimeError: If input images don't have the same shape or don't have four + dimensions: [batch_size, height, width, depth]. + """ + if img1.shape != img2.shape: + raise RuntimeError('Input images must have the same shape (%s vs. %s).' % (img1.shape, img2.shape)) + if img1.ndim != 4: + raise RuntimeError('Input images must have four dimensions, not %d' % img1.ndim) + + img1 = img1.astype(np.float32) + img2 = img2.astype(np.float32) + _, height, width, _ = img1.shape + + # Filter size can't be larger than height or width of images. + size = min(filter_size, height, width) + + # Scale down sigma if a smaller filter size is used. + sigma = size * filter_sigma / filter_size if filter_size else 0 + + if filter_size: + window = np.reshape(_FSpecialGauss(size, sigma), (1, size, size, 1)) + mu1 = signal.fftconvolve(img1, window, mode='valid') + mu2 = signal.fftconvolve(img2, window, mode='valid') + sigma11 = signal.fftconvolve(img1 * img1, window, mode='valid') + sigma22 = signal.fftconvolve(img2 * img2, window, mode='valid') + sigma12 = signal.fftconvolve(img1 * img2, window, mode='valid') + else: + # Empty blur kernel so no need to convolve. + mu1, mu2 = img1, img2 + sigma11 = img1 * img1 + sigma22 = img2 * img2 + sigma12 = img1 * img2 + + mu11 = mu1 * mu1 + mu22 = mu2 * mu2 + mu12 = mu1 * mu2 + sigma11 -= mu11 + sigma22 -= mu22 + sigma12 -= mu12 + + # Calculate intermediate values used by both ssim and cs_map. + c1 = (k1 * max_val) ** 2 + c2 = (k2 * max_val) ** 2 + v1 = 2.0 * sigma12 + c2 + v2 = sigma11 + sigma22 + c2 + ssim = np.mean((((2.0 * mu12 + c1) * v1) / ((mu11 + mu22 + c1) * v2)), axis=(1, 2, 3)) # Return for each image individually. 
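+    # In the usual SSIM notation, the two statistics computed in this block are
+    #   ssim = (2*mu1*mu2 + c1)(2*sigma12 + c2) / ((mu1^2 + mu2^2 + c1)(sigma1^2 + sigma2^2 + c2))
+    #   cs   = (2*sigma12 + c2) / (sigma1^2 + sigma2^2 + c2)
+    # with means and (co)variances taken under the Gaussian window defined above.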
+ cs = np.mean(v1 / v2, axis=(1, 2, 3)) + return ssim, cs + +def _HoxDownsample(img): + return (img[:, 0::2, 0::2, :] + img[:, 1::2, 0::2, :] + img[:, 0::2, 1::2, :] + img[:, 1::2, 1::2, :]) * 0.25 + +def msssim(img1, img2, max_val=255, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03, weights=None): + """Return the MS-SSIM score between `img1` and `img2`. + + This function implements Multi-Scale Structural Similarity (MS-SSIM) Image + Quality Assessment according to Zhou Wang's paper, "Multi-scale structural + similarity for image quality assessment" (2003). + Link: https://ece.uwaterloo.ca/~z70wang/publications/msssim.pdf + + Author's MATLAB implementation: + http://www.cns.nyu.edu/~lcv/ssim/msssim.zip + + Arguments: + img1: Numpy array holding the first RGB image batch. + img2: Numpy array holding the second RGB image batch. + max_val: the dynamic range of the images (i.e., the difference between the + maximum the and minimum allowed values). + filter_size: Size of blur kernel to use (will be reduced for small images). + filter_sigma: Standard deviation for Gaussian blur kernel (will be reduced + for small images). + k1: Constant used to maintain stability in the SSIM calculation (0.01 in + the original paper). + k2: Constant used to maintain stability in the SSIM calculation (0.03 in + the original paper). + weights: List of weights for each level; if none, use five levels and the + weights from the original paper. + + Returns: + MS-SSIM score between `img1` and `img2`. + + Raises: + RuntimeError: If input images don't have the same shape or don't have four + dimensions: [batch_size, height, width, depth]. + """ + if img1.shape != img2.shape: + raise RuntimeError('Input images must have the same shape (%s vs. %s).' % (img1.shape, img2.shape)) + if img1.ndim != 4: + raise RuntimeError('Input images must have four dimensions, not %d' % img1.ndim) + + # Note: default weights don't sum to 1.0 but do match the paper / matlab code. + weights = np.array(weights if weights else [0.0448, 0.2856, 0.3001, 0.2363, 0.1333]) + levels = weights.size + downsample_filter = np.ones((1, 2, 2, 1)) / 4.0 + im1, im2 = [x.astype(np.float32) for x in [img1, img2]] + mssim = [] + mcs = [] + for _ in range(levels): + ssim, cs = _SSIMForMultiScale( + im1, im2, max_val=max_val, filter_size=filter_size, + filter_sigma=filter_sigma, k1=k1, k2=k2) + mssim.append(ssim) + mcs.append(cs) + im1, im2 = [_HoxDownsample(x) for x in [im1, im2]] + + # Clip to zero. Otherwise we get NaNs. + mssim = np.clip(np.asarray(mssim), 0.0, np.inf) + mcs = np.clip(np.asarray(mcs), 0.0, np.inf) + + # Average over images only at the end. 
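+    # MS-SSIM combines the contrast-sensitivity term of the first M-1 scales with the
+    # full SSIM of the final (coarsest) scale,
+    #   MS-SSIM = prod_{j=1..M-1} cs_j^{w_j} * ssim_M^{w_M},
+    # and the result below is averaged over the image batch.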
+ return np.mean(np.prod(mcs[:-1, :] ** weights[:-1, np.newaxis], axis=0) * (mssim[-1, :] ** weights[-1])) + +#---------------------------------------------------------------------------- +# EDIT: added + +class API: + def __init__(self, num_images, image_shape, image_dtype, minibatch_size): + assert num_images % 2 == 0 and minibatch_size % 2 == 0 + self.num_pairs = num_images // 2 + + def get_metric_names(self): + return ['MS-SSIM'] + + def get_metric_formatting(self): + return ['%-10.4f'] + + def begin(self, mode): + assert mode in ['warmup', 'reals', 'fakes'] + self.sum = 0.0 + + def feed(self, mode, minibatch): + images = minibatch.transpose(0, 2, 3, 1) + score = msssim(images[0::2], images[1::2]) + self.sum += score * (images.shape[0] // 2) + + def end(self, mode): + avg = self.sum / self.num_pairs + return [avg] + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/pggan_official/metrics/sliced_wasserstein.py b/ContraCLIP/models/genforce/converters/pggan_official/metrics/sliced_wasserstein.py new file mode 100644 index 0000000000000000000000000000000000000000..0028897c3aeffe7eb8f63eb4b1f37c2329dc84cf --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/metrics/sliced_wasserstein.py @@ -0,0 +1,135 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +import numpy as np +import scipy.ndimage + +#---------------------------------------------------------------------------- + +def get_descriptors_for_minibatch(minibatch, nhood_size, nhoods_per_image): + S = minibatch.shape # (minibatch, channel, height, width) + assert len(S) == 4 and S[1] == 3 + N = nhoods_per_image * S[0] + H = nhood_size // 2 + nhood, chan, x, y = np.ogrid[0:N, 0:3, -H:H+1, -H:H+1] + img = nhood // nhoods_per_image + x = x + np.random.randint(H, S[3] - H, size=(N, 1, 1, 1)) + y = y + np.random.randint(H, S[2] - H, size=(N, 1, 1, 1)) + idx = ((img * S[1] + chan) * S[2] + y) * S[3] + x + return minibatch.flat[idx] + +#---------------------------------------------------------------------------- + +def finalize_descriptors(desc): + if isinstance(desc, list): + desc = np.concatenate(desc, axis=0) + assert desc.ndim == 4 # (neighborhood, channel, height, width) + desc -= np.mean(desc, axis=(0, 2, 3), keepdims=True) + desc /= np.std(desc, axis=(0, 2, 3), keepdims=True) + desc = desc.reshape(desc.shape[0], -1) + return desc + +#---------------------------------------------------------------------------- + +def sliced_wasserstein(A, B, dir_repeats, dirs_per_repeat): + assert A.ndim == 2 and A.shape == B.shape # (neighborhood, descriptor_component) + results = [] + for repeat in range(dir_repeats): + dirs = np.random.randn(A.shape[1], dirs_per_repeat) # (descriptor_component, direction) + dirs /= np.sqrt(np.sum(np.square(dirs), axis=0, keepdims=True)) # normalize descriptor components for each direction + dirs = dirs.astype(np.float32) + projA = np.matmul(A, dirs) # (neighborhood, direction) + projB = np.matmul(B, dirs) + projA = np.sort(projA, axis=0) # sort neighborhood projections for each direction + projB = np.sort(projB, axis=0) + dists = np.abs(projA - projB) # pointwise wasserstein distances + results.append(np.mean(dists)) # 
average over neighborhoods and directions + return np.mean(results) # average over repeats + +#---------------------------------------------------------------------------- + +def downscale_minibatch(minibatch, lod): + if lod == 0: + return minibatch + t = minibatch.astype(np.float32) + for i in range(lod): + t = (t[:, :, 0::2, 0::2] + t[:, :, 0::2, 1::2] + t[:, :, 1::2, 0::2] + t[:, :, 1::2, 1::2]) * 0.25 + return np.round(t).clip(0, 255).astype(np.uint8) + +#---------------------------------------------------------------------------- + +gaussian_filter = np.float32([ + [1, 4, 6, 4, 1], + [4, 16, 24, 16, 4], + [6, 24, 36, 24, 6], + [4, 16, 24, 16, 4], + [1, 4, 6, 4, 1]]) / 256.0 + +def pyr_down(minibatch): # matches cv2.pyrDown() + assert minibatch.ndim == 4 + return scipy.ndimage.convolve(minibatch, gaussian_filter[np.newaxis, np.newaxis, :, :], mode='mirror')[:, :, ::2, ::2] + +def pyr_up(minibatch): # matches cv2.pyrUp() + assert minibatch.ndim == 4 + S = minibatch.shape + res = np.zeros((S[0], S[1], S[2] * 2, S[3] * 2), minibatch.dtype) + res[:, :, ::2, ::2] = minibatch + return scipy.ndimage.convolve(res, gaussian_filter[np.newaxis, np.newaxis, :, :] * 4.0, mode='mirror') + +def generate_laplacian_pyramid(minibatch, num_levels): + pyramid = [np.float32(minibatch)] + for i in range(1, num_levels): + pyramid.append(pyr_down(pyramid[-1])) + pyramid[-2] -= pyr_up(pyramid[-1]) + return pyramid + +def reconstruct_laplacian_pyramid(pyramid): + minibatch = pyramid[-1] + for level in pyramid[-2::-1]: + minibatch = pyr_up(minibatch) + level + return minibatch + +#---------------------------------------------------------------------------- + +class API: + def __init__(self, num_images, image_shape, image_dtype, minibatch_size): + self.nhood_size = 7 + self.nhoods_per_image = 128 + self.dir_repeats = 4 + self.dirs_per_repeat = 128 + self.resolutions = [] + res = image_shape[1] + while res >= 16: + self.resolutions.append(res) + res //= 2 + + def get_metric_names(self): + return ['SWDx1e3_%d' % res for res in self.resolutions] + ['SWDx1e3_avg'] + + def get_metric_formatting(self): + return ['%-13.4f'] * len(self.get_metric_names()) + + def begin(self, mode): + assert mode in ['warmup', 'reals', 'fakes'] + self.descriptors = [[] for res in self.resolutions] + + def feed(self, mode, minibatch): + for lod, level in enumerate(generate_laplacian_pyramid(minibatch, len(self.resolutions))): + desc = get_descriptors_for_minibatch(level, self.nhood_size, self.nhoods_per_image) + self.descriptors[lod].append(desc) + + def end(self, mode): + desc = [finalize_descriptors(d) for d in self.descriptors] + del self.descriptors + if mode in ['warmup', 'reals']: + self.desc_real = desc + dist = [sliced_wasserstein(dreal, dfake, self.dir_repeats, self.dirs_per_repeat) for dreal, dfake in zip(self.desc_real, desc)] + del desc + dist = [d * 1e3 for d in dist] # multiply by 10^3 + return dist + [np.mean(dist)] + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/pggan_official/misc.py b/ContraCLIP/models/genforce/converters/pggan_official/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..d0130e3fcb80d8910eda7c0763a9c5897daed7e6 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/misc.py @@ -0,0 +1,344 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. 
To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +import os +import sys +import glob +import datetime +import pickle +import re +import numpy as np +from collections import OrderedDict +import scipy.ndimage +import PIL.Image + +import config +import dataset +import legacy + +#---------------------------------------------------------------------------- +# Convenience wrappers for pickle that are able to load data produced by +# older versions of the code. + +def load_pkl(filename): + with open(filename, 'rb') as file: + return legacy.LegacyUnpickler(file, encoding='latin1').load() + +def save_pkl(obj, filename): + with open(filename, 'wb') as file: + pickle.dump(obj, file, protocol=pickle.HIGHEST_PROTOCOL) + +#---------------------------------------------------------------------------- +# Image utils. + +def adjust_dynamic_range(data, drange_in, drange_out): + if drange_in != drange_out: + scale = (np.float32(drange_out[1]) - np.float32(drange_out[0])) / (np.float32(drange_in[1]) - np.float32(drange_in[0])) + bias = (np.float32(drange_out[0]) - np.float32(drange_in[0]) * scale) + data = data * scale + bias + return data + +def create_image_grid(images, grid_size=None): + assert images.ndim == 3 or images.ndim == 4 + num, img_w, img_h = images.shape[0], images.shape[-1], images.shape[-2] + + if grid_size is not None: + grid_w, grid_h = tuple(grid_size) + else: + grid_w = max(int(np.ceil(np.sqrt(num))), 1) + grid_h = max((num - 1) // grid_w + 1, 1) + + grid = np.zeros(list(images.shape[1:-2]) + [grid_h * img_h, grid_w * img_w], dtype=images.dtype) + for idx in range(num): + x = (idx % grid_w) * img_w + y = (idx // grid_w) * img_h + grid[..., y : y + img_h, x : x + img_w] = images[idx] + return grid + +def convert_to_pil_image(image, drange=[0,1]): + assert image.ndim == 2 or image.ndim == 3 + if image.ndim == 3: + if image.shape[0] == 1: + image = image[0] # grayscale CHW => HW + else: + image = image.transpose(1, 2, 0) # CHW -> HWC + + image = adjust_dynamic_range(image, drange, [0,255]) + image = np.rint(image).clip(0, 255).astype(np.uint8) + format = 'RGB' if image.ndim == 3 else 'L' + return PIL.Image.fromarray(image, format) + +def save_image(image, filename, drange=[0,1], quality=95): + img = convert_to_pil_image(image, drange) + if '.jpg' in filename: + img.save(filename,"JPEG", quality=quality, optimize=True) + else: + img.save(filename) + +def save_image_grid(images, filename, drange=[0,1], grid_size=None): + convert_to_pil_image(create_image_grid(images, grid_size), drange).save(filename) + +#---------------------------------------------------------------------------- +# Logging of stdout and stderr to a file. 
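+# NOTE: OutputLogger buffers everything written to stdout/stderr until
+# set_log_file() receives a path, at which point the buffered text is flushed to
+# that file and later writes go both to the console and to the log file.
+# Illustrative usage (mirrors what create_result_subdir() does below):
+#
+#     init_output_logging()
+#     ...
+#     set_output_log_file(os.path.join(result_subdir, 'log.txt'))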
+ +class OutputLogger(object): + def __init__(self): + self.file = None + self.buffer = '' + + def set_log_file(self, filename, mode='wt'): + assert self.file is None + self.file = open(filename, mode) + if self.buffer is not None: + self.file.write(self.buffer) + self.buffer = None + + def write(self, data): + if self.file is not None: + self.file.write(data) + if self.buffer is not None: + self.buffer += data + + def flush(self): + if self.file is not None: + self.file.flush() + +class TeeOutputStream(object): + def __init__(self, child_streams, autoflush=False): + self.child_streams = child_streams + self.autoflush = autoflush + + def write(self, data): + for stream in self.child_streams: + stream.write(data) + if self.autoflush: + self.flush() + + def flush(self): + for stream in self.child_streams: + stream.flush() + +output_logger = None + +def init_output_logging(): + global output_logger + if output_logger is None: + output_logger = OutputLogger() + sys.stdout = TeeOutputStream([sys.stdout, output_logger], autoflush=True) + sys.stderr = TeeOutputStream([sys.stderr, output_logger], autoflush=True) + +def set_output_log_file(filename, mode='wt'): + if output_logger is not None: + output_logger.set_log_file(filename, mode) + +#---------------------------------------------------------------------------- +# Reporting results. + +def create_result_subdir(result_dir, desc): + + # Select run ID and create subdir. + while True: + run_id = 0 + for fname in glob.glob(os.path.join(result_dir, '*')): + try: + fbase = os.path.basename(fname) + ford = int(fbase[:fbase.find('-')]) + run_id = max(run_id, ford + 1) + except ValueError: + pass + + result_subdir = os.path.join(result_dir, '%03d-%s' % (run_id, desc)) + try: + os.makedirs(result_subdir) + break + except OSError: + if os.path.isdir(result_subdir): + continue + raise + + print("Saving results to", result_subdir) + set_output_log_file(os.path.join(result_subdir, 'log.txt')) + + # Export config. + try: + with open(os.path.join(result_subdir, 'config.txt'), 'wt') as fout: + for k, v in sorted(config.__dict__.items()): + if not k.startswith('_'): + fout.write("%s = %s\n" % (k, str(v))) + except: + pass + + return result_subdir + +def format_time(seconds): + s = int(np.rint(seconds)) + if s < 60: return '%ds' % (s) + elif s < 60*60: return '%dm %02ds' % (s // 60, s % 60) + elif s < 24*60*60: return '%dh %02dm %02ds' % (s // (60*60), (s // 60) % 60, s % 60) + else: return '%dd %02dh %02dm' % (s // (24*60*60), (s // (60*60)) % 24, (s // 60) % 60) + +#---------------------------------------------------------------------------- +# Locating results. 
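+# NOTE: locate_result_subdir() accepts either an existing directory path or a
+# run id; for an integer id it searches config.result_dir (and its 'results' /
+# 'networks' subdirectories) for a unique '%03d-<desc>' match. For example
+# (hypothetical directory name), locate_result_subdir(7) would resolve to a
+# subdirectory named '007-<desc>' if exactly one such directory exists.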
+ +def locate_result_subdir(run_id_or_result_subdir): + if isinstance(run_id_or_result_subdir, str) and os.path.isdir(run_id_or_result_subdir): + return run_id_or_result_subdir + + searchdirs = [] + searchdirs += [''] + searchdirs += ['results'] + searchdirs += ['networks'] + + for searchdir in searchdirs: + dir = config.result_dir if searchdir == '' else os.path.join(config.result_dir, searchdir) + dir = os.path.join(dir, str(run_id_or_result_subdir)) + if os.path.isdir(dir): + return dir + prefix = '%03d' % run_id_or_result_subdir if isinstance(run_id_or_result_subdir, int) else str(run_id_or_result_subdir) + dirs = sorted(glob.glob(os.path.join(config.result_dir, searchdir, prefix + '-*'))) + dirs = [dir for dir in dirs if os.path.isdir(dir)] + if len(dirs) == 1: + return dirs[0] + raise IOError('Cannot locate result subdir for run', run_id_or_result_subdir) + +def list_network_pkls(run_id_or_result_subdir, include_final=True): + result_subdir = locate_result_subdir(run_id_or_result_subdir) + pkls = sorted(glob.glob(os.path.join(result_subdir, 'network-*.pkl'))) + if len(pkls) >= 1 and os.path.basename(pkls[0]) == 'network-final.pkl': + if include_final: + pkls.append(pkls[0]) + del pkls[0] + return pkls + +def locate_network_pkl(run_id_or_result_subdir_or_network_pkl, snapshot=None): + if isinstance(run_id_or_result_subdir_or_network_pkl, str) and os.path.isfile(run_id_or_result_subdir_or_network_pkl): + return run_id_or_result_subdir_or_network_pkl + + pkls = list_network_pkls(run_id_or_result_subdir_or_network_pkl) + if len(pkls) >= 1 and snapshot is None: + return pkls[-1] + for pkl in pkls: + try: + name = os.path.splitext(os.path.basename(pkl))[0] + number = int(name.split('-')[-1]) + if number == snapshot: + return pkl + except ValueError: pass + except IndexError: pass + raise IOError('Cannot locate network pkl for snapshot', snapshot) + +def get_id_string_for_network_pkl(network_pkl): + p = network_pkl.replace('.pkl', '').replace('\\', '/').split('/') + return '-'.join(p[max(len(p) - 2, 0):]) + +#---------------------------------------------------------------------------- +# Loading and using trained networks. + +def load_network_pkl(run_id_or_result_subdir_or_network_pkl, snapshot=None): + return load_pkl(locate_network_pkl(run_id_or_result_subdir_or_network_pkl, snapshot)) + +def random_latents(num_latents, G, random_state=None): + if random_state is not None: + return random_state.randn(num_latents, *G.input_shape[1:]).astype(np.float32) + else: + return np.random.randn(num_latents, *G.input_shape[1:]).astype(np.float32) + +def load_dataset_for_previous_run(run_id, **kwargs): # => dataset_obj, mirror_augment + result_subdir = locate_result_subdir(run_id) + + # Parse config.txt. + parsed_cfg = dict() + with open(os.path.join(result_subdir, 'config.txt'), 'rt') as f: + for line in f: + if line.startswith('dataset =') or line.startswith('train ='): + exec(line, parsed_cfg, parsed_cfg) + dataset_cfg = parsed_cfg.get('dataset', dict()) + train_cfg = parsed_cfg.get('train', dict()) + mirror_augment = train_cfg.get('mirror_augment', False) + + # Handle legacy options. 
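+    # NOTE: the remappings below translate options saved by older runs to the
+    # current dataset interface, e.g. 'h5_path' becomes 'tfrecord_dir',
+    # 'max_labels' becomes 'max_label_size', 'mirror_augment' is pulled out and
+    # returned separately, and 'max_images' is dropped entirely.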
+ if 'h5_path' in dataset_cfg: + dataset_cfg['tfrecord_dir'] = dataset_cfg.pop('h5_path').replace('.h5', '') + if 'mirror_augment' in dataset_cfg: + mirror_augment = dataset_cfg.pop('mirror_augment') + if 'max_labels' in dataset_cfg: + v = dataset_cfg.pop('max_labels') + if v is None: v = 0 + if v == 'all': v = 'full' + dataset_cfg['max_label_size'] = v + if 'max_images' in dataset_cfg: + dataset_cfg.pop('max_images') + + # Handle legacy dataset names. + v = dataset_cfg['tfrecord_dir'] + v = v.replace('-32x32', '').replace('-32', '') + v = v.replace('-128x128', '').replace('-128', '') + v = v.replace('-256x256', '').replace('-256', '') + v = v.replace('-1024x1024', '').replace('-1024', '') + v = v.replace('celeba-hq', 'celebahq') + v = v.replace('cifar-10', 'cifar10') + v = v.replace('cifar-100', 'cifar100') + v = v.replace('mnist-rgb', 'mnistrgb') + v = re.sub('lsun-100k-([^-]*)', 'lsun-\\1-100k', v) + v = re.sub('lsun-full-([^-]*)', 'lsun-\\1-full', v) + dataset_cfg['tfrecord_dir'] = v + + # Load dataset. + dataset_cfg.update(kwargs) + dataset_obj = dataset.load_dataset(data_dir=config.data_dir, **dataset_cfg) + return dataset_obj, mirror_augment + +def apply_mirror_augment(minibatch): + mask = np.random.rand(minibatch.shape[0]) < 0.5 + minibatch = np.array(minibatch) + minibatch[mask] = minibatch[mask, :, :, ::-1] + return minibatch + +#---------------------------------------------------------------------------- +# Text labels. + +_text_label_cache = OrderedDict() + +def draw_text_label(img, text, x, y, alignx=0.5, aligny=0.5, color=255, opacity=1.0, glow_opacity=1.0, **kwargs): + color = np.array(color).flatten().astype(np.float32) + assert img.ndim == 3 and img.shape[2] == color.size or color.size == 1 + alpha, glow = setup_text_label(text, **kwargs) + xx, yy = int(np.rint(x - alpha.shape[1] * alignx)), int(np.rint(y - alpha.shape[0] * aligny)) + xb, yb = max(-xx, 0), max(-yy, 0) + xe, ye = min(alpha.shape[1], img.shape[1] - xx), min(alpha.shape[0], img.shape[0] - yy) + img = np.array(img) + slice = img[yy+yb : yy+ye, xx+xb : xx+xe, :] + slice[:] = slice * (1.0 - (1.0 - (1.0 - alpha[yb:ye, xb:xe]) * (1.0 - glow[yb:ye, xb:xe] * glow_opacity)) * opacity)[:, :, np.newaxis] + slice[:] = slice + alpha[yb:ye, xb:xe, np.newaxis] * (color * opacity)[np.newaxis, np.newaxis, :] + return img + +def setup_text_label(text, font='Calibri', fontsize=32, padding=6, glow_size=2.0, glow_coef=3.0, glow_exp=2.0, cache_size=100): # => (alpha, glow) + # Lookup from cache. + key = (text, font, fontsize, padding, glow_size, glow_coef, glow_exp) + if key in _text_label_cache: + value = _text_label_cache[key] + del _text_label_cache[key] # LRU policy + _text_label_cache[key] = value + return value + + # Limit cache size. + while len(_text_label_cache) >= cache_size: + _text_label_cache.popitem(last=False) + + # Render text. + import moviepy.editor # pip install moviepy + alpha = moviepy.editor.TextClip(text, font=font, fontsize=fontsize).mask.make_frame(0) + alpha = np.pad(alpha, padding, mode='constant', constant_values=0.0) + glow = scipy.ndimage.gaussian_filter(alpha, glow_size) + glow = 1.0 - np.maximum(1.0 - glow * glow_coef, 0.0) ** glow_exp + + # Add to cache. 
+ value = (alpha, glow) + _text_label_cache[key] = value + return value + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/pggan_official/networks.py b/ContraCLIP/models/genforce/converters/pggan_official/networks.py new file mode 100644 index 0000000000000000000000000000000000000000..731683f7834bb3268b32e07326c9caec83c888af --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/networks.py @@ -0,0 +1,315 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +import numpy as np +import tensorflow as tf + +# NOTE: Do not import any application-specific modules here! + +#---------------------------------------------------------------------------- + +def lerp(a, b, t): return a + (b - a) * t +def lerp_clip(a, b, t): return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0) +def cset(cur_lambda, new_cond, new_lambda): return lambda: tf.cond(new_cond, new_lambda, cur_lambda) + +#---------------------------------------------------------------------------- +# Get/create weight tensor for a convolutional or fully-connected layer. + +def get_weight(shape, gain=np.sqrt(2), use_wscale=False, fan_in=None): + if fan_in is None: fan_in = np.prod(shape[:-1]) + std = gain / np.sqrt(fan_in) # He init + if use_wscale: + wscale = tf.constant(np.float32(std), name='wscale') + return tf.get_variable('weight', shape=shape, initializer=tf.initializers.random_normal()) * wscale + else: + return tf.get_variable('weight', shape=shape, initializer=tf.initializers.random_normal(0, std)) + +#---------------------------------------------------------------------------- +# Fully-connected layer. + +def dense(x, fmaps, gain=np.sqrt(2), use_wscale=False): + if len(x.shape) > 2: + x = tf.reshape(x, [-1, np.prod([d.value for d in x.shape[1:]])]) + w = get_weight([x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale) + w = tf.cast(w, x.dtype) + return tf.matmul(x, w) + +#---------------------------------------------------------------------------- +# Convolutional layer. + +def conv2d(x, fmaps, kernel, gain=np.sqrt(2), use_wscale=False): + assert kernel >= 1 and kernel % 2 == 1 + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale) + w = tf.cast(w, x.dtype) + return tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='SAME', data_format='NCHW') + +#---------------------------------------------------------------------------- +# Apply bias to the given activation tensor. + +def apply_bias(x): + b = tf.get_variable('bias', shape=[x.shape[1]], initializer=tf.initializers.zeros()) + b = tf.cast(b, x.dtype) + if len(x.shape) == 2: + return x + b + else: + return x + tf.reshape(b, [1, -1, 1, 1]) + +#---------------------------------------------------------------------------- +# Leaky ReLU activation. Same as tf.nn.leaky_relu, but supports FP16. + +def leaky_relu(x, alpha=0.2): + with tf.name_scope('LeakyRelu'): + alpha = tf.constant(alpha, dtype=x.dtype, name='alpha') + return tf.maximum(x * alpha, x) + +#---------------------------------------------------------------------------- +# Nearest-neighbor upscaling layer. 
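+# NOTE: upscale2d() below performs parameter-free nearest-neighbor upsampling by
+# inserting singleton axes, tiling, and reshaping, turning an [N, C, H, W] tensor
+# into [N, C, H*factor, W*factor].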
+ +def upscale2d(x, factor=2): + assert isinstance(factor, int) and factor >= 1 + if factor == 1: return x + with tf.variable_scope('Upscale2D'): + s = x.shape + x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1]) + x = tf.tile(x, [1, 1, 1, factor, 1, factor]) + x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor]) + return x + +#---------------------------------------------------------------------------- +# Fused upscale2d + conv2d. +# Faster and uses less memory than performing the operations separately. + +def upscale2d_conv2d(x, fmaps, kernel, gain=np.sqrt(2), use_wscale=False): + assert kernel >= 1 and kernel % 2 == 1 + w = get_weight([kernel, kernel, fmaps, x.shape[1].value], gain=gain, use_wscale=use_wscale, fan_in=(kernel**2)*x.shape[1].value) + w = tf.pad(w, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT') + w = tf.add_n([w[1:, 1:], w[:-1, 1:], w[1:, :-1], w[:-1, :-1]]) + w = tf.cast(w, x.dtype) + os = [tf.shape(x)[0], fmaps, x.shape[2] * 2, x.shape[3] * 2] + return tf.nn.conv2d_transpose(x, w, os, strides=[1,1,2,2], padding='SAME', data_format='NCHW') + +#---------------------------------------------------------------------------- +# Box filter downscaling layer. + +def downscale2d(x, factor=2): + assert isinstance(factor, int) and factor >= 1 + if factor == 1: return x + with tf.variable_scope('Downscale2D'): + ksize = [1, 1, factor, factor] + return tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding='VALID', data_format='NCHW') # NOTE: requires tf_config['graph_options.place_pruned_graph'] = True + +#---------------------------------------------------------------------------- +# Fused conv2d + downscale2d. +# Faster and uses less memory than performing the operations separately. + +def conv2d_downscale2d(x, fmaps, kernel, gain=np.sqrt(2), use_wscale=False): + assert kernel >= 1 and kernel % 2 == 1 + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale) + w = tf.pad(w, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT') + w = tf.add_n([w[1:, 1:], w[:-1, 1:], w[1:, :-1], w[:-1, :-1]]) * 0.25 + w = tf.cast(w, x.dtype) + return tf.nn.conv2d(x, w, strides=[1,1,2,2], padding='SAME', data_format='NCHW') + +#---------------------------------------------------------------------------- +# Pixelwise feature vector normalization. + +def pixel_norm(x, epsilon=1e-8): + with tf.variable_scope('PixelNorm'): + return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=1, keepdims=True) + epsilon) + +#---------------------------------------------------------------------------- +# Minibatch standard deviation. + +def minibatch_stddev_layer(x, group_size=4): + with tf.variable_scope('MinibatchStddev'): + group_size = tf.minimum(group_size, tf.shape(x)[0]) # Minibatch must be divisible by (or smaller than) group_size. + s = x.shape # [NCHW] Input shape. + y = tf.reshape(x, [group_size, -1, s[1], s[2], s[3]]) # [GMCHW] Split minibatch into M groups of size G. + y = tf.cast(y, tf.float32) # [GMCHW] Cast to FP32. + y -= tf.reduce_mean(y, axis=0, keepdims=True) # [GMCHW] Subtract mean over group. + y = tf.reduce_mean(tf.square(y), axis=0) # [MCHW] Calc variance over group. + y = tf.sqrt(y + 1e-8) # [MCHW] Calc stddev over group. + y = tf.reduce_mean(y, axis=[1,2,3], keepdims=True) # [M111] Take average over fmaps and pixels. + y = tf.cast(y, x.dtype) # [M111] Cast back to original data type. + y = tf.tile(y, [group_size, 1, s[2], s[3]]) # [N1HW] Replicate over group and pixels. + return tf.concat([x, y], axis=1) # [NCHW] Append as new fmap. 
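+# Illustrative sketch (added for clarity, not part of the original code): the
+# statistic computed by minibatch_stddev_layer() above, written in NumPy and
+# assuming the whole minibatch forms a single group:
+#
+#     import numpy as np
+#     x = np.random.randn(8, 16, 4, 4).astype(np.float32)  # [NCHW] activations
+#     y = x - x.mean(axis=0, keepdims=True)                 # deviation from group mean
+#     y = np.sqrt(np.mean(np.square(y), axis=0) + 1e-8)     # stddev per feature/pixel
+#     extra = np.float32(y.mean())                          # averaged to one scalar
+#     # 'extra' is tiled to [N, 1, H, W] and concatenated as an additional feature
+#     # map, giving the discriminator a cue about sample diversity in the minibatch.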
+ +#---------------------------------------------------------------------------- +# Generator network used in the paper. + +def G_paper( + latents_in, # First input: Latent vectors [minibatch, latent_size]. + labels_in, # Second input: Labels [minibatch, label_size]. + num_channels = 1, # Number of output color channels. Overridden based on dataset. + resolution = 32, # Output resolution. Overridden based on dataset. + label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset. + fmap_base = 8192, # Overall multiplier for the number of feature maps. + fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution. + fmap_max = 512, # Maximum number of feature maps in any layer. + latent_size = None, # Dimensionality of the latent vectors. None = min(fmap_base, fmap_max). + normalize_latents = True, # Normalize latent vectors before feeding them to the network? + use_wscale = True, # Enable equalized learning rate? + use_pixelnorm = True, # Enable pixelwise feature vector normalization? + pixelnorm_epsilon = 1e-8, # Constant epsilon for pixelwise feature vector normalization. + use_leakyrelu = True, # True = leaky ReLU, False = ReLU. + dtype = 'float32', # Data type to use for activations and outputs. + fused_scale = True, # True = use fused upscale2d + conv2d, False = separate upscale2d layers. + structure = None, # 'linear' = human-readable, 'recursive' = efficient, None = select automatically. + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + **kwargs): # Ignore unrecognized keyword args. + + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max) + def PN(x): return pixel_norm(x, epsilon=pixelnorm_epsilon) if use_pixelnorm else x + if latent_size is None: latent_size = nf(0) + if structure is None: structure = 'linear' if is_template_graph else 'recursive' + act = leaky_relu if use_leakyrelu else tf.nn.relu + + latents_in.set_shape([None, latent_size]) + labels_in.set_shape([None, label_size]) + combo_in = tf.cast(tf.concat([latents_in, labels_in], axis=1), dtype) + lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype) + + # Building blocks. 
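+    # NOTE: block() doubles the spatial resolution at each stage (4x4, 8x8, ...),
+    # torgb() projects features to an image at a given level of detail (lod), and
+    # the 'Grow_lod%d' interpolation below blends the upscaled coarser image with
+    # the finer one according to the externally scheduled lod_in variable.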
+ def block(x, res): # res = 2..resolution_log2 + with tf.variable_scope('%dx%d' % (2**res, 2**res)): + if res == 2: # 4x4 + if normalize_latents: x = pixel_norm(x, epsilon=pixelnorm_epsilon) + with tf.variable_scope('Dense'): + x = dense(x, fmaps=nf(res-1)*16, gain=np.sqrt(2)/4, use_wscale=use_wscale) # override gain to match the original Theano implementation + x = tf.reshape(x, [-1, nf(res-1), 4, 4]) + x = PN(act(apply_bias(x))) + with tf.variable_scope('Conv'): + x = PN(act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale)))) + else: # 8x8 and up + if fused_scale: + with tf.variable_scope('Conv0_up'): + x = PN(act(apply_bias(upscale2d_conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale)))) + else: + x = upscale2d(x) + with tf.variable_scope('Conv0'): + x = PN(act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale)))) + with tf.variable_scope('Conv1'): + x = PN(act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale)))) + return x + def torgb(x, res): # res = 2..resolution_log2 + lod = resolution_log2 - res + with tf.variable_scope('ToRGB_lod%d' % lod): + return apply_bias(conv2d(x, fmaps=num_channels, kernel=1, gain=1, use_wscale=use_wscale)) + + # Linear structure: simple but inefficient. + if structure == 'linear': + x = block(combo_in, 2) + images_out = torgb(x, 2) + for res in range(3, resolution_log2 + 1): + lod = resolution_log2 - res + x = block(x, res) + img = torgb(x, res) + images_out = upscale2d(images_out) + with tf.variable_scope('Grow_lod%d' % lod): + images_out = lerp_clip(img, images_out, lod_in - lod) + + # Recursive structure: complex but efficient. + if structure == 'recursive': + def grow(x, res, lod): + y = block(x, res) + img = lambda: upscale2d(torgb(y, res), 2**lod) + if res > 2: img = cset(img, (lod_in > lod), lambda: upscale2d(lerp(torgb(y, res), upscale2d(torgb(x, res - 1)), lod_in - lod), 2**lod)) + if lod > 0: img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1)) + return img() + images_out = grow(combo_in, 2, resolution_log2 - 2) + + assert images_out.dtype == tf.as_dtype(dtype) + images_out = tf.identity(images_out, name='images_out') + return images_out + +#---------------------------------------------------------------------------- +# Discriminator network used in the paper. + +def D_paper( + images_in, # Input: Images [minibatch, channel, height, width]. + num_channels = 1, # Number of input color channels. Overridden based on dataset. + resolution = 32, # Input resolution. Overridden based on dataset. + label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset. + fmap_base = 8192, # Overall multiplier for the number of feature maps. + fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution. + fmap_max = 512, # Maximum number of feature maps in any layer. + use_wscale = True, # Enable equalized learning rate? + mbstd_group_size = 4, # Group size for the minibatch standard deviation layer, 0 = disable. + dtype = 'float32', # Data type to use for activations and outputs. + fused_scale = True, # True = use fused conv2d + downscale2d, False = separate downscale2d layers. + structure = None, # 'linear' = human-readable, 'recursive' = efficient, None = select automatically + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + **kwargs): # Ignore unrecognized keyword args. 
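+    # NOTE: D_paper mirrors G_paper in reverse: fromrgb() lifts an image into
+    # feature maps at a given lod, block() halves the resolution, and the same
+    # lod_in-driven interpolation blends adjacent resolutions while the network
+    # grows. The final dense layer emits 1 + label_size values, which are split
+    # into scores_out and labels_out at the end.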
+ + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max) + if structure is None: structure = 'linear' if is_template_graph else 'recursive' + act = leaky_relu + + images_in.set_shape([None, num_channels, resolution, resolution]) + images_in = tf.cast(images_in, dtype) + lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype) + + # Building blocks. + def fromrgb(x, res): # res = 2..resolution_log2 + with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)): + return act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=1, use_wscale=use_wscale))) + def block(x, res): # res = 2..resolution_log2 + with tf.variable_scope('%dx%d' % (2**res, 2**res)): + if res >= 3: # 8x8 and up + with tf.variable_scope('Conv0'): + x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale))) + if fused_scale: + with tf.variable_scope('Conv1_down'): + x = act(apply_bias(conv2d_downscale2d(x, fmaps=nf(res-2), kernel=3, use_wscale=use_wscale))) + else: + with tf.variable_scope('Conv1'): + x = act(apply_bias(conv2d(x, fmaps=nf(res-2), kernel=3, use_wscale=use_wscale))) + x = downscale2d(x) + else: # 4x4 + if mbstd_group_size > 1: + x = minibatch_stddev_layer(x, mbstd_group_size) + with tf.variable_scope('Conv'): + x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale))) + with tf.variable_scope('Dense0'): + x = act(apply_bias(dense(x, fmaps=nf(res-2), use_wscale=use_wscale))) + with tf.variable_scope('Dense1'): + x = apply_bias(dense(x, fmaps=1+label_size, gain=1, use_wscale=use_wscale)) + return x + + # Linear structure: simple but inefficient. + if structure == 'linear': + img = images_in + x = fromrgb(img, resolution_log2) + for res in range(resolution_log2, 2, -1): + lod = resolution_log2 - res + x = block(x, res) + img = downscale2d(img) + y = fromrgb(img, res - 1) + with tf.variable_scope('Grow_lod%d' % lod): + x = lerp_clip(x, y, lod_in - lod) + combo_out = block(x, 2) + + # Recursive structure: complex but efficient. + if structure == 'recursive': + def grow(res, lod): + x = lambda: fromrgb(downscale2d(images_in, 2**lod), res) + if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1)) + x = block(x(), res); y = lambda: x + if res > 2: y = cset(y, (lod_in > lod), lambda: lerp(x, fromrgb(downscale2d(images_in, 2**(lod+1)), res - 1), lod_in - lod)) + return y() + combo_out = grow(2, resolution_log2 - 2) + + assert combo_out.dtype == tf.as_dtype(dtype) + scores_out = tf.identity(combo_out[:, :1], name='scores_out') + labels_out = tf.identity(combo_out[:, 1:], name='labels_out') + return scores_out, labels_out + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/pggan_official/tfutil.py b/ContraCLIP/models/genforce/converters/pggan_official/tfutil.py new file mode 100644 index 0000000000000000000000000000000000000000..cf7ad0ada400aae935759190a6384c5dd8a3fc08 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/tfutil.py @@ -0,0 +1,749 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 
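+# NOTE: this module bundles TensorFlow plumbing: session helpers
+# (init_tf / create_session), the autosummary mechanism for TensorBoard
+# reporting, a multi-GPU Optimizer wrapper with dynamic loss scaling, and the
+# picklable Network abstraction.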
+ +import os +import sys +import inspect +import importlib +import imp +import numpy as np +from collections import OrderedDict +import tensorflow as tf + +#---------------------------------------------------------------------------- +# Convenience. + +def run(*args, **kwargs): # Run the specified ops in the default session. + return tf.get_default_session().run(*args, **kwargs) + +def is_tf_expression(x): + return isinstance(x, tf.Tensor) or isinstance(x, tf.Variable) or isinstance(x, tf.Operation) + +def shape_to_list(shape): + return [dim.value for dim in shape] + +def flatten(x): + with tf.name_scope('Flatten'): + return tf.reshape(x, [-1]) + +def log2(x): + with tf.name_scope('Log2'): + return tf.log(x) * np.float32(1.0 / np.log(2.0)) + +def exp2(x): + with tf.name_scope('Exp2'): + return tf.exp(x * np.float32(np.log(2.0))) + +def lerp(a, b, t): + with tf.name_scope('Lerp'): + return a + (b - a) * t + +def lerp_clip(a, b, t): + with tf.name_scope('LerpClip'): + return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0) + +def absolute_name_scope(scope): # Forcefully enter the specified name scope, ignoring any surrounding scopes. + return tf.name_scope(scope + '/') + +#---------------------------------------------------------------------------- +# Initialize TensorFlow graph and session using good default settings. + +def init_tf(config_dict=dict()): + if tf.get_default_session() is None: + tf.set_random_seed(np.random.randint(1 << 31)) + create_session(config_dict, force_as_default=True) + +#---------------------------------------------------------------------------- +# Create tf.Session based on config dict of the form +# {'gpu_options.allow_growth': True} + +def create_session(config_dict=dict(), force_as_default=False): + config = tf.ConfigProto() + for key, value in config_dict.items(): + fields = key.split('.') + obj = config + for field in fields[:-1]: + obj = getattr(obj, field) + setattr(obj, fields[-1], value) + session = tf.Session(config=config) + if force_as_default: + session._default_session = session.as_default() + session._default_session.enforce_nesting = False + session._default_session.__enter__() + return session + +#---------------------------------------------------------------------------- +# Initialize all tf.Variables that have not already been initialized. +# Equivalent to the following, but more efficient and does not bloat the tf graph: +# tf.variables_initializer(tf.report_unitialized_variables()).run() + +def init_uninited_vars(vars=None): + if vars is None: vars = tf.global_variables() + test_vars = []; test_ops = [] + with tf.control_dependencies(None): # ignore surrounding control_dependencies + for var in vars: + assert is_tf_expression(var) + try: + tf.get_default_graph().get_tensor_by_name(var.name.replace(':0', '/IsVariableInitialized:0')) + except KeyError: + # Op does not exist => variable may be uninitialized. + test_vars.append(var) + with absolute_name_scope(var.name.split(':')[0]): + test_ops.append(tf.is_variable_initialized(var)) + init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited] + run([var.initializer for var in init_vars]) + +#---------------------------------------------------------------------------- +# Set the values of given tf.Variables. 
+# Equivalent to the following, but more efficient and does not bloat the tf graph: +# tfutil.run([tf.assign(var, value) for var, value in var_to_value_dict.items()] + +def set_vars(var_to_value_dict): + ops = [] + feed_dict = {} + for var, value in var_to_value_dict.items(): + assert is_tf_expression(var) + try: + setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(':0', '/setter:0')) # look for existing op + except KeyError: + with absolute_name_scope(var.name.split(':')[0]): + with tf.control_dependencies(None): # ignore surrounding control_dependencies + setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, 'new_value'), name='setter') # create new setter + ops.append(setter) + feed_dict[setter.op.inputs[1]] = value + run(ops, feed_dict) + +#---------------------------------------------------------------------------- +# Autosummary creates an identity op that internally keeps track of the input +# values and automatically shows up in TensorBoard. The reported value +# represents an average over input components. The average is accumulated +# constantly over time and flushed when save_summaries() is called. +# +# Notes: +# - The output tensor must be used as an input for something else in the +# graph. Otherwise, the autosummary op will not get executed, and the average +# value will not get accumulated. +# - It is perfectly fine to include autosummaries with the same name in +# several places throughout the graph, even if they are executed concurrently. +# - It is ok to also pass in a python scalar or numpy array. In this case, it +# is added to the average immediately. + +_autosummary_vars = OrderedDict() # name => [var, ...] +_autosummary_immediate = OrderedDict() # name => update_op, update_value +_autosummary_finalized = False + +def autosummary(name, value): + id = name.replace('/', '_') + if is_tf_expression(value): + with tf.name_scope('summary_' + id), tf.device(value.device): + update_op = _create_autosummary_var(name, value) + with tf.control_dependencies([update_op]): + return tf.identity(value) + else: # python scalar or numpy array + if name not in _autosummary_immediate: + with absolute_name_scope('Autosummary/' + id), tf.device(None), tf.control_dependencies(None): + update_value = tf.placeholder(tf.float32) + update_op = _create_autosummary_var(name, update_value) + _autosummary_immediate[name] = update_op, update_value + update_op, update_value = _autosummary_immediate[name] + run(update_op, {update_value: np.float32(value)}) + return value + +# Create the necessary ops to include autosummaries in TensorBoard report. +# Note: This should be done only once per graph. +def finalize_autosummaries(): + global _autosummary_finalized + if _autosummary_finalized: + return + _autosummary_finalized = True + init_uninited_vars([var for vars in _autosummary_vars.values() for var in vars]) + with tf.device(None), tf.control_dependencies(None): + for name, vars in _autosummary_vars.items(): + id = name.replace('/', '_') + with absolute_name_scope('Autosummary/' + id): + sum = tf.add_n(vars) + avg = sum[0] / sum[1] + with tf.control_dependencies([avg]): # read before resetting + reset_ops = [tf.assign(var, tf.zeros(2)) for var in vars] + with tf.name_scope(None), tf.control_dependencies(reset_ops): # reset before reporting + tf.summary.scalar(name, avg) + +# Internal helper for creating autosummary accumulators. 
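+# NOTE: each accumulator is a length-2 tf.Variable holding
+# [running_sum, running_count]; autosummary() adds to it every time the op is
+# evaluated, and finalize_autosummaries() reports running_sum / running_count to
+# TensorBoard before resetting the pair to zero.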
+def _create_autosummary_var(name, value_expr): + assert not _autosummary_finalized + v = tf.cast(value_expr, tf.float32) + if v.shape.ndims is 0: + v = [v, np.float32(1.0)] + elif v.shape.ndims is 1: + v = [tf.reduce_sum(v), tf.cast(tf.shape(v)[0], tf.float32)] + else: + v = [tf.reduce_sum(v), tf.reduce_prod(tf.cast(tf.shape(v), tf.float32))] + v = tf.cond(tf.is_finite(v[0]), lambda: tf.stack(v), lambda: tf.zeros(2)) + with tf.control_dependencies(None): + var = tf.Variable(tf.zeros(2)) # [numerator, denominator] + update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v)) + if name in _autosummary_vars: + _autosummary_vars[name].append(var) + else: + _autosummary_vars[name] = [var] + return update_op + +#---------------------------------------------------------------------------- +# Call filewriter.add_summary() with all summaries in the default graph, +# automatically finalizing and merging them on the first call. + +_summary_merge_op = None + +def save_summaries(filewriter, global_step=None): + global _summary_merge_op + if _summary_merge_op is None: + finalize_autosummaries() + with tf.device(None), tf.control_dependencies(None): + _summary_merge_op = tf.summary.merge_all() + filewriter.add_summary(_summary_merge_op.eval(), global_step) + +#---------------------------------------------------------------------------- +# Utilities for importing modules and objects by name. + +def import_module(module_or_obj_name): + parts = module_or_obj_name.split('.') + parts[0] = {'np': 'numpy', 'tf': 'tensorflow'}.get(parts[0], parts[0]) + for i in range(len(parts), 0, -1): + try: + module = importlib.import_module('.'.join(parts[:i])) + relative_obj_name = '.'.join(parts[i:]) + return module, relative_obj_name + except ImportError: + pass + raise ImportError(module_or_obj_name) + +def find_obj_in_module(module, relative_obj_name): + obj = module + for part in relative_obj_name.split('.'): + obj = getattr(obj, part) + return obj + +def import_obj(obj_name): + module, relative_obj_name = import_module(obj_name) + return find_obj_in_module(module, relative_obj_name) + +def call_func_by_name(*args, func=None, **kwargs): + assert func is not None + return import_obj(func)(*args, **kwargs) + +#---------------------------------------------------------------------------- +# Wrapper for tf.train.Optimizer that automatically takes care of: +# - Gradient averaging for multi-GPU training. +# - Dynamic loss scaling and typecasts for FP16 training. +# - Ignoring corrupted gradients that contain NaNs/Infs. +# - Reporting statistics. +# - Well-chosen default settings. + +class Optimizer: + def __init__( + self, + name = 'Train', + tf_optimizer = 'tf.train.AdamOptimizer', + learning_rate = 0.001, + use_loss_scaling = False, + loss_scaling_init = 64.0, + loss_scaling_inc = 0.0005, + loss_scaling_dec = 1.0, + **kwargs): + + # Init fields. + self.name = name + self.learning_rate = tf.convert_to_tensor(learning_rate) + self.id = self.name.replace('/', '.') + self.scope = tf.get_default_graph().unique_name(self.id) + self.optimizer_class = import_obj(tf_optimizer) + self.optimizer_kwargs = dict(kwargs) + self.use_loss_scaling = use_loss_scaling + self.loss_scaling_init = loss_scaling_init + self.loss_scaling_inc = loss_scaling_inc + self.loss_scaling_dec = loss_scaling_dec + self._grad_shapes = None # [shape, ...] + self._dev_opt = OrderedDict() # device => optimizer + self._dev_grads = OrderedDict() # device => [[(grad, var), ...], ...] 
+ self._dev_ls_var = OrderedDict() # device => variable (log2 of loss scaling factor) + self._updates_applied = False + + # Register the gradients of the given loss function with respect to the given variables. + # Intended to be called once per GPU. + def register_gradients(self, loss, vars): + assert not self._updates_applied + + # Validate arguments. + if isinstance(vars, dict): + vars = list(vars.values()) # allow passing in Network.trainables as vars + assert isinstance(vars, list) and len(vars) >= 1 + assert all(is_tf_expression(expr) for expr in vars + [loss]) + if self._grad_shapes is None: + self._grad_shapes = [shape_to_list(var.shape) for var in vars] + assert len(vars) == len(self._grad_shapes) + assert all(shape_to_list(var.shape) == var_shape for var, var_shape in zip(vars, self._grad_shapes)) + dev = loss.device + assert all(var.device == dev for var in vars) + + # Register device and compute gradients. + with tf.name_scope(self.id + '_grad'), tf.device(dev): + if dev not in self._dev_opt: + opt_name = self.scope.replace('/', '_') + '_opt%d' % len(self._dev_opt) + self._dev_opt[dev] = self.optimizer_class(name=opt_name, learning_rate=self.learning_rate, **self.optimizer_kwargs) + self._dev_grads[dev] = [] + loss = self.apply_loss_scaling(tf.cast(loss, tf.float32)) + grads = self._dev_opt[dev].compute_gradients(loss, vars, gate_gradients=tf.train.Optimizer.GATE_NONE) # disable gating to reduce memory usage + grads = [(g, v) if g is not None else (tf.zeros_like(v), v) for g, v in grads] # replace disconnected gradients with zeros + self._dev_grads[dev].append(grads) + + # Construct training op to update the registered variables based on their gradients. + def apply_updates(self): + assert not self._updates_applied + self._updates_applied = True + devices = list(self._dev_grads.keys()) + total_grads = sum(len(grads) for grads in self._dev_grads.values()) + assert len(devices) >= 1 and total_grads >= 1 + ops = [] + with absolute_name_scope(self.scope): + + # Cast gradients to FP32 and calculate partial sum within each device. + dev_grads = OrderedDict() # device => [(grad, var), ...] + for dev_idx, dev in enumerate(devices): + with tf.name_scope('ProcessGrads%d' % dev_idx), tf.device(dev): + sums = [] + for gv in zip(*self._dev_grads[dev]): + assert all(v is gv[0][1] for g, v in gv) + g = [tf.cast(g, tf.float32) for g, v in gv] + g = g[0] if len(g) == 1 else tf.add_n(g) + sums.append((g, gv[0][1])) + dev_grads[dev] = sums + + # Sum gradients across devices. + if len(devices) > 1: + with tf.name_scope('SumAcrossGPUs'), tf.device(None): + for var_idx, grad_shape in enumerate(self._grad_shapes): + g = [dev_grads[dev][var_idx][0] for dev in devices] + if np.prod(grad_shape): # nccl does not support zero-sized tensors + g = tf.contrib.nccl.all_sum(g) + for dev, gg in zip(devices, g): + dev_grads[dev][var_idx] = (gg, dev_grads[dev][var_idx][1]) + + # Apply updates separately on each device. + for dev_idx, (dev, grads) in enumerate(dev_grads.items()): + with tf.name_scope('ApplyGrads%d' % dev_idx), tf.device(dev): + + # Scale gradients as needed. + if self.use_loss_scaling or total_grads > 1: + with tf.name_scope('Scale'): + coef = tf.constant(np.float32(1.0 / total_grads), name='coef') + coef = self.undo_loss_scaling(coef) + grads = [(g * coef, v) for g, v in grads] + + # Check for overflows. + with tf.name_scope('CheckOverflow'): + grad_ok = tf.reduce_all(tf.stack([tf.reduce_all(tf.is_finite(g)) for g, v in grads])) + + # Update weights and adjust loss scaling. 
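+                # NOTE: when dynamic loss scaling is enabled, a finite gradient step
+                # both applies the update and raises the log2 scaling factor by
+                # loss_scaling_inc, while an overflow skips the update and lowers the
+                # factor by loss_scaling_dec, letting FP16 training recover on its own.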
+ with tf.name_scope('UpdateWeights'): + opt = self._dev_opt[dev] + ls_var = self.get_loss_scaling_var(dev) + if not self.use_loss_scaling: + ops.append(tf.cond(grad_ok, lambda: opt.apply_gradients(grads), tf.no_op)) + else: + ops.append(tf.cond(grad_ok, + lambda: tf.group(tf.assign_add(ls_var, self.loss_scaling_inc), opt.apply_gradients(grads)), + lambda: tf.group(tf.assign_sub(ls_var, self.loss_scaling_dec)))) + + # Report statistics on the last device. + if dev == devices[-1]: + with tf.name_scope('Statistics'): + ops.append(autosummary(self.id + '/learning_rate', self.learning_rate)) + ops.append(autosummary(self.id + '/overflow_frequency', tf.where(grad_ok, 0, 1))) + if self.use_loss_scaling: + ops.append(autosummary(self.id + '/loss_scaling_log2', ls_var)) + + # Initialize variables and group everything into a single op. + self.reset_optimizer_state() + init_uninited_vars(list(self._dev_ls_var.values())) + return tf.group(*ops, name='TrainingOp') + + # Reset internal state of the underlying optimizer. + def reset_optimizer_state(self): + run([var.initializer for opt in self._dev_opt.values() for var in opt.variables()]) + + # Get or create variable representing log2 of the current dynamic loss scaling factor. + def get_loss_scaling_var(self, device): + if not self.use_loss_scaling: + return None + if device not in self._dev_ls_var: + with absolute_name_scope(self.scope + '/LossScalingVars'), tf.control_dependencies(None): + self._dev_ls_var[device] = tf.Variable(np.float32(self.loss_scaling_init), name='loss_scaling_var') + return self._dev_ls_var[device] + + # Apply dynamic loss scaling for the given expression. + def apply_loss_scaling(self, value): + assert is_tf_expression(value) + if not self.use_loss_scaling: + return value + return value * exp2(self.get_loss_scaling_var(value.device)) + + # Undo the effect of dynamic loss scaling for the given expression. + def undo_loss_scaling(self, value): + assert is_tf_expression(value) + if not self.use_loss_scaling: + return value + return value * exp2(-self.get_loss_scaling_var(value.device)) + +#---------------------------------------------------------------------------- +# Generic network abstraction. +# +# Acts as a convenience wrapper for a parameterized network construction +# function, providing several utility methods and convenient access to +# the inputs/outputs/weights. +# +# Network objects can be safely pickled and unpickled for long-term +# archival purposes. The pickling works reliably as long as the underlying +# network construction function is defined in a standalone Python module +# that has no side effects or application-specific imports. + +network_import_handlers = [] # Custom import handlers for dealing with legacy data in pickle import. +_network_import_modules = [] # Temporary modules create during pickle import. + +class Network: + def __init__(self, + name=None, # Network name. Used to select TensorFlow name and variable scopes. + func=None, # Fully qualified name of the underlying network construction function. + **static_kwargs): # Keyword arguments to be passed in to the network construction function. + + self._init_fields() + self.name = name + self.static_kwargs = dict(static_kwargs) + + # Init build func. + module, self._build_func_name = import_module(func) + self._build_module_src = inspect.getsource(module) + self._build_func = find_obj_in_module(module, self._build_func_name) + + # Init graph. 
+ self._init_graph() + self.reset_vars() + + def _init_fields(self): + self.name = None # User-specified name, defaults to build func name if None. + self.scope = None # Unique TF graph scope, derived from the user-specified name. + self.static_kwargs = dict() # Arguments passed to the user-supplied build func. + self.num_inputs = 0 # Number of input tensors. + self.num_outputs = 0 # Number of output tensors. + self.input_shapes = [[]] # Input tensor shapes (NC or NCHW), including minibatch dimension. + self.output_shapes = [[]] # Output tensor shapes (NC or NCHW), including minibatch dimension. + self.input_shape = [] # Short-hand for input_shapes[0]. + self.output_shape = [] # Short-hand for output_shapes[0]. + self.input_templates = [] # Input placeholders in the template graph. + self.output_templates = [] # Output tensors in the template graph. + self.input_names = [] # Name string for each input. + self.output_names = [] # Name string for each output. + self.vars = OrderedDict() # All variables (localname => var). + self.trainables = OrderedDict() # Trainable variables (localname => var). + self._build_func = None # User-supplied build function that constructs the network. + self._build_func_name = None # Name of the build function. + self._build_module_src = None # Full source code of the module containing the build function. + self._run_cache = dict() # Cached graph data for Network.run(). + + def _init_graph(self): + # Collect inputs. + self.input_names = [] + for param in inspect.signature(self._build_func).parameters.values(): + if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty: + self.input_names.append(param.name) + self.num_inputs = len(self.input_names) + assert self.num_inputs >= 1 + + # Choose name and scope. + if self.name is None: + self.name = self._build_func_name + self.scope = tf.get_default_graph().unique_name(self.name.replace('/', '_'), mark_as_used=False) + + # Build template graph. + with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE): + assert tf.get_variable_scope().name == self.scope + with absolute_name_scope(self.scope): # ignore surrounding name_scope + with tf.control_dependencies(None): # ignore surrounding control_dependencies + self.input_templates = [tf.placeholder(tf.float32, name=name) for name in self.input_names] + out_expr = self._build_func(*self.input_templates, is_template_graph=True, **self.static_kwargs) + + # Collect outputs. + assert is_tf_expression(out_expr) or isinstance(out_expr, tuple) + self.output_templates = [out_expr] if is_tf_expression(out_expr) else list(out_expr) + self.output_names = [t.name.split('/')[-1].split(':')[0] for t in self.output_templates] + self.num_outputs = len(self.output_templates) + assert self.num_outputs >= 1 + + # Populate remaining fields. + self.input_shapes = [shape_to_list(t.shape) for t in self.input_templates] + self.output_shapes = [shape_to_list(t.shape) for t in self.output_templates] + self.input_shape = self.input_shapes[0] + self.output_shape = self.output_shapes[0] + self.vars = OrderedDict([(self.get_var_localname(var), var) for var in tf.global_variables(self.scope + '/')]) + self.trainables = OrderedDict([(self.get_var_localname(var), var) for var in tf.trainable_variables(self.scope + '/')]) + + # Run initializers for all variables defined by this network. + def reset_vars(self): + run([var.initializer for var in self.vars.values()]) + + # Run initializers for all trainable variables defined by this network. 
+ def reset_trainables(self): + run([var.initializer for var in self.trainables.values()]) + + # Get TensorFlow expression(s) for the output(s) of this network, given the inputs. + def get_output_for(self, *in_expr, return_as_list=False, **dynamic_kwargs): + assert len(in_expr) == self.num_inputs + all_kwargs = dict(self.static_kwargs) + all_kwargs.update(dynamic_kwargs) + with tf.variable_scope(self.scope, reuse=True): + assert tf.get_variable_scope().name == self.scope + named_inputs = [tf.identity(expr, name=name) for expr, name in zip(in_expr, self.input_names)] + out_expr = self._build_func(*named_inputs, **all_kwargs) + assert is_tf_expression(out_expr) or isinstance(out_expr, tuple) + if return_as_list: + out_expr = [out_expr] if is_tf_expression(out_expr) else list(out_expr) + return out_expr + + # Get the local name of a given variable, excluding any surrounding name scopes. + def get_var_localname(self, var_or_globalname): + assert is_tf_expression(var_or_globalname) or isinstance(var_or_globalname, str) + globalname = var_or_globalname if isinstance(var_or_globalname, str) else var_or_globalname.name + assert globalname.startswith(self.scope + '/') + localname = globalname[len(self.scope) + 1:] + localname = localname.split(':')[0] + return localname + + # Find variable by local or global name. + def find_var(self, var_or_localname): + assert is_tf_expression(var_or_localname) or isinstance(var_or_localname, str) + return self.vars[var_or_localname] if isinstance(var_or_localname, str) else var_or_localname + + # Get the value of a given variable as NumPy array. + # Note: This method is very inefficient -- prefer to use tfutil.run(list_of_vars) whenever possible. + def get_var(self, var_or_localname): + return self.find_var(var_or_localname).eval() + + # Set the value of a given variable based on the given NumPy array. + # Note: This method is very inefficient -- prefer to use tfutil.set_vars() whenever possible. + def set_var(self, var_or_localname, new_value): + return set_vars({self.find_var(var_or_localname): new_value}) + + # Pickle export. + def __getstate__(self): + return { + 'version': 2, + 'name': self.name, + 'static_kwargs': self.static_kwargs, + 'build_module_src': self._build_module_src, + 'build_func_name': self._build_func_name, + 'variables': list(zip(self.vars.keys(), run(list(self.vars.values()))))} + + # Pickle import. + def __setstate__(self, state): + self._init_fields() + + # Execute custom import handlers. + for handler in network_import_handlers: + state = handler(state) + + # Set basic fields. + assert state['version'] == 2 + self.name = state['name'] + self.static_kwargs = state['static_kwargs'] + self._build_module_src = state['build_module_src'] + self._build_func_name = state['build_func_name'] + + # Parse imported module. + module = imp.new_module('_tfutil_network_import_module_%d' % len(_network_import_modules)) + exec(self._build_module_src, module.__dict__) + self._build_func = find_obj_in_module(module, self._build_func_name) + _network_import_modules.append(module) # avoid gc + + # Init graph. + self._init_graph() + self.reset_vars() + set_vars({self.find_var(name): value for name, value in state['variables']}) + + # Create a clone of this network with its own copy of the variables. 
+ def clone(self, name=None): + net = object.__new__(Network) + net._init_fields() + net.name = name if name is not None else self.name + net.static_kwargs = dict(self.static_kwargs) + net._build_module_src = self._build_module_src + net._build_func_name = self._build_func_name + net._build_func = self._build_func + net._init_graph() + net.copy_vars_from(self) + return net + + # Copy the values of all variables from the given network. + def copy_vars_from(self, src_net): + assert isinstance(src_net, Network) + name_to_value = run({name: src_net.find_var(name) for name in self.vars.keys()}) + set_vars({self.find_var(name): value for name, value in name_to_value.items()}) + + # Copy the values of all trainable variables from the given network. + def copy_trainables_from(self, src_net): + assert isinstance(src_net, Network) + name_to_value = run({name: src_net.find_var(name) for name in self.trainables.keys()}) + set_vars({self.find_var(name): value for name, value in name_to_value.items()}) + + # Create new network with the given parameters, and copy all variables from this network. + def convert(self, name=None, func=None, **static_kwargs): + net = Network(name, func, **static_kwargs) + net.copy_vars_from(self) + return net + + # Construct a TensorFlow op that updates the variables of this network + # to be slightly closer to those of the given network. + def setup_as_moving_average_of(self, src_net, beta=0.99, beta_nontrainable=0.0): + assert isinstance(src_net, Network) + with absolute_name_scope(self.scope): + with tf.name_scope('MovingAvg'): + ops = [] + for name, var in self.vars.items(): + if name in src_net.vars: + cur_beta = beta if name in self.trainables else beta_nontrainable + new_value = lerp(src_net.vars[name], var, cur_beta) + ops.append(var.assign(new_value)) + return tf.group(*ops) + + # Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s). + def run(self, *in_arrays, + return_as_list = False, # True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs. + print_progress = False, # Print progress to the console? Useful for very large input arrays. + minibatch_size = None, # Maximum minibatch size to use, None = disable batching. + num_gpus = 1, # Number of GPUs to use. + out_mul = 1.0, # Multiplicative constant to apply to the output(s). + out_add = 0.0, # Additive constant to apply to the output(s). + out_shrink = 1, # Shrink the spatial dimensions of the output(s) by the given factor. + out_dtype = None, # Convert the output to the specified data type. + **dynamic_kwargs): # Additional keyword arguments to pass into the network construction function. + + assert len(in_arrays) == self.num_inputs + num_items = in_arrays[0].shape[0] + if minibatch_size is None: + minibatch_size = num_items + key = str([list(sorted(dynamic_kwargs.items())), num_gpus, out_mul, out_add, out_shrink, out_dtype]) + + # Build graph. 
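+ # The concatenated per-GPU output expressions are cached in _run_cache under
+ # `key`, so later calls to run() with the same dynamic_kwargs, num_gpus, and
+ # output options reuse the already-built graph.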
+ if key not in self._run_cache: + with absolute_name_scope(self.scope + '/Run'), tf.control_dependencies(None): + in_split = list(zip(*[tf.split(x, num_gpus) for x in self.input_templates])) + out_split = [] + for gpu in range(num_gpus): + with tf.device('/gpu:%d' % gpu): + out_expr = self.get_output_for(*in_split[gpu], return_as_list=True, **dynamic_kwargs) + if out_mul != 1.0: + out_expr = [x * out_mul for x in out_expr] + if out_add != 0.0: + out_expr = [x + out_add for x in out_expr] + if out_shrink > 1: + ksize = [1, 1, out_shrink, out_shrink] + out_expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding='VALID', data_format='NCHW') for x in out_expr] + if out_dtype is not None: + if tf.as_dtype(out_dtype).is_integer: + out_expr = [tf.round(x) for x in out_expr] + out_expr = [tf.saturate_cast(x, out_dtype) for x in out_expr] + out_split.append(out_expr) + self._run_cache[key] = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)] + + # Run minibatches. + out_expr = self._run_cache[key] + out_arrays = [np.empty([num_items] + shape_to_list(expr.shape)[1:], expr.dtype.name) for expr in out_expr] + for mb_begin in range(0, num_items, minibatch_size): + if print_progress: + print('\r%d / %d' % (mb_begin, num_items), end='') + mb_end = min(mb_begin + minibatch_size, num_items) + mb_in = [src[mb_begin : mb_end] for src in in_arrays] + mb_out = tf.get_default_session().run(out_expr, dict(zip(self.input_templates, mb_in))) + for dst, src in zip(out_arrays, mb_out): + dst[mb_begin : mb_end] = src + + # Done. + if print_progress: + print('\r%d / %d' % (num_items, num_items)) + if not return_as_list: + out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays) + return out_arrays + + # Returns a list of (name, output_expr, trainable_vars) tuples corresponding to + # individual layers of the network. Mainly intended to be used for reporting. + def list_layers(self): + patterns_to_ignore = ['/Setter', '/new_value', '/Shape', '/strided_slice', '/Cast', '/concat'] + all_ops = tf.get_default_graph().get_operations() + all_ops = [op for op in all_ops if not any(p in op.name for p in patterns_to_ignore)] + layers = [] + + def recurse(scope, parent_ops, level): + prefix = scope + '/' + ops = [op for op in parent_ops if op.name == scope or op.name.startswith(prefix)] + + # Does not contain leaf nodes => expand immediate children. + if level == 0 or all('/' in op.name[len(prefix):] for op in ops): + visited = set() + for op in ops: + suffix = op.name[len(prefix):] + if '/' in suffix: + suffix = suffix[:suffix.index('/')] + if suffix not in visited: + recurse(prefix + suffix, ops, level + 1) + visited.add(suffix) + + # Otherwise => interpret as a layer. + else: + layer_name = scope[len(self.scope)+1:] + layer_output = ops[-1].outputs[0] + layer_trainables = [op.outputs[0] for op in ops if op.type.startswith('Variable') and self.get_var_localname(op.name) in self.trainables] + layers.append((layer_name, layer_output, layer_trainables)) + + recurse(self.scope, all_ops, 0) + return layers + + # Print a summary table of the network structure. 
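+ # The table is built on top of list_layers() above and reports, for each
+ # layer, its parameter count, output shape, and weight shape.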
+ def print_layers(self, title=None, hide_layers_with_no_params=False): + if title is None: title = self.name + print() + print('%-28s%-12s%-24s%-24s' % (title, 'Params', 'OutputShape', 'WeightShape')) + print('%-28s%-12s%-24s%-24s' % (('---',) * 4)) + + total_params = 0 + for layer_name, layer_output, layer_trainables in self.list_layers(): + weights = [var for var in layer_trainables if var.name.endswith('/weight:0')] + num_params = sum(np.prod(shape_to_list(var.shape)) for var in layer_trainables) + total_params += num_params + if hide_layers_with_no_params and num_params == 0: + continue + + print('%-28s%-12s%-24s%-24s' % ( + layer_name, + num_params if num_params else '-', + layer_output.shape, + weights[0].shape if len(weights) == 1 else '-')) + + print('%-28s%-12s%-24s%-24s' % (('---',) * 4)) + print('%-28s%-12s%-24s%-24s' % ('Total', total_params, '', '')) + print() + + # Construct summary ops to include histograms of all trainable parameters in TensorBoard. + def setup_weight_histograms(self, title=None): + if title is None: title = self.name + with tf.name_scope(None), tf.device(None), tf.control_dependencies(None): + for localname, var in self.trainables.items(): + if '/' in localname: + p = localname.split('/') + name = title + '_' + p[-1] + '/' + '_'.join(p[:-1]) + else: + name = title + '_toplevel/' + localname + tf.summary.histogram(name, var) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/pggan_official/train.py b/ContraCLIP/models/genforce/converters/pggan_official/train.py new file mode 100644 index 0000000000000000000000000000000000000000..1deb1220891716a440bb545965b00e9461b9ecdd --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/train.py @@ -0,0 +1,288 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +import os +import time +import numpy as np +import tensorflow as tf + +import config +import tfutil +import dataset +import misc + +#---------------------------------------------------------------------------- +# Choose the size and contents of the image snapshot grids that are exported +# periodically during training. + +def setup_snapshot_image_grid(G, training_set, + size = '1080p', # '1080p' = to be viewed on 1080p display, '4k' = to be viewed on 4k display. + layout = 'random'): # 'random' = grid contents are selected randomly, 'row_per_class' = each row corresponds to one class label. + + # Select size. + gw = 1; gh = 1 + if size == '1080p': + gw = np.clip(1920 // G.output_shape[3], 3, 32) + gh = np.clip(1080 // G.output_shape[2], 2, 32) + if size == '4k': + gw = np.clip(3840 // G.output_shape[3], 7, 32) + gh = np.clip(2160 // G.output_shape[2], 4, 32) + + # Fill in reals and labels. 
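+ # Each grid cell is filled with a random sample from the training set; with
+ # the 'row_per_class' layout, samples are re-drawn until the label matches
+ # the class assigned to that row (y modulo label_size).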
+ reals = np.zeros([gw * gh] + training_set.shape, dtype=training_set.dtype) + labels = np.zeros([gw * gh, training_set.label_size], dtype=training_set.label_dtype) + for idx in range(gw * gh): + x = idx % gw; y = idx // gw + while True: + real, label = training_set.get_minibatch_np(1) + if layout == 'row_per_class' and training_set.label_size > 0: + if label[0, y % training_set.label_size] == 0.0: + continue + reals[idx] = real[0] + labels[idx] = label[0] + break + + # Generate latents. + latents = misc.random_latents(gw * gh, G) + return (gw, gh), reals, labels, latents + +#---------------------------------------------------------------------------- +# Just-in-time processing of training images before feeding them to the networks. + +def process_reals(x, lod, mirror_augment, drange_data, drange_net): + with tf.name_scope('ProcessReals'): + with tf.name_scope('DynamicRange'): + x = tf.cast(x, tf.float32) + x = misc.adjust_dynamic_range(x, drange_data, drange_net) + if mirror_augment: + with tf.name_scope('MirrorAugment'): + s = tf.shape(x) + mask = tf.random_uniform([s[0], 1, 1, 1], 0.0, 1.0) + mask = tf.tile(mask, [1, s[1], s[2], s[3]]) + x = tf.where(mask < 0.5, x, tf.reverse(x, axis=[3])) + with tf.name_scope('FadeLOD'): # Smooth crossfade between consecutive levels-of-detail. + s = tf.shape(x) + y = tf.reshape(x, [-1, s[1], s[2]//2, 2, s[3]//2, 2]) + y = tf.reduce_mean(y, axis=[3, 5], keepdims=True) + y = tf.tile(y, [1, 1, 1, 2, 1, 2]) + y = tf.reshape(y, [-1, s[1], s[2], s[3]]) + x = tfutil.lerp(x, y, lod - tf.floor(lod)) + with tf.name_scope('UpscaleLOD'): # Upscale to match the expected input/output size of the networks. + s = tf.shape(x) + factor = tf.cast(2 ** tf.floor(lod), tf.int32) + x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1]) + x = tf.tile(x, [1, 1, 1, factor, 1, factor]) + x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor]) + return x + +#---------------------------------------------------------------------------- +# Class for evaluating and storing the values of time-varying training parameters. + +class TrainingSchedule: + def __init__( + self, + cur_nimg, + training_set, + lod_initial_resolution = 4, # Image resolution used at the beginning. + lod_training_kimg = 600, # Thousands of real images to show before doubling the resolution. + lod_transition_kimg = 600, # Thousands of real images to show when fading in new layers. + minibatch_base = 16, # Maximum minibatch size, divided evenly among GPUs. + minibatch_dict = {}, # Resolution-specific overrides. + max_minibatch_per_gpu = {}, # Resolution-specific maximum minibatch size per GPU. + G_lrate_base = 0.001, # Learning rate for the generator. + G_lrate_dict = {}, # Resolution-specific overrides. + D_lrate_base = 0.001, # Learning rate for the discriminator. + D_lrate_dict = {}, # Resolution-specific overrides. + tick_kimg_base = 160, # Default interval of progress snapshots. + tick_kimg_dict = {4: 160, 8:140, 16:120, 32:100, 64:80, 128:60, 256:40, 512:20, 1024:10}): # Resolution-specific overrides. + + # Training phase. + self.kimg = cur_nimg / 1000.0 + phase_dur = lod_training_kimg + lod_transition_kimg + phase_idx = int(np.floor(self.kimg / phase_dur)) if phase_dur > 0 else 0 + phase_kimg = self.kimg - phase_idx * phase_dur + + # Level-of-detail and resolution. 
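+ # lod counts the number of upscaling steps still missing from the full
+ # dataset resolution: it starts at resolution_log2 - log2(lod_initial_resolution),
+ # drops by one per completed phase, and fades linearly towards the next
+ # integer during the transition part of the current phase.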
+ self.lod = training_set.resolution_log2 + self.lod -= np.floor(np.log2(lod_initial_resolution)) + self.lod -= phase_idx + if lod_transition_kimg > 0: + self.lod -= max(phase_kimg - lod_training_kimg, 0.0) / lod_transition_kimg + self.lod = max(self.lod, 0.0) + self.resolution = 2 ** (training_set.resolution_log2 - int(np.floor(self.lod))) + + # Minibatch size. + self.minibatch = minibatch_dict.get(self.resolution, minibatch_base) + self.minibatch -= self.minibatch % config.num_gpus + if self.resolution in max_minibatch_per_gpu: + self.minibatch = min(self.minibatch, max_minibatch_per_gpu[self.resolution] * config.num_gpus) + + # Other parameters. + self.G_lrate = G_lrate_dict.get(self.resolution, G_lrate_base) + self.D_lrate = D_lrate_dict.get(self.resolution, D_lrate_base) + self.tick_kimg = tick_kimg_dict.get(self.resolution, tick_kimg_base) + +#---------------------------------------------------------------------------- +# Main training script. +# To run, comment/uncomment appropriate lines in config.py and launch train.py. + +def train_progressive_gan( + G_smoothing = 0.999, # Exponential running average of generator weights. + D_repeats = 1, # How many times the discriminator is trained per G iteration. + minibatch_repeats = 4, # Number of minibatches to run before adjusting training parameters. + reset_opt_for_new_lod = True, # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced? + total_kimg = 15000, # Total length of the training, measured in thousands of real images. + mirror_augment = False, # Enable mirror augment? + drange_net = [-1,1], # Dynamic range used when feeding image data to the networks. + image_snapshot_ticks = 1, # How often to export image snapshots? + network_snapshot_ticks = 10, # How often to export network snapshots? + save_tf_graph = False, # Include full TensorFlow computation graph in the tfevents file? + save_weight_histograms = False, # Include weight histograms in the tfevents file? + resume_run_id = None, # Run ID or network pkl to resume training from, None = start from scratch. + resume_snapshot = None, # Snapshot index to resume training from, None = autodetect. + resume_kimg = 0.0, # Assumed training progress at the beginning. Affects reporting and training schedule. + resume_time = 0.0): # Assumed wallclock time at the beginning. Affects reporting. + + maintenance_start_time = time.time() + training_set = dataset.load_dataset(data_dir=config.data_dir, verbose=True, **config.dataset) + + # Construct networks. + with tf.device('/gpu:0'): + if resume_run_id is not None: + network_pkl = misc.locate_network_pkl(resume_run_id, resume_snapshot) + print('Loading networks from "%s"...' 
% network_pkl) + G, D, Gs = misc.load_pkl(network_pkl) + else: + print('Constructing networks...') + G = tfutil.Network('G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **config.G) + D = tfutil.Network('D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **config.D) + Gs = G.clone('Gs') + Gs_update_op = Gs.setup_as_moving_average_of(G, beta=G_smoothing) + G.print_layers(); D.print_layers() + + print('Building TensorFlow graph...') + with tf.name_scope('Inputs'): + lod_in = tf.placeholder(tf.float32, name='lod_in', shape=[]) + lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[]) + minibatch_in = tf.placeholder(tf.int32, name='minibatch_in', shape=[]) + minibatch_split = minibatch_in // config.num_gpus + reals, labels = training_set.get_minibatch_tf() + reals_split = tf.split(reals, config.num_gpus) + labels_split = tf.split(labels, config.num_gpus) + G_opt = tfutil.Optimizer(name='TrainG', learning_rate=lrate_in, **config.G_opt) + D_opt = tfutil.Optimizer(name='TrainD', learning_rate=lrate_in, **config.D_opt) + for gpu in range(config.num_gpus): + with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu): + G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow') + D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow') + lod_assign_ops = [tf.assign(G_gpu.find_var('lod'), lod_in), tf.assign(D_gpu.find_var('lod'), lod_in)] + reals_gpu = process_reals(reals_split[gpu], lod_in, mirror_augment, training_set.dynamic_range, drange_net) + labels_gpu = labels_split[gpu] + with tf.name_scope('G_loss'), tf.control_dependencies(lod_assign_ops): + G_loss = tfutil.call_func_by_name(G=G_gpu, D=D_gpu, opt=G_opt, training_set=training_set, minibatch_size=minibatch_split, **config.G_loss) + with tf.name_scope('D_loss'), tf.control_dependencies(lod_assign_ops): + D_loss = tfutil.call_func_by_name(G=G_gpu, D=D_gpu, opt=D_opt, training_set=training_set, minibatch_size=minibatch_split, reals=reals_gpu, labels=labels_gpu, **config.D_loss) + G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables) + D_opt.register_gradients(tf.reduce_mean(D_loss), D_gpu.trainables) + G_train_op = G_opt.apply_updates() + D_train_op = D_opt.apply_updates() + + print('Setting up snapshot image grid...') + grid_size, grid_reals, grid_labels, grid_latents = setup_snapshot_image_grid(G, training_set, **config.grid) + sched = TrainingSchedule(total_kimg * 1000, training_set, **config.sched) + grid_fakes = Gs.run(grid_latents, grid_labels, minibatch_size=sched.minibatch//config.num_gpus) + + print('Setting up result dir...') + result_subdir = misc.create_result_subdir(config.result_dir, config.desc) + misc.save_image_grid(grid_reals, os.path.join(result_subdir, 'reals.png'), drange=training_set.dynamic_range, grid_size=grid_size) + misc.save_image_grid(grid_fakes, os.path.join(result_subdir, 'fakes%06d.png' % 0), drange=drange_net, grid_size=grid_size) + summary_log = tf.summary.FileWriter(result_subdir) + if save_tf_graph: + summary_log.add_graph(tf.get_default_graph()) + if save_weight_histograms: + G.setup_weight_histograms(); D.setup_weight_histograms() + + print('Training...') + cur_nimg = int(resume_kimg * 1000) + cur_tick = 0 + tick_start_nimg = cur_nimg + tick_start_time = time.time() + train_start_time = tick_start_time - resume_time + prev_lod = -1.0 + while cur_nimg < total_kimg * 1000: + + # Choose training parameters and configure training ops. 
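+ # The schedule is re-derived from cur_nimg on every pass, so resolution,
+ # minibatch size, and learning rates follow the progressive-growing plan;
+ # optimizer state is optionally reset when a new level-of-detail phase
+ # begins (i.e. the floor or ceiling of lod changes).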
+ sched = TrainingSchedule(cur_nimg, training_set, **config.sched) + training_set.configure(sched.minibatch, sched.lod) + if reset_opt_for_new_lod: + if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(sched.lod) != np.ceil(prev_lod): + G_opt.reset_optimizer_state(); D_opt.reset_optimizer_state() + prev_lod = sched.lod + + # Run training ops. + for repeat in range(minibatch_repeats): + for _ in range(D_repeats): + tfutil.run([D_train_op, Gs_update_op], {lod_in: sched.lod, lrate_in: sched.D_lrate, minibatch_in: sched.minibatch}) + cur_nimg += sched.minibatch + tfutil.run([G_train_op], {lod_in: sched.lod, lrate_in: sched.G_lrate, minibatch_in: sched.minibatch}) + + # Perform maintenance tasks once per tick. + done = (cur_nimg >= total_kimg * 1000) + if cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done: + cur_tick += 1 + cur_time = time.time() + tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0 + tick_start_nimg = cur_nimg + tick_time = cur_time - tick_start_time + total_time = cur_time - train_start_time + maintenance_time = tick_start_time - maintenance_start_time + maintenance_start_time = cur_time + + # Report progress. + print('tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %.1f' % ( + tfutil.autosummary('Progress/tick', cur_tick), + tfutil.autosummary('Progress/kimg', cur_nimg / 1000.0), + tfutil.autosummary('Progress/lod', sched.lod), + tfutil.autosummary('Progress/minibatch', sched.minibatch), + misc.format_time(tfutil.autosummary('Timing/total_sec', total_time)), + tfutil.autosummary('Timing/sec_per_tick', tick_time), + tfutil.autosummary('Timing/sec_per_kimg', tick_time / tick_kimg), + tfutil.autosummary('Timing/maintenance_sec', maintenance_time))) + tfutil.autosummary('Timing/total_hours', total_time / (60.0 * 60.0)) + tfutil.autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0)) + tfutil.save_summaries(summary_log, cur_nimg) + + # Save snapshots. + if cur_tick % image_snapshot_ticks == 0 or done: + grid_fakes = Gs.run(grid_latents, grid_labels, minibatch_size=sched.minibatch//config.num_gpus) + misc.save_image_grid(grid_fakes, os.path.join(result_subdir, 'fakes%06d.png' % (cur_nimg // 1000)), drange=drange_net, grid_size=grid_size) + if cur_tick % network_snapshot_ticks == 0 or done: + misc.save_pkl((G, D, Gs), os.path.join(result_subdir, 'network-snapshot-%06d.pkl' % (cur_nimg // 1000))) + + # Record start time of the next tick. + tick_start_time = time.time() + + # Write final results. + misc.save_pkl((G, D, Gs), os.path.join(result_subdir, 'network-final.pkl')) + summary_log.close() + open(os.path.join(result_subdir, '_training-done.txt'), 'wt').close() + +#---------------------------------------------------------------------------- +# Main entry point. +# Calls the function indicated in config.py. + +if __name__ == "__main__": + misc.init_output_logging() + np.random.seed(config.random_seed) + print('Initializing TensorFlow...') + os.environ.update(config.env) + tfutil.init_tf(config.tf_config) + print('Running %s()...' 
% config.train['func']) + tfutil.call_func_by_name(**config.train) + print('Exiting...') + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/pggan_official/util_scripts.py b/ContraCLIP/models/genforce/converters/pggan_official/util_scripts.py new file mode 100644 index 0000000000000000000000000000000000000000..0fc61d8eb00d604ab078882acb6f4b53374c0c86 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/pggan_official/util_scripts.py @@ -0,0 +1,239 @@ +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +import os +import time +import re +import bisect +from collections import OrderedDict +import numpy as np +import tensorflow as tf +import scipy.ndimage +import scipy.misc + +import config +import misc +import tfutil +import train +import dataset + +#---------------------------------------------------------------------------- +# Generate random images or image grids using a previously trained network. +# To run, uncomment the appropriate line in config.py and launch train.py. + +def generate_fake_images(run_id, snapshot=None, grid_size=[1,1], num_pngs=1, image_shrink=1, png_prefix=None, random_seed=1000, minibatch_size=8): + network_pkl = misc.locate_network_pkl(run_id, snapshot) + if png_prefix is None: + png_prefix = misc.get_id_string_for_network_pkl(network_pkl) + '-' + random_state = np.random.RandomState(random_seed) + + print('Loading network from "%s"...' % network_pkl) + G, D, Gs = misc.load_network_pkl(run_id, snapshot) + + result_subdir = misc.create_result_subdir(config.result_dir, config.desc) + for png_idx in range(num_pngs): + print('Generating png %d / %d...' % (png_idx, num_pngs)) + latents = misc.random_latents(np.prod(grid_size), Gs, random_state=random_state) + labels = np.zeros([latents.shape[0], 0], np.float32) + images = Gs.run(latents, labels, minibatch_size=minibatch_size, num_gpus=config.num_gpus, out_mul=127.5, out_add=127.5, out_shrink=image_shrink, out_dtype=np.uint8) + misc.save_image_grid(images, os.path.join(result_subdir, '%s%06d.png' % (png_prefix, png_idx)), [0,255], grid_size) + open(os.path.join(result_subdir, '_done.txt'), 'wt').close() + +#---------------------------------------------------------------------------- +# Generate MP4 video of random interpolations using a previously trained network. +# To run, uncomment the appropriate line in config.py and launch train.py. + +def generate_interpolation_video(run_id, snapshot=None, grid_size=[1,1], image_shrink=1, image_zoom=1, duration_sec=60.0, smoothing_sec=1.0, mp4=None, mp4_fps=30, mp4_codec='libx265', mp4_bitrate='16M', random_seed=1000, minibatch_size=8): + network_pkl = misc.locate_network_pkl(run_id, snapshot) + if mp4 is None: + mp4 = misc.get_id_string_for_network_pkl(network_pkl) + '-lerp.mp4' + num_frames = int(np.rint(duration_sec * mp4_fps)) + random_state = np.random.RandomState(random_seed) + + print('Loading network from "%s"...' 
% network_pkl) + G, D, Gs = misc.load_network_pkl(run_id, snapshot) + + print('Generating latent vectors...') + shape = [num_frames, np.prod(grid_size)] + Gs.input_shape[1:] # [frame, image, channel, component] + all_latents = random_state.randn(*shape).astype(np.float32) + all_latents = scipy.ndimage.gaussian_filter(all_latents, [smoothing_sec * mp4_fps] + [0] * len(Gs.input_shape), mode='wrap') + all_latents /= np.sqrt(np.mean(np.square(all_latents))) + + # Frame generation func for moviepy. + def make_frame(t): + frame_idx = int(np.clip(np.round(t * mp4_fps), 0, num_frames - 1)) + latents = all_latents[frame_idx] + labels = np.zeros([latents.shape[0], 0], np.float32) + images = Gs.run(latents, labels, minibatch_size=minibatch_size, num_gpus=config.num_gpus, out_mul=127.5, out_add=127.5, out_shrink=image_shrink, out_dtype=np.uint8) + grid = misc.create_image_grid(images, grid_size).transpose(1, 2, 0) # HWC + if image_zoom > 1: + grid = scipy.ndimage.zoom(grid, [image_zoom, image_zoom, 1], order=0) + if grid.shape[2] == 1: + grid = grid.repeat(3, 2) # grayscale => RGB + return grid + + # Generate video. + import moviepy.editor # pip install moviepy + result_subdir = misc.create_result_subdir(config.result_dir, config.desc) + moviepy.editor.VideoClip(make_frame, duration=duration_sec).write_videofile(os.path.join(result_subdir, mp4), fps=mp4_fps, codec='libx264', bitrate=mp4_bitrate) + open(os.path.join(result_subdir, '_done.txt'), 'wt').close() + +#---------------------------------------------------------------------------- +# Generate MP4 video of training progress for a previous training run. +# To run, uncomment the appropriate line in config.py and launch train.py. + +def generate_training_video(run_id, duration_sec=20.0, time_warp=1.5, mp4=None, mp4_fps=30, mp4_codec='libx265', mp4_bitrate='16M'): + src_result_subdir = misc.locate_result_subdir(run_id) + if mp4 is None: + mp4 = os.path.basename(src_result_subdir) + '-train.mp4' + + # Parse log. + times = [] + snaps = [] # [(png, kimg, lod), ...] + with open(os.path.join(src_result_subdir, 'log.txt'), 'rt') as log: + for line in log: + k = re.search(r'kimg ([\d\.]+) ', line) + l = re.search(r'lod ([\d\.]+) ', line) + t = re.search(r'time (\d+d)? *(\d+h)? *(\d+m)? *(\d+s)? ', line) + if k and l and t: + k = float(k.group(1)) + l = float(l.group(1)) + t = [int(t.group(i)[:-1]) if t.group(i) else 0 for i in range(1, 5)] + t = t[0] * 24*60*60 + t[1] * 60*60 + t[2] * 60 + t[3] + png = os.path.join(src_result_subdir, 'fakes%06d.png' % int(np.floor(k))) + if os.path.isfile(png): + times.append(t) + snaps.append((png, k, l)) + assert len(times) + + # Frame generation func for moviepy. + png_cache = [None, None] # [png, img] + def make_frame(t): + wallclock = ((t / duration_sec) ** time_warp) * times[-1] + png, kimg, lod = snaps[max(bisect.bisect(times, wallclock) - 1, 0)] + if png_cache[0] == png: + img = png_cache[1] + else: + img = scipy.misc.imread(png) + while img.shape[1] > 1920 or img.shape[0] > 1080: + img = img.astype(np.float32).reshape(img.shape[0]//2, 2, img.shape[1]//2, 2, -1).mean(axis=(1,3)) + png_cache[:] = [png, img] + img = misc.draw_text_label(img, 'lod %.2f' % lod, 16, img.shape[0]-4, alignx=0.0, aligny=1.0) + img = misc.draw_text_label(img, misc.format_time(int(np.rint(wallclock))), img.shape[1]//2, img.shape[0]-4, alignx=0.5, aligny=1.0) + img = misc.draw_text_label(img, '%.0f kimg' % kimg, img.shape[1]-16, img.shape[0]-4, alignx=1.0, aligny=1.0) + return img + + # Generate video. 
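+ # moviepy renders the clip by calling make_frame(t) once per output frame
+ # and encodes the result with the requested fps and bitrate.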
+ import moviepy.editor # pip install moviepy + result_subdir = misc.create_result_subdir(config.result_dir, config.desc) + moviepy.editor.VideoClip(make_frame, duration=duration_sec).write_videofile(os.path.join(result_subdir, mp4), fps=mp4_fps, codec='libx264', bitrate=mp4_bitrate) + open(os.path.join(result_subdir, '_done.txt'), 'wt').close() + +#---------------------------------------------------------------------------- +# Evaluate one or more metrics for a previous training run. +# To run, uncomment one of the appropriate lines in config.py and launch train.py. + +def evaluate_metrics(run_id, log, metrics, num_images, real_passes, minibatch_size=None): + metric_class_names = { + 'swd': 'metrics.sliced_wasserstein.API', + 'fid': 'metrics.frechet_inception_distance.API', + 'is': 'metrics.inception_score.API', + 'msssim': 'metrics.ms_ssim.API', + } + + # Locate training run and initialize logging. + result_subdir = misc.locate_result_subdir(run_id) + snapshot_pkls = misc.list_network_pkls(result_subdir, include_final=False) + assert len(snapshot_pkls) >= 1 + log_file = os.path.join(result_subdir, log) + print('Logging output to', log_file) + misc.set_output_log_file(log_file) + + # Initialize dataset and select minibatch size. + dataset_obj, mirror_augment = misc.load_dataset_for_previous_run(result_subdir, verbose=True, shuffle_mb=0) + if minibatch_size is None: + minibatch_size = np.clip(8192 // dataset_obj.shape[1], 4, 256) + + # Initialize metrics. + metric_objs = [] + for name in metrics: + class_name = metric_class_names.get(name, name) + print('Initializing %s...' % class_name) + class_def = tfutil.import_obj(class_name) + image_shape = [3] + dataset_obj.shape[1:] + obj = class_def(num_images=num_images, image_shape=image_shape, image_dtype=np.uint8, minibatch_size=minibatch_size) + tfutil.init_uninited_vars() + mode = 'warmup' + obj.begin(mode) + for idx in range(10): + obj.feed(mode, np.random.randint(0, 256, size=[minibatch_size]+image_shape, dtype=np.uint8)) + obj.end(mode) + metric_objs.append(obj) + + # Print table header. + print() + print('%-10s%-12s' % ('Snapshot', 'Time_eval'), end='') + for obj in metric_objs: + for name, fmt in zip(obj.get_metric_names(), obj.get_metric_formatting()): + print('%-*s' % (len(fmt % 0), name), end='') + print() + print('%-10s%-12s' % ('---', '---'), end='') + for obj in metric_objs: + for fmt in obj.get_metric_formatting(): + print('%-*s' % (len(fmt % 0), '---'), end='') + print() + + # Feed in reals. + for title, mode in [('Reals', 'reals'), ('Reals2', 'fakes')][:real_passes]: + print('%-10s' % title, end='') + time_begin = time.time() + labels = np.zeros([num_images, dataset_obj.label_size], dtype=np.float32) + [obj.begin(mode) for obj in metric_objs] + for begin in range(0, num_images, minibatch_size): + end = min(begin + minibatch_size, num_images) + images, labels[begin:end] = dataset_obj.get_minibatch_np(end - begin) + if mirror_augment: + images = misc.apply_mirror_augment(images) + if images.shape[1] == 1: + images = np.tile(images, [1, 3, 1, 1]) # grayscale => RGB + [obj.feed(mode, images) for obj in metric_objs] + results = [obj.end(mode) for obj in metric_objs] + print('%-12s' % misc.format_time(time.time() - time_begin), end='') + for obj, vals in zip(metric_objs, results): + for val, fmt in zip(vals, obj.get_metric_formatting()): + print(fmt % val, end='') + print() + + # Evaluate each network snapshot. 
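+ # Each snapshot is loaded into its own TensorFlow graph and session; fake
+ # images are generated on the fly and fed to the same metric objects that
+ # processed the reals above.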
+ for snapshot_idx, snapshot_pkl in enumerate(reversed(snapshot_pkls)): + prefix = 'network-snapshot-'; postfix = '.pkl' + snapshot_name = os.path.basename(snapshot_pkl) + assert snapshot_name.startswith(prefix) and snapshot_name.endswith(postfix) + snapshot_kimg = int(snapshot_name[len(prefix) : -len(postfix)]) + + print('%-10d' % snapshot_kimg, end='') + mode ='fakes' + [obj.begin(mode) for obj in metric_objs] + time_begin = time.time() + with tf.Graph().as_default(), tfutil.create_session(config.tf_config).as_default(): + G, D, Gs = misc.load_pkl(snapshot_pkl) + for begin in range(0, num_images, minibatch_size): + end = min(begin + minibatch_size, num_images) + latents = misc.random_latents(end - begin, Gs) + images = Gs.run(latents, labels[begin:end], num_gpus=config.num_gpus, out_mul=127.5, out_add=127.5, out_dtype=np.uint8) + if images.shape[1] == 1: + images = np.tile(images, [1, 3, 1, 1]) # grayscale => RGB + [obj.feed(mode, images) for obj in metric_objs] + results = [obj.end(mode) for obj in metric_objs] + print('%-12s' % misc.format_time(time.time() - time_begin), end='') + for obj, vals in zip(metric_objs, results): + for val, fmt in zip(vals, obj.get_metric_formatting()): + print(fmt % val, end='') + print() + print() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_converter.py b/ContraCLIP/models/genforce/converters/stylegan2_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..13ac321ea90852337d814f297e68db7e358ca1ad --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_converter.py @@ -0,0 +1,262 @@ +# python3.7 +"""Converts StyleGAN2 model weights from TensorFlow to PyTorch. + +The models can be trained through OR released by the repository: + +https://github.com/NVlabs/stylegan2 +""" + +import os +import sys +import pickle +import warnings +warnings.filterwarnings('ignore', category=FutureWarning) + +# pylint: disable=wrong-import-position +from tqdm import tqdm +import numpy as np +import tensorflow as tf +import torch +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +from models import build_model +from utils.visualizer import HtmlPageVisualizer +from utils.visualizer import postprocess_image +# pylint: enable=wrong-import-position + +__all__ = ['convert_stylegan2_weight'] + +GAN_TPYE = 'stylegan2' +OFFICIAL_CODE_DIR = 'stylegan2_official' +BASE_DIR = os.path.dirname(os.path.relpath(__file__)) +CODE_PATH = os.path.join(BASE_DIR, OFFICIAL_CODE_DIR) + +TRUNC_PSI = 0.5 +TRUNC_LAYERS = 18 +RANDOMIZE_NOISE = False + + +def convert_stylegan2_weight(tf_weight_path, + pth_weight_path, + test_num=10, + save_test_image=False, + verbose=False): + """Converts the pre-trained StyleGAN2 weights. + + Args: + tf_weight_path: Path to the TensorFlow model to load weights from. + pth_weight_path: Path to the PyTorch model to save converted weights. + test_num: Number of samples used to test the conversion. (default: 10) + save_test_image: Whether to save the test images. (default: False) + verbose: Whether to print verbose log message. 
(default: False) + """ + sess = tf.compat.v1.InteractiveSession() + + print(f'========================================') + print(f'Loading TensorFlow weights from `{tf_weight_path}` ...') + sys.path.insert(0, CODE_PATH) + with open(tf_weight_path, 'rb') as f: + G, D, Gs = pickle.load(f) + sys.path.pop(0) + print(f'Successfully loaded!') + print(f'--------------------') + + z_space_dim = G.input_shapes[0][1] + label_size = G.input_shapes[1][1] + w_space_dim = G.components.mapping.output_shape[2] + image_channels = G.output_shape[1] + resolution = G.output_shape[2] + repeat_w = True + + print(f'Converting TensorFlow weights (G) to PyTorch version ...') + G_vars = dict(G.__getstate__()['variables']) + G_vars.update(dict(G.components.mapping.__getstate__()['variables'])) + G_vars.update(dict(G.components.synthesis.__getstate__()['variables'])) + G_pth = build_model(gan_type=GAN_TPYE, + module='generator', + resolution=resolution, + z_space_dim=z_space_dim, + w_space_dim=w_space_dim, + label_size=label_size, + repeat_w=repeat_w, + image_channels=image_channels) + G_state_dict = G_pth.state_dict() + for pth_var_name, tf_var_name in G_pth.pth_to_tf_var_mapping.items(): + assert tf_var_name in G_vars + assert pth_var_name in G_state_dict + if verbose: + print(f' Converting `{tf_var_name}` to `{pth_var_name}`.') + var = torch.from_numpy(np.array(G_vars[tf_var_name])) + if 'weight' in tf_var_name: + if 'Dense' in tf_var_name: + var = var.permute(1, 0) + elif 'mod_weight' in tf_var_name: + var = var.permute(1, 0) + elif 'LabelConcat' in tf_var_name: + pass + else: + var = var.permute(3, 2, 0, 1) + G_state_dict[pth_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Converting TensorFlow weights (Gs) to PyTorch version ...') + Gs_vars = dict(Gs.__getstate__()['variables']) + Gs_vars.update(dict(Gs.components.mapping.__getstate__()['variables'])) + Gs_vars.update(dict(Gs.components.synthesis.__getstate__()['variables'])) + Gs_pth = build_model(gan_type=GAN_TPYE, + module='generator', + resolution=resolution, + z_space_dim=z_space_dim, + w_space_dim=w_space_dim, + label_size=label_size, + repeat_w=True, + image_channels=image_channels) + Gs_state_dict = Gs_pth.state_dict() + for pth_var_name, tf_var_name in Gs_pth.pth_to_tf_var_mapping.items(): + assert tf_var_name in Gs_vars + assert pth_var_name in Gs_state_dict + if verbose: + print(f' Converting `{tf_var_name}` to `{pth_var_name}`.') + var = torch.from_numpy(np.array(Gs_vars[tf_var_name])) + if 'weight' in tf_var_name: + if 'Dense' in tf_var_name: + var = var.permute(1, 0) + elif 'mod_weight' in tf_var_name: + var = var.permute(1, 0) + elif 'LabelConcat' in tf_var_name: + pass + else: + var = var.permute(3, 2, 0, 1) + Gs_state_dict[pth_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Converting TensorFlow weights (D) to PyTorch version ...') + D_vars = dict(D.__getstate__()['variables']) + D_pth = build_model(gan_type=GAN_TPYE, + module='discriminator', + resolution=resolution, + label_size=label_size, + image_channels=image_channels) + D_state_dict = D_pth.state_dict() + for pth_var_name, tf_var_name in D_pth.pth_to_tf_var_mapping.items(): + assert tf_var_name in D_vars + assert pth_var_name in D_state_dict + if verbose: + print(f' Converting `{tf_var_name}` to `{pth_var_name}`.') + var = torch.from_numpy(np.array(D_vars[tf_var_name])) + if 'weight' in tf_var_name: + if 'Dense' in tf_var_name: + var = var.permute(1, 0) + elif 'Output' in tf_var_name: + var 
= var.permute(1, 0) + else: + var = var.permute(3, 2, 0, 1) + D_state_dict[pth_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Saving pth weights to `{pth_weight_path}` ...') + state_dict = { + 'generator': G_state_dict, + 'discriminator': D_state_dict, + 'generator_smooth': Gs_state_dict, + } + torch.save(state_dict, pth_weight_path) + print(f'Successfully saved!') + print(f'--------------------') + + # Start testing if needed. + if test_num <= 0 or not tf.test.is_built_with_cuda(): + warnings.warn(f'Skip testing the converted weights!') + sess.close() + return + + if save_test_image: + html = HtmlPageVisualizer(num_rows=test_num, num_cols=3) + html.set_headers(['Index', 'Before Conversion', 'After Conversion']) + for i in range(test_num): + html.set_cell(i, 0, text=f'{i}') + + print(f'Testing conversion results ...') + G_pth.load_state_dict(G_state_dict) + D_pth.load_state_dict(D_state_dict) + Gs_pth.load_state_dict(Gs_state_dict) + G_pth.eval().cuda() + D_pth.eval().cuda() + Gs_pth.eval().cuda() + + gs_distance = 0.0 + dg_distance = 0.0 + for i in tqdm(range(test_num)): + # Test Gs(z). + code = np.random.randn(1, z_space_dim) + pth_code = torch.from_numpy(code).type(torch.FloatTensor).cuda() + if label_size: + label_id = np.random.randint(label_size) + label = np.zeros((1, label_size), np.float32) + label[0, label_id] = 1.0 + pth_label = torch.from_numpy(label).type(torch.FloatTensor).cuda() + else: + label_id = 0 + label = None + pth_label = None + tf_output = Gs.run(code, + label, + truncation_psi=TRUNC_PSI, + truncation_cutoff=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE) + pth_output = Gs_pth(pth_code, + label=pth_label, + trunc_psi=TRUNC_PSI, + trunc_layers=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE)['image'] + pth_output = pth_output.detach().cpu().numpy() + distance = np.average(np.abs(tf_output - pth_output)) + if verbose: + print(f' Test {i:03d}: Gs distance {distance:.6e}.') + gs_distance += distance + + if save_test_image: + html.set_cell(i, 1, image=postprocess_image(tf_output)[0]) + html.set_cell(i, 2, image=postprocess_image(pth_output)[0]) + + # Test D(G(z)). 
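+ # A fresh latent is drawn, rendered by both the TensorFlow and the PyTorch
+ # generator, and scored by the corresponding discriminators; the reported
+ # distance is the mean absolute difference between the two scores.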
+ code = np.random.randn(1, z_space_dim) + pth_code = torch.from_numpy(code).type(torch.FloatTensor).cuda() + if label_size: + label_id = np.random.randint(label_size) + label = np.zeros((1, label_size), np.float32) + label[0, label_id] = 1.0 + pth_label = torch.from_numpy(label).type(torch.FloatTensor).cuda() + else: + label_id = 0 + label = None + pth_label = None + tf_image = G.run(code, + label, + truncation_psi=TRUNC_PSI, + truncation_cutoff=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE) + tf_output = D.run(tf_image, label) + pth_image = G_pth(pth_code, + label=pth_label, + trunc_psi=TRUNC_PSI, + trunc_layers=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE)['image'] + pth_output = D_pth(pth_image, pth_label) + pth_output = pth_output.detach().cpu().numpy() + distance = np.average(np.abs(tf_output - pth_output)) + if verbose: + print(f' Test {i:03d}: D(G) distance {distance:.6e}.') + dg_distance += distance + + print(f'Average Gs distance is {gs_distance / test_num:.6e}.') + print(f'Average D(G) distance is {dg_distance / test_num:.6e}.') + print(f'========================================') + + if save_test_image: + html.save(f'{pth_weight_path}.conversion_test.html') + + sess.close() diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/Dockerfile b/ContraCLIP/models/genforce/converters/stylegan2_official/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..ab45a553e0d49878585054e690aba74f2ca939ff --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/Dockerfile @@ -0,0 +1,11 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +FROM tensorflow/tensorflow:1.15.0-gpu-py3 + +RUN pip install scipy==1.3.3 +RUN pip install requests==2.22.0 +RUN pip install Pillow==6.2.1 diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/README.md b/ContraCLIP/models/genforce/converters/stylegan2_official/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f8d46bb85b8476d494e47cd658427a753d92be4 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/README.md @@ -0,0 +1,220 @@ +## StyleGAN2 — Official TensorFlow Implementation + +![Teaser image](./docs/stylegan2-teaser-1024x256.png) + +**Analyzing and Improving the Image Quality of StyleGAN**
+Tero Karras, Samuli Laine, Miika Aittala, Janne Hellsten, Jaakko Lehtinen, Timo Aila
+ +Paper: http://arxiv.org/abs/1912.04958
+Video: https://youtu.be/c-NJtV9Jvp0
+ +Abstract: *The style-based GAN architecture (StyleGAN) yields state-of-the-art results in data-driven unconditional generative image modeling. We expose and analyze several of its characteristic artifacts, and propose changes in both model architecture and training methods to address them. In particular, we redesign generator normalization, revisit progressive growing, and regularize the generator to encourage good conditioning in the mapping from latent vectors to images. In addition to improving image quality, this path length regularizer yields the additional benefit that the generator becomes significantly easier to invert. This makes it possible to reliably detect if an image is generated by a particular network. We furthermore visualize how well the generator utilizes its output resolution, and identify a capacity problem, motivating us to train larger models for additional quality improvements. Overall, our improved model redefines the state of the art in unconditional image modeling, both in terms of existing distribution quality metrics as well as perceived image quality.* + +For business inquiries, please contact [researchinquiries@nvidia.com](mailto:researchinquiries@nvidia.com)
+For press and other inquiries, please contact Hector Marinez at [hmarinez@nvidia.com](mailto:hmarinez@nvidia.com)
+ +| Additional material |   +| :--- | :---------- +| [StyleGAN2](https://drive.google.com/open?id=1QHc-yF5C3DChRwSdZKcx1w6K8JvSxQi7) | Main Google Drive folder +| ├  [stylegan2-paper.pdf](https://drive.google.com/open?id=1fnF-QsiQeKaxF-HbvFiGtzHF_Bf3CzJu) | High-quality version of the paper +| ├  [stylegan2-video.mp4](https://drive.google.com/open?id=1f_gbKW6FUUHKkUxciJ_lQx29mCq_fSBy) | High-quality version of the video +| ├  [images](https://drive.google.com/open?id=1Sak157_DLX84ytqHHqZaH_59HoEWzfB7) | Example images produced using our method +| │  ├  [curated-images](https://drive.google.com/open?id=1ydWb8xCHzDKMTW9kQ7sL-B1R0zATHVHp) | Hand-picked images showcasing our results +| │  └  [100k-generated-images](https://drive.google.com/open?id=1BA2OZ1GshdfFZGYZPob5QWOGBuJCdu5q) | Random images with and without truncation +| ├  [videos](https://drive.google.com/open?id=1yXDV96SFXoUiZKU7AyE6DyKgDpIk4wUZ) | Individual clips of the video as high-quality MP4 +| └  [networks](https://drive.google.com/open?id=1yanUI9m4b4PWzR0eurKNq6JR1Bbfbh6L) | Pre-trained networks +|    ├  [stylegan2-ffhq-config-f.pkl](http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-ffhq-config-f.pkl) | StyleGAN2 for FFHQ dataset at 1024×1024 +|    ├  [stylegan2-car-config-f.pkl](http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-car-config-f.pkl) | StyleGAN2 for LSUN Car dataset at 512×384 +|    ├  [stylegan2-cat-config-f.pkl](http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-cat-config-f.pkl) | StyleGAN2 for LSUN Cat dataset at 256×256 +|    ├  [stylegan2-church-config-f.pkl](http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-church-config-f.pkl) | StyleGAN2 for LSUN Church dataset at 256×256 +|    ├  [stylegan2-horse-config-f.pkl](http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-horse-config-f.pkl) | StyleGAN2 for LSUN Horse dataset at 256×256 +|    └ ⋯ | Other training configurations used in the paper + +## Requirements + +* Both Linux and Windows are supported. Linux is recommended for performance and compatibility reasons. +* 64-bit Python 3.6 installation. We recommend Anaconda3 with numpy 1.14.3 or newer. +* TensorFlow 1.14 or 1.15 with GPU support. The code does not support TensorFlow 2.0. +* On Windows, you need to use TensorFlow 1.14 — TensorFlow 1.15 will not work. +* One or more high-end NVIDIA GPUs, NVIDIA drivers, CUDA 10.0 toolkit and cuDNN 7.5. To reproduce the results reported in the paper, you need an NVIDIA GPU with at least 16 GB of DRAM. +* Docker users: use the [provided Dockerfile](./Dockerfile) to build an image with the required library dependencies. + +StyleGAN2 relies on custom TensorFlow ops that are compiled on the fly using [NVCC](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html). To test that your NVCC installation is working correctly, run: + +```.bash +nvcc test_nvcc.cu -o test_nvcc -run +| CPU says hello. +| GPU says hello. +``` + +On Windows, the compilation requires Microsoft Visual Studio to be in `PATH`. We recommend installing [Visual Studio Community Edition](https://visualstudio.microsoft.com/vs/) and adding into `PATH` using `"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"`. + +## Using pre-trained networks + +Pre-trained networks are stored as `*.pkl` files on the [StyleGAN2 Google Drive folder](https://drive.google.com/open?id=1QHc-yF5C3DChRwSdZKcx1w6K8JvSxQi7). 
Below, you can either reference them directly using the syntax `gdrive:networks/.pkl`, or download them manually and reference by filename. + +```.bash +# Generate uncurated ffhq images (matches paper Figure 12) +python run_generator.py generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl \ + --seeds=6600-6625 --truncation-psi=0.5 + +# Generate curated ffhq images (matches paper Figure 11) +python run_generator.py generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl \ + --seeds=66,230,389,1518 --truncation-psi=1.0 + +# Generate uncurated car images +python run_generator.py generate-images --network=gdrive:networks/stylegan2-car-config-f.pkl \ + --seeds=6000-6025 --truncation-psi=0.5 + +# Example of style mixing (matches the corresponding video clip) +python run_generator.py style-mixing-example --network=gdrive:networks/stylegan2-ffhq-config-f.pkl \ + --row-seeds=85,100,75,458,1500 --col-seeds=55,821,1789,293 --truncation-psi=1.0 +``` + +The results are placed in `results//*.png`. You can change the location with `--result-dir`. For example, `--result-dir=~/my-stylegan2-results`. + +You can import the networks in your own Python code using `pickle.load()`. For this to work, you need to include the `dnnlib` source directory in `PYTHONPATH` and create a default TensorFlow session by calling `dnnlib.tflib.init_tf()`. See [run_generator.py](./run_generator.py) and [pretrained_networks.py](./pretrained_networks.py) for examples. + +## Preparing datasets + +Datasets are stored as multi-resolution TFRecords, similar to the [original StyleGAN](https://github.com/NVlabs/stylegan). Each dataset consists of multiple `*.tfrecords` files stored under a common directory, e.g., `~/datasets/ffhq/ffhq-r*.tfrecords`. In the following sections, the datasets are referenced using a combination of `--dataset` and `--data-dir` arguments, e.g., `--dataset=ffhq --data-dir=~/datasets`. + +**FFHQ**. To download the [Flickr-Faces-HQ](https://github.com/NVlabs/ffhq-dataset) dataset as multi-resolution TFRecords, run: + +```.bash +pushd ~ +git clone https://github.com/NVlabs/ffhq-dataset.git +cd ffhq-dataset +python download_ffhq.py --tfrecords +popd +python dataset_tool.py display ~/ffhq-dataset/tfrecords/ffhq +``` + +**LSUN**. Download the desired LSUN categories in LMDB format from the [LSUN project page](https://www.yf.io/p/lsun). To convert the data to multi-resolution TFRecords, run: + +```.bash +python dataset_tool.py create_lsun_wide ~/datasets/car ~/lsun/car_lmdb --width=512 --height=384 +python dataset_tool.py create_lsun ~/datasets/cat ~/lsun/cat_lmdb --resolution=256 +python dataset_tool.py create_lsun ~/datasets/church ~/lsun/church_outdoor_train_lmdb --resolution=256 +python dataset_tool.py create_lsun ~/datasets/horse ~/lsun/horse_lmdb --resolution=256 +``` + +**Custom**. Create custom datasets by placing all training images under a single directory. The images must be square-shaped and they must all have the same power-of-two dimensions. 
To convert the images to multi-resolution TFRecords, run: + +```.bash +python dataset_tool.py create_from_images ~/datasets/my-custom-dataset ~/my-custom-images +python dataset_tool.py display ~/datasets/my-custom-dataset +``` + +## Projecting images to latent space + +To find the matching latent vectors for a set of images, run: + +```.bash +# Project generated images +python run_projector.py project-generated-images --network=gdrive:networks/stylegan2-car-config-f.pkl \ + --seeds=0,1,5 + +# Project real images +python run_projector.py project-real-images --network=gdrive:networks/stylegan2-car-config-f.pkl \ + --dataset=car --data-dir=~/datasets +``` + +## Training networks + +To reproduce the training runs for config F in Tables 1 and 3, run: + +```.bash +python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \ + --dataset=ffhq --mirror-augment=true +python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \ + --dataset=car --total-kimg=57000 +python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \ + --dataset=cat --total-kimg=88000 +python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \ + --dataset=church --total-kimg 88000 --gamma=100 +python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \ + --dataset=horse --total-kimg 100000 --gamma=100 +``` + +For other configurations, see `python run_training.py --help`. + +We have verified that the results match the paper when training with 1, 2, 4, or 8 GPUs. Note that training FFHQ at 1024×1024 resolution requires GPU(s) with at least 16 GB of memory. The following table lists typical training times using NVIDIA DGX-1 with 8 Tesla V100 GPUs: + +| Configuration | Resolution | Total kimg | 1 GPU | 2 GPUs | 4 GPUs | 8 GPUs | GPU mem | +| :------------ | :-------------: | :--------: | :-----: | :-----: | :-----: | :----: | :-----: | +| `config-f` | 1024×1024 | 25000 | 69d 23h | 36d 4h | 18d 14h | 9d 18h | 13.3 GB | +| `config-f` | 1024×1024 | 10000 | 27d 23h | 14d 11h | 7d 10h | 3d 22h | 13.3 GB | +| `config-e` | 1024×1024 | 25000 | 35d 11h | 18d 15h | 9d 15h | 5d 6h | 8.6 GB | +| `config-e` | 1024×1024 | 10000 | 14d 4h | 7d 11h | 3d 20h | 2d 3h | 8.6 GB | +| `config-f` | 256×256 | 25000 | 32d 13h | 16d 23h | 8d 21h | 4d 18h | 6.4 GB | +| `config-f` | 256×256 | 10000 | 13d 0h | 6d 19h | 3d 13h | 1d 22h | 6.4 GB | + +Training curves for FFHQ config F (StyleGAN2) compared to original StyleGAN using 8 GPUs: + +![Training curves](./docs/stylegan2-training-curves.png) + +After training, the resulting networks can be used the same way as the official pre-trained networks: + +```.bash +# Generate 1000 random images without truncation +python run_generator.py generate-images --seeds=0-999 --truncation-psi=1.0 \ + --network=results/00006-stylegan2-ffhq-8gpu-config-f/networks-final.pkl +``` + +## Evaluation metrics + +To reproduce the numbers for config F in Tables 1 and 3, run: + +```.bash +python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-ffhq-config-f.pkl \ + --metrics=fid50k,ppl_wend --dataset=ffhq --mirror-augment=true +python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-car-config-f.pkl \ + --metrics=fid50k,ppl2_wend --dataset=car +python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-cat-config-f.pkl \ + --metrics=fid50k,ppl2_wend --dataset=cat +python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-church-config-f.pkl \ + 
--metrics=fid50k,ppl2_wend --dataset=church +python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-horse-config-f.pkl \ + --metrics=fid50k,ppl2_wend --dataset=horse +``` + +For other configurations, see the [StyleGAN2 Google Drive folder](https://drive.google.com/open?id=1QHc-yF5C3DChRwSdZKcx1w6K8JvSxQi7). + +Note that the metrics are evaluated using a different random seed each time, so the results will vary between runs. In the paper, we reported the average result of running each metric 10 times. The following table lists the available metrics along with their expected runtimes and random variation: + +| Metric | FFHQ config F | 1 GPU | 2 GPUs | 4 GPUs | Description | +| :---------- | :------------: | :----: | :-----: | :----: | :---------- | +| `fid50k` | 2.84 ± 0.03 | 22 min | 14 min | 10 min | [Fréchet Inception Distance](https://arxiv.org/abs/1706.08500) +| `is50k` | 5.13 ± 0.02 | 23 min | 14 min | 8 min | [Inception Score](https://arxiv.org/abs/1606.03498) +| `ppl_zfull` | 348.0 ± 3.8 | 41 min | 22 min | 14 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) in Z, full paths +| `ppl_wfull` | 126.9 ± 0.2 | 42 min | 22 min | 13 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) in W, full paths +| `ppl_zend` | 348.6 ± 3.0 | 41 min | 22 min | 14 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) in Z, path endpoints +| `ppl_wend` | 129.4 ± 0.8 | 40 min | 23 min | 13 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) in W, path endpoints +| `ppl2_wend` | 145.0 ± 0.5 | 41 min | 23 min | 14 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) without center crop +| `ls` | 154.2 / 4.27 | 10 hrs | 6 hrs | 4 hrs | [Linear Separability](https://arxiv.org/abs/1812.04948) +| `pr50k3` | 0.689 / 0.492 | 26 min | 17 min | 12 min | [Precision and Recall](https://arxiv.org/abs/1904.06991) + +Note that some of the metrics cache dataset-specific data on the disk, and they will take somewhat longer when run for the first time. + +## License + +Copyright © 2019, NVIDIA Corporation. All rights reserved. + +This work is made available under the Nvidia Source Code License-NC. To view a copy of this license, visit https://nvlabs.github.io/stylegan2/license.html + +## Citation + +``` +@article{Karras2019stylegan2, + title = {Analyzing and Improving the Image Quality of {StyleGAN}}, + author = {Tero Karras and Samuli Laine and Miika Aittala and Janne Hellsten and Jaakko Lehtinen and Timo Aila}, + journal = {CoRR}, + volume = {abs/1912.04958}, + year = {2019}, +} +``` + +## Acknowledgements + +We thank Ming-Yu Liu for an early review, Timo Viitanen for his help with code release, and Tero Kuosmanen for compute infrastructure. diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dataset_tool.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dataset_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..d8c4dc280b4e40219f649c31e304c058da7ed043 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dataset_tool.py @@ -0,0 +1,644 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. 
+# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Tool for creating multi-resolution TFRecords datasets.""" + +# pylint: disable=too-many-lines +import os +import sys +import glob +import argparse +import threading +import six.moves.queue as Queue # pylint: disable=import-error +import traceback +import numpy as np +import tensorflow as tf +import PIL.Image +import dnnlib.tflib as tflib + +from training import dataset + +#---------------------------------------------------------------------------- + +def error(msg): + print('Error: ' + msg) + exit(1) + +#---------------------------------------------------------------------------- + +class TFRecordExporter: + def __init__(self, tfrecord_dir, expected_images, print_progress=True, progress_interval=10): + self.tfrecord_dir = tfrecord_dir + self.tfr_prefix = os.path.join(self.tfrecord_dir, os.path.basename(self.tfrecord_dir)) + self.expected_images = expected_images + self.cur_images = 0 + self.shape = None + self.resolution_log2 = None + self.tfr_writers = [] + self.print_progress = print_progress + self.progress_interval = progress_interval + + if self.print_progress: + print('Creating dataset "%s"' % tfrecord_dir) + if not os.path.isdir(self.tfrecord_dir): + os.makedirs(self.tfrecord_dir) + assert os.path.isdir(self.tfrecord_dir) + + def close(self): + if self.print_progress: + print('%-40s\r' % 'Flushing data...', end='', flush=True) + for tfr_writer in self.tfr_writers: + tfr_writer.close() + self.tfr_writers = [] + if self.print_progress: + print('%-40s\r' % '', end='', flush=True) + print('Added %d images.' % self.cur_images) + + def choose_shuffled_order(self): # Note: Images and labels must be added in shuffled order. + order = np.arange(self.expected_images) + np.random.RandomState(123).shuffle(order) + return order + + def add_image(self, img): + if self.print_progress and self.cur_images % self.progress_interval == 0: + print('%d / %d\r' % (self.cur_images, self.expected_images), end='', flush=True) + if self.shape is None: + self.shape = img.shape + self.resolution_log2 = int(np.log2(self.shape[1])) + assert self.shape[0] in [1, 3] + assert self.shape[1] == self.shape[2] + assert self.shape[1] == 2**self.resolution_log2 + tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE) + for lod in range(self.resolution_log2 - 1): + tfr_file = self.tfr_prefix + '-r%02d.tfrecords' % (self.resolution_log2 - lod) + self.tfr_writers.append(tf.python_io.TFRecordWriter(tfr_file, tfr_opt)) + assert img.shape == self.shape + for lod, tfr_writer in enumerate(self.tfr_writers): + if lod: + img = img.astype(np.float32) + img = (img[:, 0::2, 0::2] + img[:, 0::2, 1::2] + img[:, 1::2, 0::2] + img[:, 1::2, 1::2]) * 0.25 + quant = np.rint(img).clip(0, 255).astype(np.uint8) + ex = tf.train.Example(features=tf.train.Features(feature={ + 'shape': tf.train.Feature(int64_list=tf.train.Int64List(value=quant.shape)), + 'data': tf.train.Feature(bytes_list=tf.train.BytesList(value=[quant.tostring()]))})) + tfr_writer.write(ex.SerializeToString()) + self.cur_images += 1 + + def add_labels(self, labels): + if self.print_progress: + print('%-40s\r' % 'Saving labels...', end='', flush=True) + assert labels.shape[0] == self.cur_images + with open(self.tfr_prefix + '-rxx.labels', 'wb') as f: + np.save(f, labels.astype(np.float32)) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + +#---------------------------------------------------------------------------- + 
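+# Illustrative usage sketch (added for exposition; not part of the original NVIDIA tool).
+# TFRecordExporter is meant to be used as a context manager: images are added as uint8
+# CHW arrays with a square, power-of-two resolution, preferably in the shuffled order
+# returned by choose_shuffled_order(); the create_* helpers below follow this pattern.
+# The helper name and arguments here are hypothetical.
+def _example_export(tfrecord_dir, images, onehot_labels=None):
+    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
+        order = tfr.choose_shuffled_order()  # images and labels must be added in shuffled order
+        for idx in range(order.size):
+            tfr.add_image(images[order[idx]])  # uint8 array of shape [C, H, W]
+        if onehot_labels is not None:
+            tfr.add_labels(onehot_labels[order])  # float32 one-hot labels, same order
+
+#----------------------------------------------------------------------------
+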
+class ExceptionInfo(object): + def __init__(self): + self.value = sys.exc_info()[1] + self.traceback = traceback.format_exc() + +#---------------------------------------------------------------------------- + +class WorkerThread(threading.Thread): + def __init__(self, task_queue): + threading.Thread.__init__(self) + self.task_queue = task_queue + + def run(self): + while True: + func, args, result_queue = self.task_queue.get() + if func is None: + break + try: + result = func(*args) + except: + result = ExceptionInfo() + result_queue.put((result, args)) + +#---------------------------------------------------------------------------- + +class ThreadPool(object): + def __init__(self, num_threads): + assert num_threads >= 1 + self.task_queue = Queue.Queue() + self.result_queues = dict() + self.num_threads = num_threads + for _idx in range(self.num_threads): + thread = WorkerThread(self.task_queue) + thread.daemon = True + thread.start() + + def add_task(self, func, args=()): + assert hasattr(func, '__call__') # must be a function + if func not in self.result_queues: + self.result_queues[func] = Queue.Queue() + self.task_queue.put((func, args, self.result_queues[func])) + + def get_result(self, func): # returns (result, args) + result, args = self.result_queues[func].get() + if isinstance(result, ExceptionInfo): + print('\n\nWorker thread caught an exception:\n' + result.traceback) + raise result.value + return result, args + + def finish(self): + for _idx in range(self.num_threads): + self.task_queue.put((None, (), None)) + + def __enter__(self): # for 'with' statement + return self + + def __exit__(self, *excinfo): + self.finish() + + def process_items_concurrently(self, item_iterator, process_func=lambda x: x, pre_func=lambda x: x, post_func=lambda x: x, max_items_in_flight=None): + if max_items_in_flight is None: max_items_in_flight = self.num_threads * 4 + assert max_items_in_flight >= 1 + results = [] + retire_idx = [0] + + def task_func(prepared, _idx): + return process_func(prepared) + + def retire_result(): + processed, (_prepared, idx) = self.get_result(task_func) + results[idx] = processed + while retire_idx[0] < len(results) and results[retire_idx[0]] is not None: + yield post_func(results[retire_idx[0]]) + results[retire_idx[0]] = None + retire_idx[0] += 1 + + for idx, item in enumerate(item_iterator): + prepared = pre_func(item) + results.append(None) + self.add_task(func=task_func, args=(prepared, idx)) + while retire_idx[0] < idx - max_items_in_flight + 2: + for res in retire_result(): yield res + while retire_idx[0] < len(results): + for res in retire_result(): yield res + +#---------------------------------------------------------------------------- + +def display(tfrecord_dir): + print('Loading dataset "%s"' % tfrecord_dir) + tflib.init_tf({'gpu_options.allow_growth': True}) + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle_mb=0) + tflib.init_uninitialized_vars() + import cv2 # pip install opencv-python + + idx = 0 + while True: + try: + images, labels = dset.get_minibatch_np(1) + except tf.errors.OutOfRangeError: + break + if idx == 0: + print('Displaying images') + cv2.namedWindow('dataset_tool') + print('Press SPACE or ENTER to advance, ESC to exit') + print('\nidx = %-8d\nlabel = %s' % (idx, labels[0].tolist())) + cv2.imshow('dataset_tool', images[0].transpose(1, 2, 0)[:, :, ::-1]) # CHW => HWC, RGB => BGR + idx += 1 + if cv2.waitKey() == 27: + break + print('\nDisplayed %d images.' 
% idx) + +#---------------------------------------------------------------------------- + +def extract(tfrecord_dir, output_dir): + print('Loading dataset "%s"' % tfrecord_dir) + tflib.init_tf({'gpu_options.allow_growth': True}) + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size=0, repeat=False, shuffle_mb=0) + tflib.init_uninitialized_vars() + + print('Extracting images to "%s"' % output_dir) + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + idx = 0 + while True: + if idx % 10 == 0: + print('%d\r' % idx, end='', flush=True) + try: + images, _labels = dset.get_minibatch_np(1) + except tf.errors.OutOfRangeError: + break + if images.shape[1] == 1: + img = PIL.Image.fromarray(images[0][0], 'L') + else: + img = PIL.Image.fromarray(images[0].transpose(1, 2, 0), 'RGB') + img.save(os.path.join(output_dir, 'img%08d.png' % idx)) + idx += 1 + print('Extracted %d images.' % idx) + +#---------------------------------------------------------------------------- + +def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels): + max_label_size = 0 if ignore_labels else 'full' + print('Loading dataset "%s"' % tfrecord_dir_a) + tflib.init_tf({'gpu_options.allow_growth': True}) + dset_a = dataset.TFRecordDataset(tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle_mb=0) + print('Loading dataset "%s"' % tfrecord_dir_b) + dset_b = dataset.TFRecordDataset(tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle_mb=0) + tflib.init_uninitialized_vars() + + print('Comparing datasets') + idx = 0 + identical_images = 0 + identical_labels = 0 + while True: + if idx % 100 == 0: + print('%d\r' % idx, end='', flush=True) + try: + images_a, labels_a = dset_a.get_minibatch_np(1) + except tf.errors.OutOfRangeError: + images_a, labels_a = None, None + try: + images_b, labels_b = dset_b.get_minibatch_np(1) + except tf.errors.OutOfRangeError: + images_b, labels_b = None, None + if images_a is None or images_b is None: + if images_a is not None or images_b is not None: + print('Datasets contain different number of images') + break + if images_a.shape == images_b.shape and np.all(images_a == images_b): + identical_images += 1 + else: + print('Image %d is different' % idx) + if labels_a.shape == labels_b.shape and np.all(labels_a == labels_b): + identical_labels += 1 + else: + print('Label %d is different' % idx) + idx += 1 + print('Identical images: %d / %d' % (identical_images, idx)) + if not ignore_labels: + print('Identical labels: %d / %d' % (identical_labels, idx)) + +#---------------------------------------------------------------------------- + +def create_mnist(tfrecord_dir, mnist_dir): + print('Loading MNIST from "%s"' % mnist_dir) + import gzip + with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: + images = np.frombuffer(file.read(), np.uint8, offset=16) + with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file: + labels = np.frombuffer(file.read(), np.uint8, offset=8) + images = images.reshape(-1, 1, 28, 28) + images = np.pad(images, [(0,0), (0,0), (2,2), (2,2)], 'constant', constant_values=0) + assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (60000,) and labels.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) 
as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123): + print('Loading MNIST from "%s"' % mnist_dir) + import gzip + with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: + images = np.frombuffer(file.read(), np.uint8, offset=16) + images = images.reshape(-1, 28, 28) + images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0) + assert images.shape == (60000, 32, 32) and images.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + + with TFRecordExporter(tfrecord_dir, num_images) as tfr: + rnd = np.random.RandomState(random_seed) + for _idx in range(num_images): + tfr.add_image(images[rnd.randint(images.shape[0], size=3)]) + +#---------------------------------------------------------------------------- + +def create_cifar10(tfrecord_dir, cifar10_dir): + print('Loading CIFAR-10 from "%s"' % cifar10_dir) + import pickle + images = [] + labels = [] + for batch in range(1, 6): + with open(os.path.join(cifar10_dir, 'data_batch_%d' % batch), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images.append(data['data'].reshape(-1, 3, 32, 32)) + labels.append(data['labels']) + images = np.concatenate(images) + labels = np.concatenate(labels) + assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (50000,) and labels.dtype == np.int32 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_cifar100(tfrecord_dir, cifar100_dir): + print('Loading CIFAR-100 from "%s"' % cifar100_dir) + import pickle + with open(os.path.join(cifar100_dir, 'train'), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images = data['data'].reshape(-1, 3, 32, 32) + labels = np.array(data['fine_labels']) + assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (50000,) and labels.dtype == np.int32 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 99 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_svhn(tfrecord_dir, svhn_dir): + print('Loading SVHN from "%s"' % svhn_dir) + import pickle + images = [] + labels = [] + for batch in range(1, 4): + with open(os.path.join(svhn_dir, 'train_%d.pkl' % batch), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images.append(data[0]) + labels.append(data[1]) + images = np.concatenate(images) + labels = np.concatenate(labels) + assert images.shape == (73257, 3, 32, 32) 
and images.dtype == np.uint8 + assert labels.shape == (73257,) and labels.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_lsun(tfrecord_dir, lmdb_dir, resolution=256, max_images=None): + print('Loading LSUN dataset from "%s"' % lmdb_dir) + import lmdb # pip install lmdb # pylint: disable=import-error + import cv2 # pip install opencv-python + import io + with lmdb.open(lmdb_dir, readonly=True).begin(write=False) as txn: + total_images = txn.stat()['entries'] # pylint: disable=no-value-for-parameter + if max_images is None: + max_images = total_images + with TFRecordExporter(tfrecord_dir, max_images) as tfr: + for _idx, (_key, value) in enumerate(txn.cursor()): + try: + try: + img = cv2.imdecode(np.fromstring(value, dtype=np.uint8), 1) + if img is None: + raise IOError('cv2.imdecode failed') + img = img[:, :, ::-1] # BGR => RGB + except IOError: + img = np.asarray(PIL.Image.open(io.BytesIO(value))) + crop = np.min(img.shape[:2]) + img = img[(img.shape[0] - crop) // 2 : (img.shape[0] + crop) // 2, (img.shape[1] - crop) // 2 : (img.shape[1] + crop) // 2] + img = PIL.Image.fromarray(img, 'RGB') + img = img.resize((resolution, resolution), PIL.Image.ANTIALIAS) + img = np.asarray(img) + img = img.transpose([2, 0, 1]) # HWC => CHW + tfr.add_image(img) + except: + print(sys.exc_info()[1]) + if tfr.cur_images == max_images: + break + +#---------------------------------------------------------------------------- + +def create_lsun_wide(tfrecord_dir, lmdb_dir, width=512, height=384, max_images=None): + assert width == 2 ** int(np.round(np.log2(width))) + assert height <= width + print('Loading LSUN dataset from "%s"' % lmdb_dir) + import lmdb # pip install lmdb # pylint: disable=import-error + import cv2 # pip install opencv-python + import io + with lmdb.open(lmdb_dir, readonly=True).begin(write=False) as txn: + total_images = txn.stat()['entries'] # pylint: disable=no-value-for-parameter + if max_images is None: + max_images = total_images + with TFRecordExporter(tfrecord_dir, max_images, print_progress=False) as tfr: + for idx, (_key, value) in enumerate(txn.cursor()): + try: + try: + img = cv2.imdecode(np.fromstring(value, dtype=np.uint8), 1) + if img is None: + raise IOError('cv2.imdecode failed') + img = img[:, :, ::-1] # BGR => RGB + except IOError: + img = np.asarray(PIL.Image.open(io.BytesIO(value))) + + ch = int(np.round(width * img.shape[0] / img.shape[1])) + if img.shape[1] < width or ch < height: + continue + + img = img[(img.shape[0] - ch) // 2 : (img.shape[0] + ch) // 2] + img = PIL.Image.fromarray(img, 'RGB') + img = img.resize((width, height), PIL.Image.ANTIALIAS) + img = np.asarray(img) + img = img.transpose([2, 0, 1]) # HWC => CHW + + canvas = np.zeros([3, width, width], dtype=np.uint8) + canvas[:, (width - height) // 2 : (width + height) // 2] = img + tfr.add_image(canvas) + print('\r%d / %d => %d ' % (idx + 1, total_images, tfr.cur_images), end='') + + except: + print(sys.exc_info()[1]) + if tfr.cur_images == max_images: + break + print() + 
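+#----------------------------------------------------------------------------
+
+# Illustrative sketch (added for exposition; not part of the original NVIDIA tool): the
+# image preparation used by create_lsun above, written as a standalone helper for a single
+# file. Records are decoded, center-cropped to a square, resized to the target resolution,
+# and transposed from HWC RGB to CHW uint8 before being passed to TFRecordExporter.add_image()
+# (create_lsun_wide applies a non-square variant of the same steps). The helper name and
+# arguments here are hypothetical.
+def _example_prepare_image(filename, resolution=256):
+    img = np.asarray(PIL.Image.open(filename).convert('RGB'))
+    crop = np.min(img.shape[:2])
+    img = img[(img.shape[0] - crop) // 2 : (img.shape[0] + crop) // 2,
+              (img.shape[1] - crop) // 2 : (img.shape[1] + crop) // 2]
+    img = PIL.Image.fromarray(img, 'RGB').resize((resolution, resolution), PIL.Image.ANTIALIAS)
+    return np.asarray(img).transpose([2, 0, 1])  # HWC => CHW
+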
+#---------------------------------------------------------------------------- + +def create_celeba(tfrecord_dir, celeba_dir, cx=89, cy=121): + print('Loading CelebA from "%s"' % celeba_dir) + glob_pattern = os.path.join(celeba_dir, 'img_align_celeba_png', '*.png') + image_filenames = sorted(glob.glob(glob_pattern)) + expected_images = 202599 + if len(image_filenames) != expected_images: + error('Expected to find %d images' % expected_images) + + with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) + assert img.shape == (218, 178, 3) + img = img[cy - 64 : cy + 64, cx - 64 : cx + 64] + img = img.transpose(2, 0, 1) # HWC => CHW + tfr.add_image(img) + +#---------------------------------------------------------------------------- + +def create_from_images(tfrecord_dir, image_dir, shuffle): + print('Loading images from "%s"' % image_dir) + image_filenames = sorted(glob.glob(os.path.join(image_dir, '*'))) + if len(image_filenames) == 0: + error('No input images found') + + img = np.asarray(PIL.Image.open(image_filenames[0])) + resolution = img.shape[0] + channels = img.shape[2] if img.ndim == 3 else 1 + if img.shape[1] != resolution: + error('Input images must have the same width and height') + if resolution != 2 ** int(np.floor(np.log2(resolution))): + error('Input image resolution must be a power-of-two') + if channels not in [1, 3]: + error('Input images must be stored as RGB or grayscale') + + with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: + order = tfr.choose_shuffled_order() if shuffle else np.arange(len(image_filenames)) + for idx in range(order.size): + img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) + if channels == 1: + img = img[np.newaxis, :, :] # HW => CHW + else: + img = img.transpose([2, 0, 1]) # HWC => CHW + tfr.add_image(img) + +#---------------------------------------------------------------------------- + +def create_from_hdf5(tfrecord_dir, hdf5_filename, shuffle): + print('Loading HDF5 archive from "%s"' % hdf5_filename) + import h5py # conda install h5py + with h5py.File(hdf5_filename, 'r') as hdf5_file: + hdf5_data = max([value for key, value in hdf5_file.items() if key.startswith('data')], key=lambda lod: lod.shape[3]) + with TFRecordExporter(tfrecord_dir, hdf5_data.shape[0]) as tfr: + order = tfr.choose_shuffled_order() if shuffle else np.arange(hdf5_data.shape[0]) + for idx in range(order.size): + tfr.add_image(hdf5_data[order[idx]]) + npy_filename = os.path.splitext(hdf5_filename)[0] + '-labels.npy' + if os.path.isfile(npy_filename): + tfr.add_labels(np.load(npy_filename)[order]) + +#---------------------------------------------------------------------------- + +def execute_cmdline(argv): + prog = argv[0] + parser = argparse.ArgumentParser( + prog = prog, + description = 'Tool for creating multi-resolution TFRecords datasets for StyleGAN and ProGAN.', + epilog = 'Type "%s -h" for more information.' 
% prog) + + subparsers = parser.add_subparsers(dest='command') + subparsers.required = True + def add_command(cmd, desc, example=None): + epilog = 'Example: %s %s' % (prog, example) if example is not None else None + return subparsers.add_parser(cmd, description=desc, help=desc, epilog=epilog) + + p = add_command( 'display', 'Display images in dataset.', + 'display datasets/mnist') + p.add_argument( 'tfrecord_dir', help='Directory containing dataset') + + p = add_command( 'extract', 'Extract images from dataset.', + 'extract datasets/mnist mnist-images') + p.add_argument( 'tfrecord_dir', help='Directory containing dataset') + p.add_argument( 'output_dir', help='Directory to extract the images into') + + p = add_command( 'compare', 'Compare two datasets.', + 'compare datasets/mydataset datasets/mnist') + p.add_argument( 'tfrecord_dir_a', help='Directory containing first dataset') + p.add_argument( 'tfrecord_dir_b', help='Directory containing second dataset') + p.add_argument( '--ignore_labels', help='Ignore labels (default: 0)', type=int, default=0) + + p = add_command( 'create_mnist', 'Create dataset for MNIST.', + 'create_mnist datasets/mnist ~/downloads/mnist') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'mnist_dir', help='Directory containing MNIST') + + p = add_command( 'create_mnistrgb', 'Create dataset for MNIST-RGB.', + 'create_mnistrgb datasets/mnistrgb ~/downloads/mnist') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'mnist_dir', help='Directory containing MNIST') + p.add_argument( '--num_images', help='Number of composite images to create (default: 1000000)', type=int, default=1000000) + p.add_argument( '--random_seed', help='Random seed (default: 123)', type=int, default=123) + + p = add_command( 'create_cifar10', 'Create dataset for CIFAR-10.', + 'create_cifar10 datasets/cifar10 ~/downloads/cifar10') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'cifar10_dir', help='Directory containing CIFAR-10') + + p = add_command( 'create_cifar100', 'Create dataset for CIFAR-100.', + 'create_cifar100 datasets/cifar100 ~/downloads/cifar100') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'cifar100_dir', help='Directory containing CIFAR-100') + + p = add_command( 'create_svhn', 'Create dataset for SVHN.', + 'create_svhn datasets/svhn ~/downloads/svhn') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'svhn_dir', help='Directory containing SVHN') + + p = add_command( 'create_lsun', 'Create dataset for single LSUN category.', + 'create_lsun datasets/lsun-car-100k ~/downloads/lsun/car_lmdb --resolution 256 --max_images 100000') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'lmdb_dir', help='Directory containing LMDB database') + p.add_argument( '--resolution', help='Output resolution (default: 256)', type=int, default=256) + p.add_argument( '--max_images', help='Maximum number of images (default: none)', type=int, default=None) + + p = add_command( 'create_lsun_wide', 'Create LSUN dataset with non-square aspect ratio.', + 'create_lsun_wide datasets/lsun-car-512x384 ~/downloads/lsun/car_lmdb --width 512 --height 384') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'lmdb_dir', help='Directory containing LMDB database') + p.add_argument( '--width', 
help='Output width (default: 512)', type=int, default=512) + p.add_argument( '--height', help='Output height (default: 384)', type=int, default=384) + p.add_argument( '--max_images', help='Maximum number of images (default: none)', type=int, default=None) + + p = add_command( 'create_celeba', 'Create dataset for CelebA.', + 'create_celeba datasets/celeba ~/downloads/celeba') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'celeba_dir', help='Directory containing CelebA') + p.add_argument( '--cx', help='Center X coordinate (default: 89)', type=int, default=89) + p.add_argument( '--cy', help='Center Y coordinate (default: 121)', type=int, default=121) + + p = add_command( 'create_from_images', 'Create dataset from a directory full of images.', + 'create_from_images datasets/mydataset myimagedir') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'image_dir', help='Directory containing the images') + p.add_argument( '--shuffle', help='Randomize image order (default: 1)', type=int, default=1) + + p = add_command( 'create_from_hdf5', 'Create dataset from legacy HDF5 archive.', + 'create_from_hdf5 datasets/celebahq ~/downloads/celeba-hq-1024x1024.h5') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'hdf5_filename', help='HDF5 archive containing the images') + p.add_argument( '--shuffle', help='Randomize image order (default: 1)', type=int, default=1) + + args = parser.parse_args(argv[1:] if len(argv) > 1 else ['-h']) + func = globals()[args.command] + del args.command + func(**vars(args)) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + execute_cmdline(sys.argv) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e34112b628e3d526739681eac984c5c2db704814 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +from . import submission + +from .submission.run_context import RunContext + +from .submission.submit import SubmitTarget +from .submission.submit import PathType +from .submission.submit import SubmitConfig +from .submission.submit import submit_run +from .submission.submit import get_path_from_template +from .submission.submit import convert_path +from .submission.submit import make_run_dir_path + +from .util import EasyDict + +submit_config: SubmitConfig = None # Package level variable for SubmitConfig which is only valid when inside the run function. diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..acf2fbee4b216cb9f2a0b73993fd1c7042e2248d --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. 
+# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +from . import run_context +from . import submit diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/internal/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/internal/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0f11279893d6056e8cb6f9e04e12aad07a776496 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/internal/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +from . import local diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/internal/local.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/internal/local.py new file mode 100644 index 0000000000000000000000000000000000000000..c03c79e93ca19704157782a0bae556a7752b775c --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/internal/local.py @@ -0,0 +1,22 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +class TargetOptions(): + def __init__(self): + self.do_not_copy_source_files = False + +class Target(): + def __init__(self): + pass + + def finalize_submit_config(self, submit_config, host_run_dir): + print ('Local submit ', end='', flush=True) + submit_config.run_dir = host_run_dir + + def submit(self, submit_config, host_run_dir): + from ..submit import run_wrapper, convert_path + print('- run_dir: %s' % convert_path(submit_config.run_dir), flush=True) + return run_wrapper(submit_config) diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/run_context.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/run_context.py new file mode 100644 index 0000000000000000000000000000000000000000..62fbb1afd86be9d5fa963a1958485a2fc6d1152a --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/run_context.py @@ -0,0 +1,110 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Helpers for managing the run/training loop.""" + +import datetime +import json +import os +import pprint +import time +import types + +from typing import Any + +from . import submit + +# Singleton RunContext +_run_context = None + +class RunContext(object): + """Helper class for managing the run/training loop. + + The context will hide the implementation details of a basic run/training loop. + It will set things up properly, tell if run should be stopped, and then cleans up. + User should call update periodically and use should_stop to determine if run should be stopped. + + Args: + submit_config: The SubmitConfig that is used for the current run. + config_module: (deprecated) The whole config module that is used for the current run. 
+ """ + + def __init__(self, submit_config: submit.SubmitConfig, config_module: types.ModuleType = None): + global _run_context + # Only a single RunContext can be alive + assert _run_context is None + _run_context = self + self.submit_config = submit_config + self.should_stop_flag = False + self.has_closed = False + self.start_time = time.time() + self.last_update_time = time.time() + self.last_update_interval = 0.0 + self.progress_monitor_file_path = None + + # vestigial config_module support just prints a warning + if config_module is not None: + print("RunContext.config_module parameter support has been removed.") + + # write out details about the run to a text file + self.run_txt_data = {"task_name": submit_config.task_name, "host_name": submit_config.host_name, "start_time": datetime.datetime.now().isoformat(sep=" ")} + with open(os.path.join(submit_config.run_dir, "run.txt"), "w") as f: + pprint.pprint(self.run_txt_data, stream=f, indent=4, width=200, compact=False) + + def __enter__(self) -> "RunContext": + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.close() + + def update(self, loss: Any = 0, cur_epoch: Any = 0, max_epoch: Any = None) -> None: + """Do general housekeeping and keep the state of the context up-to-date. + Should be called often enough but not in a tight loop.""" + assert not self.has_closed + + self.last_update_interval = time.time() - self.last_update_time + self.last_update_time = time.time() + + if os.path.exists(os.path.join(self.submit_config.run_dir, "abort.txt")): + self.should_stop_flag = True + + def should_stop(self) -> bool: + """Tell whether a stopping condition has been triggered one way or another.""" + return self.should_stop_flag + + def get_time_since_start(self) -> float: + """How much time has passed since the creation of the context.""" + return time.time() - self.start_time + + def get_time_since_last_update(self) -> float: + """How much time has passed since the last call to update.""" + return time.time() - self.last_update_time + + def get_last_update_interval(self) -> float: + """How much time passed between the previous two calls to update.""" + return self.last_update_interval + + def close(self) -> None: + """Close the context and clean up. + Should only be called once.""" + if not self.has_closed: + # update the run.txt with stopping time + self.run_txt_data["stop_time"] = datetime.datetime.now().isoformat(sep=" ") + with open(os.path.join(self.submit_config.run_dir, "run.txt"), "w") as f: + pprint.pprint(self.run_txt_data, stream=f, indent=4, width=200, compact=False) + self.has_closed = True + + # detach the global singleton + global _run_context + if _run_context is self: + _run_context = None + + @staticmethod + def get(): + import dnnlib + if _run_context is not None: + return _run_context + return RunContext(dnnlib.submit_config) diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/submit.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/submit.py new file mode 100644 index 0000000000000000000000000000000000000000..514647dd6a0585c7bd6864380a95b8059bcfba42 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/submission/submit.py @@ -0,0 +1,343 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. 
+# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Submit a function to be run either locally or in a computing cluster.""" + +import copy +import inspect +import os +import pathlib +import pickle +import platform +import pprint +import re +import shutil +import sys +import time +import traceback + +from enum import Enum + +from .. import util +from ..util import EasyDict + +from . import internal + +class SubmitTarget(Enum): + """The target where the function should be run. + + LOCAL: Run it locally. + """ + LOCAL = 1 + + +class PathType(Enum): + """Determines in which format should a path be formatted. + + WINDOWS: Format with Windows style. + LINUX: Format with Linux/Posix style. + AUTO: Use current OS type to select either WINDOWS or LINUX. + """ + WINDOWS = 1 + LINUX = 2 + AUTO = 3 + + +class PlatformExtras: + """A mixed bag of values used by dnnlib heuristics. + + Attributes: + + data_reader_buffer_size: Used by DataReader to size internal shared memory buffers. + data_reader_process_count: Number of worker processes to spawn (zero for single thread operation) + """ + def __init__(self): + self.data_reader_buffer_size = 1<<30 # 1 GB + self.data_reader_process_count = 0 # single threaded default + + +_user_name_override = None + +class SubmitConfig(util.EasyDict): + """Strongly typed config dict needed to submit runs. + + Attributes: + run_dir_root: Path to the run dir root. Can be optionally templated with tags. Needs to always be run through get_path_from_template. + run_desc: Description of the run. Will be used in the run dir and task name. + run_dir_ignore: List of file patterns used to ignore files when copying files to the run dir. + run_dir_extra_files: List of (abs_path, rel_path) tuples of file paths. rel_path root will be the src directory inside the run dir. + submit_target: Submit target enum value. Used to select where the run is actually launched. + num_gpus: Number of GPUs used/requested for the run. + print_info: Whether to print debug information when submitting. + local.do_not_copy_source_files: Do not copy source files from the working directory to the run dir. + run_id: Automatically populated value during submit. + run_name: Automatically populated value during submit. + run_dir: Automatically populated value during submit. + run_func_name: Automatically populated value during submit. + run_func_kwargs: Automatically populated value during submit. + user_name: Automatically populated value during submit. Can be set by the user which will then override the automatic value. + task_name: Automatically populated value during submit. + host_name: Automatically populated value during submit. + platform_extras: Automatically populated values during submit. Used by various dnnlib libraries such as the DataReader class. 
+ """ + + def __init__(self): + super().__init__() + + # run (set these) + self.run_dir_root = "" # should always be passed through get_path_from_template + self.run_desc = "" + self.run_dir_ignore = ["__pycache__", "*.pyproj", "*.sln", "*.suo", ".cache", ".idea", ".vs", ".vscode", "_cudacache"] + self.run_dir_extra_files = [] + + # submit (set these) + self.submit_target = SubmitTarget.LOCAL + self.num_gpus = 1 + self.print_info = False + self.nvprof = False + self.local = internal.local.TargetOptions() + self.datasets = [] + + # (automatically populated) + self.run_id = None + self.run_name = None + self.run_dir = None + self.run_func_name = None + self.run_func_kwargs = None + self.user_name = None + self.task_name = None + self.host_name = "localhost" + self.platform_extras = PlatformExtras() + + +def get_path_from_template(path_template: str, path_type: PathType = PathType.AUTO) -> str: + """Replace tags in the given path template and return either Windows or Linux formatted path.""" + # automatically select path type depending on running OS + if path_type == PathType.AUTO: + if platform.system() == "Windows": + path_type = PathType.WINDOWS + elif platform.system() == "Linux": + path_type = PathType.LINUX + else: + raise RuntimeError("Unknown platform") + + path_template = path_template.replace("", get_user_name()) + + # return correctly formatted path + if path_type == PathType.WINDOWS: + return str(pathlib.PureWindowsPath(path_template)) + elif path_type == PathType.LINUX: + return str(pathlib.PurePosixPath(path_template)) + else: + raise RuntimeError("Unknown platform") + + +def get_template_from_path(path: str) -> str: + """Convert a normal path back to its template representation.""" + path = path.replace("\\", "/") + return path + + +def convert_path(path: str, path_type: PathType = PathType.AUTO) -> str: + """Convert a normal path to template and the convert it back to a normal path with given path type.""" + path_template = get_template_from_path(path) + path = get_path_from_template(path_template, path_type) + return path + + +def set_user_name_override(name: str) -> None: + """Set the global username override value.""" + global _user_name_override + _user_name_override = name + + +def get_user_name(): + """Get the current user name.""" + if _user_name_override is not None: + return _user_name_override + elif platform.system() == "Windows": + return os.getlogin() + elif platform.system() == "Linux": + try: + import pwd + return pwd.getpwuid(os.geteuid()).pw_name + except: + return "unknown" + else: + raise RuntimeError("Unknown platform") + + +def make_run_dir_path(*paths): + """Make a path/filename that resides under the current submit run_dir. + + Args: + *paths: Path components to be passed to os.path.join + + Returns: + A file/dirname rooted at submit_config.run_dir. If there's no + submit_config or run_dir, the base directory is the current + working directory. 
+ + E.g., `os.path.join(dnnlib.submit_config.run_dir, "output.txt"))` + """ + import dnnlib + if (dnnlib.submit_config is None) or (dnnlib.submit_config.run_dir is None): + return os.path.join(os.getcwd(), *paths) + return os.path.join(dnnlib.submit_config.run_dir, *paths) + + +def _create_run_dir_local(submit_config: SubmitConfig) -> str: + """Create a new run dir with increasing ID number at the start.""" + run_dir_root = get_path_from_template(submit_config.run_dir_root, PathType.AUTO) + + if not os.path.exists(run_dir_root): + os.makedirs(run_dir_root) + + submit_config.run_id = _get_next_run_id_local(run_dir_root) + submit_config.run_name = "{0:05d}-{1}".format(submit_config.run_id, submit_config.run_desc) + run_dir = os.path.join(run_dir_root, submit_config.run_name) + + if os.path.exists(run_dir): + raise RuntimeError("The run dir already exists! ({0})".format(run_dir)) + + os.makedirs(run_dir) + + return run_dir + + +def _get_next_run_id_local(run_dir_root: str) -> int: + """Reads all directory names in a given directory (non-recursive) and returns the next (increasing) run id. Assumes IDs are numbers at the start of the directory names.""" + dir_names = [d for d in os.listdir(run_dir_root) if os.path.isdir(os.path.join(run_dir_root, d))] + r = re.compile("^\\d+") # match one or more digits at the start of the string + run_id = 0 + + for dir_name in dir_names: + m = r.match(dir_name) + + if m is not None: + i = int(m.group()) + run_id = max(run_id, i + 1) + + return run_id + + +def _populate_run_dir(submit_config: SubmitConfig, run_dir: str) -> None: + """Copy all necessary files into the run dir. Assumes that the dir exists, is local, and is writable.""" + pickle.dump(submit_config, open(os.path.join(run_dir, "submit_config.pkl"), "wb")) + with open(os.path.join(run_dir, "submit_config.txt"), "w") as f: + pprint.pprint(submit_config, stream=f, indent=4, width=200, compact=False) + + if (submit_config.submit_target == SubmitTarget.LOCAL) and submit_config.local.do_not_copy_source_files: + return + + files = [] + + run_func_module_dir_path = util.get_module_dir_by_obj_name(submit_config.run_func_name) + assert '.' 
in submit_config.run_func_name + for _idx in range(submit_config.run_func_name.count('.') - 1): + run_func_module_dir_path = os.path.dirname(run_func_module_dir_path) + files += util.list_dir_recursively_with_ignore(run_func_module_dir_path, ignores=submit_config.run_dir_ignore, add_base_to_relative=False) + + dnnlib_module_dir_path = util.get_module_dir_by_obj_name("dnnlib") + files += util.list_dir_recursively_with_ignore(dnnlib_module_dir_path, ignores=submit_config.run_dir_ignore, add_base_to_relative=True) + + files += submit_config.run_dir_extra_files + + files = [(f[0], os.path.join(run_dir, "src", f[1])) for f in files] + files += [(os.path.join(dnnlib_module_dir_path, "submission", "internal", "run.py"), os.path.join(run_dir, "run.py"))] + + util.copy_files_and_create_dirs(files) + + + +def run_wrapper(submit_config: SubmitConfig) -> None: + """Wrap the actual run function call for handling logging, exceptions, typing, etc.""" + is_local = submit_config.submit_target == SubmitTarget.LOCAL + + # when running locally, redirect stderr to stdout, log stdout to a file, and force flushing + if is_local: + logger = util.Logger(file_name=os.path.join(submit_config.run_dir, "log.txt"), file_mode="w", should_flush=True) + else: # when running in a cluster, redirect stderr to stdout, and just force flushing (log writing is handled by run.sh) + logger = util.Logger(file_name=None, should_flush=True) + + import dnnlib + dnnlib.submit_config = submit_config + + exit_with_errcode = False + try: + print("dnnlib: Running {0}() on {1}...".format(submit_config.run_func_name, submit_config.host_name)) + start_time = time.time() + + run_func_obj = util.get_obj_by_name(submit_config.run_func_name) + assert callable(run_func_obj) + sig = inspect.signature(run_func_obj) + if 'submit_config' in sig.parameters: + run_func_obj(submit_config=submit_config, **submit_config.run_func_kwargs) + else: + run_func_obj(**submit_config.run_func_kwargs) + + print("dnnlib: Finished {0}() in {1}.".format(submit_config.run_func_name, util.format_time(time.time() - start_time))) + except: + if is_local: + raise + else: + traceback.print_exc() + + log_src = os.path.join(submit_config.run_dir, "log.txt") + log_dst = os.path.join(get_path_from_template(submit_config.run_dir_root), "{0}-error.txt".format(submit_config.run_name)) + shutil.copyfile(log_src, log_dst) + + # Defer sys.exit(1) to happen after we close the logs and create a _finished.txt + exit_with_errcode = True + finally: + open(os.path.join(submit_config.run_dir, "_finished.txt"), "w").close() + + dnnlib.RunContext.get().close() + dnnlib.submit_config = None + logger.close() + + # If we hit an error, get out of the script now and signal the error + # to whatever process that started this script. + if exit_with_errcode: + sys.exit(1) + + return submit_config + + +def submit_run(submit_config: SubmitConfig, run_func_name: str, **run_func_kwargs) -> None: + """Create a run dir, gather files related to the run, copy files to the run dir, and launch the run in appropriate place.""" + submit_config = copy.deepcopy(submit_config) + + submit_target = submit_config.submit_target + farm = None + if submit_target == SubmitTarget.LOCAL: + farm = internal.local.Target() + assert farm is not None # unknown target + + # Disallow submitting jobs with zero num_gpus. 
+ if (submit_config.num_gpus is None) or (submit_config.num_gpus == 0): + raise RuntimeError("submit_config.num_gpus must be set to a non-zero value") + + if submit_config.user_name is None: + submit_config.user_name = get_user_name() + + submit_config.run_func_name = run_func_name + submit_config.run_func_kwargs = run_func_kwargs + + #-------------------------------------------------------------------- + # Prepare submission by populating the run dir + #-------------------------------------------------------------------- + host_run_dir = _create_run_dir_local(submit_config) + + submit_config.task_name = "{0}-{1:05d}-{2}".format(submit_config.user_name, submit_config.run_id, submit_config.run_desc) + docker_valid_name_regex = "^[a-zA-Z0-9][a-zA-Z0-9_.-]+$" + if not re.match(docker_valid_name_regex, submit_config.task_name): + raise RuntimeError("Invalid task name. Probable reason: unacceptable characters in your submit_config.run_desc. Task name must be accepted by the following regex: " + docker_valid_name_regex + ", got " + submit_config.task_name) + + # Farm specific preparations for a submit + farm.finalize_submit_config(submit_config, host_run_dir) + _populate_run_dir(submit_config, host_run_dir) + return farm.submit(submit_config, host_run_dir) diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..02c25173d3f2391c88b142cf80af02cd93b0b5a0 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +from . import autosummary +from . import network +from . import optimizer +from . import tfutil +from . import custom_ops + +from .tfutil import * +from .network import Network + +from .optimizer import Optimizer + +from .custom_ops import get_plugin diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/autosummary.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/autosummary.py new file mode 100644 index 0000000000000000000000000000000000000000..6b0d80b371620bedadf8164772b7d6f87806fc11 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/autosummary.py @@ -0,0 +1,191 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Helper for adding automatically tracked values to Tensorboard. + +Autosummary creates an identity op that internally keeps track of the input +values and automatically shows up in TensorBoard. The reported value +represents an average over input components. The average is accumulated +constantly over time and flushed when save_summaries() is called. + +Notes: +- The output tensor must be used as an input for something else in the + graph. Otherwise, the autosummary op will not get executed, and the average + value will not get accumulated. +- It is perfectly fine to include autosummaries with the same name in + several places throughout the graph, even if they are executed concurrently. 
+- It is ok to also pass in a python scalar or numpy array. In this case, it + is added to the average immediately. +""" + +from collections import OrderedDict +import numpy as np +import tensorflow as tf +from tensorboard import summary as summary_lib +from tensorboard.plugins.custom_scalar import layout_pb2 + +from . import tfutil +from .tfutil import TfExpression +from .tfutil import TfExpressionEx + +# Enable "Custom scalars" tab in TensorBoard for advanced formatting. +# Disabled by default to reduce tfevents file size. +enable_custom_scalars = False + +_dtype = tf.float64 +_vars = OrderedDict() # name => [var, ...] +_immediate = OrderedDict() # name => update_op, update_value +_finalized = False +_merge_op = None + + +def _create_var(name: str, value_expr: TfExpression) -> TfExpression: + """Internal helper for creating autosummary accumulators.""" + assert not _finalized + name_id = name.replace("/", "_") + v = tf.cast(value_expr, _dtype) + + if v.shape.is_fully_defined(): + size = np.prod(v.shape.as_list()) + size_expr = tf.constant(size, dtype=_dtype) + else: + size = None + size_expr = tf.reduce_prod(tf.cast(tf.shape(v), _dtype)) + + if size == 1: + if v.shape.ndims != 0: + v = tf.reshape(v, []) + v = [size_expr, v, tf.square(v)] + else: + v = [size_expr, tf.reduce_sum(v), tf.reduce_sum(tf.square(v))] + v = tf.cond(tf.is_finite(v[1]), lambda: tf.stack(v), lambda: tf.zeros(3, dtype=_dtype)) + + with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.control_dependencies(None): + var = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False) # [sum(1), sum(x), sum(x**2)] + update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v)) + + if name in _vars: + _vars[name].append(var) + else: + _vars[name] = [var] + return update_op + + +def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx: + """Create a new autosummary. + + Args: + name: Name to use in TensorBoard + value: TensorFlow expression or python value to track + passthru: Optionally return this TF node without modifications but tack an autosummary update side-effect to this node. 
+ + Example use of the passthru mechanism: + + n = autosummary('l2loss', loss, passthru=n) + + This is a shorthand for the following code: + + with tf.control_dependencies([autosummary('l2loss', loss)]): + n = tf.identity(n) + """ + tfutil.assert_tf_initialized() + name_id = name.replace("/", "_") + + if tfutil.is_tf_expression(value): + with tf.name_scope("summary_" + name_id), tf.device(value.device): + condition = tf.convert_to_tensor(condition, name='condition') + update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op) + with tf.control_dependencies([update_op]): + return tf.identity(value if passthru is None else passthru) + + else: # python scalar or numpy array + assert not tfutil.is_tf_expression(passthru) + assert not tfutil.is_tf_expression(condition) + if condition: + if name not in _immediate: + with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.device(None), tf.control_dependencies(None): + update_value = tf.placeholder(_dtype) + update_op = _create_var(name, update_value) + _immediate[name] = update_op, update_value + update_op, update_value = _immediate[name] + tfutil.run(update_op, {update_value: value}) + return value if passthru is None else passthru + + +def finalize_autosummaries() -> None: + """Create the necessary ops to include autosummaries in TensorBoard report. + Note: This should be done only once per graph. + """ + global _finalized + tfutil.assert_tf_initialized() + + if _finalized: + return None + + _finalized = True + tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list]) + + # Create summary ops. + with tf.device(None), tf.control_dependencies(None): + for name, vars_list in _vars.items(): + name_id = name.replace("/", "_") + with tfutil.absolute_name_scope("Autosummary/" + name_id): + moments = tf.add_n(vars_list) + moments /= moments[0] + with tf.control_dependencies([moments]): # read before resetting + reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list] + with tf.name_scope(None), tf.control_dependencies(reset_ops): # reset before reporting + mean = moments[1] + std = tf.sqrt(moments[2] - tf.square(moments[1])) + tf.summary.scalar(name, mean) + if enable_custom_scalars: + tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std) + tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std) + + # Setup layout for custom scalars. 
+ layout = None + if enable_custom_scalars: + cat_dict = OrderedDict() + for series_name in sorted(_vars.keys()): + p = series_name.split("/") + cat = p[0] if len(p) >= 2 else "" + chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1] + if cat not in cat_dict: + cat_dict[cat] = OrderedDict() + if chart not in cat_dict[cat]: + cat_dict[cat][chart] = [] + cat_dict[cat][chart].append(series_name) + categories = [] + for cat_name, chart_dict in cat_dict.items(): + charts = [] + for chart_name, series_names in chart_dict.items(): + series = [] + for series_name in series_names: + series.append(layout_pb2.MarginChartContent.Series( + value=series_name, + lower="xCustomScalars/" + series_name + "/margin_lo", + upper="xCustomScalars/" + series_name + "/margin_hi")) + margin = layout_pb2.MarginChartContent(series=series) + charts.append(layout_pb2.Chart(title=chart_name, margin=margin)) + categories.append(layout_pb2.Category(title=cat_name, chart=charts)) + layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories)) + return layout + +def save_summaries(file_writer, global_step=None): + """Call FileWriter.add_summary() with all summaries in the default graph, + automatically finalizing and merging them on the first call. + """ + global _merge_op + tfutil.assert_tf_initialized() + + if _merge_op is None: + layout = finalize_autosummaries() + if layout is not None: + file_writer.add_summary(layout) + with tf.device(None), tf.control_dependencies(None): + _merge_op = tf.summary.merge_all() + + file_writer.add_summary(_merge_op.eval(), global_step) diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/custom_ops.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/custom_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..e6c3e52876005acf9f6b2f2386547b5749a14ecf --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/custom_ops.py @@ -0,0 +1,169 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""TensorFlow custom ops builder. +""" + +import os +import re +import uuid +import hashlib +import tempfile +import shutil +import tensorflow as tf +from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module + +#---------------------------------------------------------------------------- +# Global options. + +cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache') +cuda_cache_version_tag = 'v1' +do_not_hash_included_headers = False # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe! +verbose = True # Print status messages to stdout. + +compiler_bindir_search_path = [ + 'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.14.26428/bin/Hostx64/x64', + 'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64', + 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin', +] + +#---------------------------------------------------------------------------- +# Internal helper funcs. 
+ +def _find_compiler_bindir(): + for compiler_path in compiler_bindir_search_path: + if os.path.isdir(compiler_path): + return compiler_path + return None + +def _get_compute_cap(device): + caps_str = device.physical_device_desc + m = re.search('compute capability: (\\d+).(\\d+)', caps_str) + major = m.group(1) + minor = m.group(2) + return (major, minor) + +def _get_cuda_gpu_arch_string(): + gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU'] + if len(gpus) == 0: + raise RuntimeError('No GPU devices found') + (major, minor) = _get_compute_cap(gpus[0]) + return 'sm_%s%s' % (major, minor) + +def _run_cmd(cmd): + with os.popen(cmd) as pipe: + output = pipe.read() + status = pipe.close() + if status is not None: + raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output)) + +def _prepare_nvcc_cli(opts): + cmd = 'nvcc ' + opts.strip() + cmd += ' --disable-warnings' + cmd += ' --include-path "%s"' % tf.sysconfig.get_include() + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src') + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl') + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive') + + compiler_bindir = _find_compiler_bindir() + if compiler_bindir is None: + # Require that _find_compiler_bindir succeeds on Windows. Allow + # nvcc to use whatever is the default on Linux. + if os.name == 'nt': + raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__) + else: + cmd += ' --compiler-bindir "%s"' % compiler_bindir + cmd += ' 2>&1' + return cmd + +#---------------------------------------------------------------------------- +# Main entry point. + +_plugin_cache = dict() + +def get_plugin(cuda_file): + cuda_file_base = os.path.basename(cuda_file) + cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base) + + # Already in cache? + if cuda_file in _plugin_cache: + return _plugin_cache[cuda_file] + + # Setup plugin. + if verbose: + print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True) + try: + # Hash CUDA source. + md5 = hashlib.md5() + with open(cuda_file, 'rb') as f: + md5.update(f.read()) + md5.update(b'\n') + + # Hash headers included by the CUDA code by running it through the preprocessor. + if not do_not_hash_included_headers: + if verbose: + print('Preprocessing... ', end='', flush=True) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext) + _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))) + with open(tmp_file, 'rb') as f: + bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8') # __FILE__ in error check macros + good_file_str = ('"' + cuda_file_base + '"').encode('utf-8') + for ln in f: + if not ln.startswith(b'# ') and not ln.startswith(b'#line '): # ignore line number pragmas + ln = ln.replace(bad_file_str, good_file_str) + md5.update(ln) + md5.update(b'\n') + + # Select compiler options. 
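For reference, this is how the op modules that appear later in this diff (e.g. `ops/fused_bias_act.py`) obtain their compiled plugin through `get_plugin()`; the `.cu` source is assumed to sit next to the calling `.py` file:

```python
import os
from dnnlib.tflib import custom_ops   # the module defined above

def _get_plugin():
    # First call: hash the CUDA source, compile it with nvcc into _cudacache/, and load it
    # with tf.load_op_library(). Later calls are served from _plugin_cache / the on-disk cache.
    return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu')
```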
+ compile_opts = '' + if os.name == 'nt': + compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib') + elif os.name == 'posix': + compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so') + compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\'' + else: + assert False # not Windows or Linux, w00t? + compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string() + compile_opts += ' --use_fast_math' + nvcc_cmd = _prepare_nvcc_cli(compile_opts) + + # Hash build configuration. + md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n') + md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n') + md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n') + + # Compile if not already compiled. + bin_file_ext = '.dll' if os.name == 'nt' else '.so' + bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext) + if not os.path.isfile(bin_file): + if verbose: + print('Compiling... ', end='', flush=True) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext) + _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)) + os.makedirs(cuda_cache_path, exist_ok=True) + intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext) + shutil.copyfile(tmp_file, intermediate_file) + os.rename(intermediate_file, bin_file) # atomic + + # Load. + if verbose: + print('Loading... ', end='', flush=True) + plugin = tf.load_op_library(bin_file) + + # Add to cache. + _plugin_cache[cuda_file] = plugin + if verbose: + print('Done.', flush=True) + return plugin + + except: + if verbose: + print('Failed!', flush=True) + raise + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/network.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/network.py new file mode 100644 index 0000000000000000000000000000000000000000..409babb1d2166d341bfaee2ef460d8810bdaf51f --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/network.py @@ -0,0 +1,590 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Helper for managing networks.""" + +import types +import inspect +import re +import uuid +import sys +import numpy as np +import tensorflow as tf + +from collections import OrderedDict +from typing import Any, List, Tuple, Union + +from . import tfutil +from .. import util + +from .tfutil import TfExpression, TfExpressionEx + +_import_handlers = [] # Custom import handlers for dealing with legacy data in pickle import. +_import_module_src = dict() # Source code for temporary modules created during pickle import. + + +def import_handler(handler_func): + """Function decorator for declaring custom import handlers.""" + _import_handlers.append(handler_func) + return handler_func + + +class Network: + """Generic network abstraction. + + Acts as a convenience wrapper for a parameterized network construction + function, providing several utility methods and convenient access to + the inputs/outputs/weights. 
+ + Network objects can be safely pickled and unpickled for long-term + archival purposes. The pickling works reliably as long as the underlying + network construction function is defined in a standalone Python module + that has no side effects or application-specific imports. + + Args: + name: Network name. Used to select TensorFlow name and variable scopes. + func_name: Fully qualified name of the underlying network construction function, or a top-level function object. + static_kwargs: Keyword arguments to be passed in to the network construction function. + + Attributes: + name: User-specified name, defaults to build func name if None. + scope: Unique TensorFlow scope containing template graph and variables, derived from the user-specified name. + static_kwargs: Arguments passed to the user-supplied build func. + components: Container for sub-networks. Passed to the build func, and retained between calls. + num_inputs: Number of input tensors. + num_outputs: Number of output tensors. + input_shapes: Input tensor shapes (NC or NCHW), including minibatch dimension. + output_shapes: Output tensor shapes (NC or NCHW), including minibatch dimension. + input_shape: Short-hand for input_shapes[0]. + output_shape: Short-hand for output_shapes[0]. + input_templates: Input placeholders in the template graph. + output_templates: Output tensors in the template graph. + input_names: Name string for each input. + output_names: Name string for each output. + own_vars: Variables defined by this network (local_name => var), excluding sub-networks. + vars: All variables (local_name => var). + trainables: All trainable variables (local_name => var). + var_global_to_local: Mapping from variable global names to local names. + """ + + def __init__(self, name: str = None, func_name: Any = None, **static_kwargs): + tfutil.assert_tf_initialized() + assert isinstance(name, str) or name is None + assert func_name is not None + assert isinstance(func_name, str) or util.is_top_level_function(func_name) + assert util.is_pickleable(static_kwargs) + + self._init_fields() + self.name = name + self.static_kwargs = util.EasyDict(static_kwargs) + + # Locate the user-specified network build function. + if util.is_top_level_function(func_name): + func_name = util.get_top_level_function_name(func_name) + module, self._build_func_name = util.get_module_from_obj_name(func_name) + self._build_func = util.get_obj_from_module(module, self._build_func_name) + assert callable(self._build_func) + + # Dig up source code for the module containing the build function. + self._build_module_src = _import_module_src.get(module, None) + if self._build_module_src is None: + self._build_module_src = inspect.getsource(module) + + # Init TensorFlow graph. + self._init_graph() + self.reset_own_vars() + + def _init_fields(self) -> None: + self.name = None + self.scope = None + self.static_kwargs = util.EasyDict() + self.components = util.EasyDict() + self.num_inputs = 0 + self.num_outputs = 0 + self.input_shapes = [[]] + self.output_shapes = [[]] + self.input_shape = [] + self.output_shape = [] + self.input_templates = [] + self.output_templates = [] + self.input_names = [] + self.output_names = [] + self.own_vars = OrderedDict() + self.vars = OrderedDict() + self.trainables = OrderedDict() + self.var_global_to_local = OrderedDict() + + self._build_func = None # User-supplied build function that constructs the network. + self._build_func_name = None # Name of the build function. 
+ self._build_module_src = None # Full source code of the module containing the build function. + self._run_cache = dict() # Cached graph data for Network.run(). + + def _init_graph(self) -> None: + # Collect inputs. + self.input_names = [] + + for param in inspect.signature(self._build_func).parameters.values(): + if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty: + self.input_names.append(param.name) + + self.num_inputs = len(self.input_names) + assert self.num_inputs >= 1 + + # Choose name and scope. + if self.name is None: + self.name = self._build_func_name + assert re.match("^[A-Za-z0-9_.\\-]*$", self.name) + with tf.name_scope(None): + self.scope = tf.get_default_graph().unique_name(self.name, mark_as_used=True) + + # Finalize build func kwargs. + build_kwargs = dict(self.static_kwargs) + build_kwargs["is_template_graph"] = True + build_kwargs["components"] = self.components + + # Build template graph. + with tfutil.absolute_variable_scope(self.scope, reuse=False), tfutil.absolute_name_scope(self.scope): # ignore surrounding scopes + assert tf.get_variable_scope().name == self.scope + assert tf.get_default_graph().get_name_scope() == self.scope + with tf.control_dependencies(None): # ignore surrounding control dependencies + self.input_templates = [tf.placeholder(tf.float32, name=name) for name in self.input_names] + out_expr = self._build_func(*self.input_templates, **build_kwargs) + + # Collect outputs. + assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) + self.output_templates = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) + self.num_outputs = len(self.output_templates) + assert self.num_outputs >= 1 + assert all(tfutil.is_tf_expression(t) for t in self.output_templates) + + # Perform sanity checks. + if any(t.shape.ndims is None for t in self.input_templates): + raise ValueError("Network input shapes not defined. Please call x.set_shape() for each input.") + if any(t.shape.ndims is None for t in self.output_templates): + raise ValueError("Network output shapes not defined. Please call x.set_shape() where applicable.") + if any(not isinstance(comp, Network) for comp in self.components.values()): + raise ValueError("Components of a Network must be Networks themselves.") + if len(self.components) != len(set(comp.name for comp in self.components.values())): + raise ValueError("Components of a Network must have unique names.") + + # List inputs and outputs. + self.input_shapes = [t.shape.as_list() for t in self.input_templates] + self.output_shapes = [t.shape.as_list() for t in self.output_templates] + self.input_shape = self.input_shapes[0] + self.output_shape = self.output_shapes[0] + self.output_names = [t.name.split("/")[-1].split(":")[0] for t in self.output_templates] + + # List variables. 
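A minimal sketch of what `_init_graph()` expects from a build function and how a `Network` is typically constructed (hypothetical function, shapes, and name; the real build funcs live in StyleGAN2's `training/networks_stylegan2.py`):

```python
import tensorflow as tf
import dnnlib.tflib as tflib

def my_generator(latents_in, labels_in, num_channels=3, is_template_graph=False, components=None, **kwargs):
    # Positional parameters without defaults ('latents_in', 'labels_in') become the network inputs.
    latents_in.set_shape([None, 128])      # shapes must be defined, as the sanity checks above require
    labels_in.set_shape([None, 10])
    x = tf.concat([latents_in, labels_in], axis=1)
    x = tf.layers.dense(x, num_channels * 4 * 4)
    return tf.reshape(x, [-1, num_channels, 4, 4])

tflib.init_tf()
net = tflib.Network('MyG', func_name=my_generator, num_channels=3)   # static kwargs travel with the net
print(net.input_shapes, net.output_shape)    # [[None, 128], [None, 10]]  [None, 3, 4, 4]
```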
+ self.own_vars = OrderedDict((var.name[len(self.scope) + 1:].split(":")[0], var) for var in tf.global_variables(self.scope + "/")) + self.vars = OrderedDict(self.own_vars) + self.vars.update((comp.name + "/" + name, var) for comp in self.components.values() for name, var in comp.vars.items()) + self.trainables = OrderedDict((name, var) for name, var in self.vars.items() if var.trainable) + self.var_global_to_local = OrderedDict((var.name.split(":")[0], name) for name, var in self.vars.items()) + + def reset_own_vars(self) -> None: + """Re-initialize all variables of this network, excluding sub-networks.""" + tfutil.run([var.initializer for var in self.own_vars.values()]) + + def reset_vars(self) -> None: + """Re-initialize all variables of this network, including sub-networks.""" + tfutil.run([var.initializer for var in self.vars.values()]) + + def reset_trainables(self) -> None: + """Re-initialize all trainable variables of this network, including sub-networks.""" + tfutil.run([var.initializer for var in self.trainables.values()]) + + def get_output_for(self, *in_expr: TfExpression, return_as_list: bool = False, **dynamic_kwargs) -> Union[TfExpression, List[TfExpression]]: + """Construct TensorFlow expression(s) for the output(s) of this network, given the input expression(s).""" + assert len(in_expr) == self.num_inputs + assert not all(expr is None for expr in in_expr) + + # Finalize build func kwargs. + build_kwargs = dict(self.static_kwargs) + build_kwargs.update(dynamic_kwargs) + build_kwargs["is_template_graph"] = False + build_kwargs["components"] = self.components + + # Build TensorFlow graph to evaluate the network. + with tfutil.absolute_variable_scope(self.scope, reuse=True), tf.name_scope(self.name): + assert tf.get_variable_scope().name == self.scope + valid_inputs = [expr for expr in in_expr if expr is not None] + final_inputs = [] + for expr, name, shape in zip(in_expr, self.input_names, self.input_shapes): + if expr is not None: + expr = tf.identity(expr, name=name) + else: + expr = tf.zeros([tf.shape(valid_inputs[0])[0]] + shape[1:], name=name) + final_inputs.append(expr) + out_expr = self._build_func(*final_inputs, **build_kwargs) + + # Propagate input shapes back to the user-specified expressions. + for expr, final in zip(in_expr, final_inputs): + if isinstance(expr, tf.Tensor): + expr.set_shape(final.shape) + + # Express outputs in the desired format. + assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) + if return_as_list: + out_expr = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) + return out_expr + + def get_var_local_name(self, var_or_global_name: Union[TfExpression, str]) -> str: + """Get the local name of a given variable, without any surrounding name scopes.""" + assert tfutil.is_tf_expression(var_or_global_name) or isinstance(var_or_global_name, str) + global_name = var_or_global_name if isinstance(var_or_global_name, str) else var_or_global_name.name + return self.var_global_to_local[global_name] + + def find_var(self, var_or_local_name: Union[TfExpression, str]) -> TfExpression: + """Find variable by local or global name.""" + assert tfutil.is_tf_expression(var_or_local_name) or isinstance(var_or_local_name, str) + return self.vars[var_or_local_name] if isinstance(var_or_local_name, str) else var_or_local_name + + def get_var(self, var_or_local_name: Union[TfExpression, str]) -> np.ndarray: + """Get the value of a given variable as NumPy array. 
+ Note: This method is very inefficient -- prefer to use tflib.run(list_of_vars) whenever possible.""" + return self.find_var(var_or_local_name).eval() + + def set_var(self, var_or_local_name: Union[TfExpression, str], new_value: Union[int, float, np.ndarray]) -> None: + """Set the value of a given variable based on the given NumPy array. + Note: This method is very inefficient -- prefer to use tflib.set_vars() whenever possible.""" + tfutil.set_vars({self.find_var(var_or_local_name): new_value}) + + def __getstate__(self) -> dict: + """Pickle export.""" + state = dict() + state["version"] = 4 + state["name"] = self.name + state["static_kwargs"] = dict(self.static_kwargs) + state["components"] = dict(self.components) + state["build_module_src"] = self._build_module_src + state["build_func_name"] = self._build_func_name + state["variables"] = list(zip(self.own_vars.keys(), tfutil.run(list(self.own_vars.values())))) + return state + + def __setstate__(self, state: dict) -> None: + """Pickle import.""" + # pylint: disable=attribute-defined-outside-init + tfutil.assert_tf_initialized() + self._init_fields() + + # Execute custom import handlers. + for handler in _import_handlers: + state = handler(state) + + # Set basic fields. + assert state["version"] in [2, 3, 4] + self.name = state["name"] + self.static_kwargs = util.EasyDict(state["static_kwargs"]) + self.components = util.EasyDict(state.get("components", {})) + self._build_module_src = state["build_module_src"] + self._build_func_name = state["build_func_name"] + + # Create temporary module from the imported source code. + module_name = "_tflib_network_import_" + uuid.uuid4().hex + module = types.ModuleType(module_name) + sys.modules[module_name] = module + _import_module_src[module] = self._build_module_src + exec(self._build_module_src, module.__dict__) # pylint: disable=exec-used + + # Locate network build function in the temporary module. + self._build_func = util.get_obj_from_module(module, self._build_func_name) + assert callable(self._build_func) + + # Init TensorFlow graph. 
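Because `__getstate__()` stores the build module's source code together with the variable values, a `Network` round-trips through plain `pickle`, provided the build function lives in a standalone module without side effects (as the class docstring above requires). A sketch, reusing `net` from the construction example:

```python
import pickle
import dnnlib.tflib as tflib

tflib.init_tf()
with open('network.pkl', 'wb') as f:
    pickle.dump(net, f)             # __getstate__: module source + static kwargs + variable values

with open('network.pkl', 'rb') as f:
    net_copy = pickle.load(f)       # __setstate__: re-executes the stored source and rebuilds the graph

print(net_copy.output_shape)        # same template graph, variables restored from the pickle
```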
+ self._init_graph() + self.reset_own_vars() + tfutil.set_vars({self.find_var(name): value for name, value in state["variables"]}) + + def clone(self, name: str = None, **new_static_kwargs) -> "Network": + """Create a clone of this network with its own copy of the variables.""" + # pylint: disable=protected-access + net = object.__new__(Network) + net._init_fields() + net.name = name if name is not None else self.name + net.static_kwargs = util.EasyDict(self.static_kwargs) + net.static_kwargs.update(new_static_kwargs) + net._build_module_src = self._build_module_src + net._build_func_name = self._build_func_name + net._build_func = self._build_func + net._init_graph() + net.copy_vars_from(self) + return net + + def copy_own_vars_from(self, src_net: "Network") -> None: + """Copy the values of all variables from the given network, excluding sub-networks.""" + names = [name for name in self.own_vars.keys() if name in src_net.own_vars] + tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) + + def copy_vars_from(self, src_net: "Network") -> None: + """Copy the values of all variables from the given network, including sub-networks.""" + names = [name for name in self.vars.keys() if name in src_net.vars] + tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) + + def copy_trainables_from(self, src_net: "Network") -> None: + """Copy the values of all trainable variables from the given network, including sub-networks.""" + names = [name for name in self.trainables.keys() if name in src_net.trainables] + tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) + + def convert(self, new_func_name: str, new_name: str = None, **new_static_kwargs) -> "Network": + """Create new network with the given parameters, and copy all variables from this network.""" + if new_name is None: + new_name = self.name + static_kwargs = dict(self.static_kwargs) + static_kwargs.update(new_static_kwargs) + net = Network(name=new_name, func_name=new_func_name, **static_kwargs) + net.copy_vars_from(self) + return net + + def setup_as_moving_average_of(self, src_net: "Network", beta: TfExpressionEx = 0.99, beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation: + """Construct a TensorFlow op that updates the variables of this network + to be slightly closer to those of the given network.""" + with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"): + ops = [] + for name, var in self.vars.items(): + if name in src_net.vars: + cur_beta = beta if name in self.trainables else beta_nontrainable + new_value = tfutil.lerp(src_net.vars[name], var, cur_beta) + ops.append(var.assign(new_value)) + return tf.group(*ops) + + def run(self, + *in_arrays: Tuple[Union[np.ndarray, None], ...], + input_transform: dict = None, + output_transform: dict = None, + return_as_list: bool = False, + print_progress: bool = False, + minibatch_size: int = None, + num_gpus: int = 1, + assume_frozen: bool = False, + **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]: + """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s). + + Args: + input_transform: A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network. + The dict must contain a 'func' field that points to a top-level function. The function is called with the input + TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. 
+ output_transform: A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network. + The dict must contain a 'func' field that points to a top-level function. The function is called with the output + TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. + return_as_list: True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs. + print_progress: Print progress to the console? Useful for very large input arrays. + minibatch_size: Maximum minibatch size to use, None = disable batching. + num_gpus: Number of GPUs to use. + assume_frozen: Improve multi-GPU performance by assuming that the trainable parameters will remain changed between calls. + dynamic_kwargs: Additional keyword arguments to be passed into the network build function. + """ + assert len(in_arrays) == self.num_inputs + assert not all(arr is None for arr in in_arrays) + assert input_transform is None or util.is_top_level_function(input_transform["func"]) + assert output_transform is None or util.is_top_level_function(output_transform["func"]) + output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs) + num_items = in_arrays[0].shape[0] + if minibatch_size is None: + minibatch_size = num_items + + # Construct unique hash key from all arguments that affect the TensorFlow graph. + key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs) + def unwind_key(obj): + if isinstance(obj, dict): + return [(key, unwind_key(value)) for key, value in sorted(obj.items())] + if callable(obj): + return util.get_top_level_function_name(obj) + return obj + key = repr(unwind_key(key)) + + # Build graph. + if key not in self._run_cache: + with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None): + with tf.device("/cpu:0"): + in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names] + in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr])) + + out_split = [] + for gpu in range(num_gpus): + with tf.device("/gpu:%d" % gpu): + net_gpu = self.clone() if assume_frozen else self + in_gpu = in_split[gpu] + + if input_transform is not None: + in_kwargs = dict(input_transform) + in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs) + in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu) + + assert len(in_gpu) == self.num_inputs + out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs) + + if output_transform is not None: + out_kwargs = dict(output_transform) + out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs) + out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu) + + assert len(out_gpu) == self.num_outputs + out_split.append(out_gpu) + + with tf.device("/cpu:0"): + out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)] + self._run_cache[key] = in_expr, out_expr + + # Run minibatches. 
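A typical inference call through `run()`, matching the argument docstring above; a sketch where `Gs` stands for a generator `Network` with latent and label inputs (conventionally the moving-average clone obtained via `G.clone('Gs')` and `setup_as_moving_average_of(G)`):

```python
import numpy as np
import dnnlib.tflib as tflib

latents = np.random.randn(16, *Gs.input_shape[1:])                  # [minibatch, latent_size]
labels = np.zeros([16] + Gs.input_shapes[1][1:], dtype=np.float32)  # second input, zeros if unused
fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)   # 'func' must be a top-level function
images = Gs.run(latents, labels, minibatch_size=8, output_transform=fmt)
print(images.shape, images.dtype)                                   # e.g. (16, 1024, 1024, 3) uint8
```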
+ in_expr, out_expr = self._run_cache[key] + out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr] + + for mb_begin in range(0, num_items, minibatch_size): + if print_progress: + print("\r%d / %d" % (mb_begin, num_items), end="") + + mb_end = min(mb_begin + minibatch_size, num_items) + mb_num = mb_end - mb_begin + mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)] + mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in))) + + for dst, src in zip(out_arrays, mb_out): + dst[mb_begin: mb_end] = src + + # Done. + if print_progress: + print("\r%d / %d" % (num_items, num_items)) + + if not return_as_list: + out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays) + return out_arrays + + def list_ops(self) -> List[TfExpression]: + include_prefix = self.scope + "/" + exclude_prefix = include_prefix + "_" + ops = tf.get_default_graph().get_operations() + ops = [op for op in ops if op.name.startswith(include_prefix)] + ops = [op for op in ops if not op.name.startswith(exclude_prefix)] + return ops + + def list_layers(self) -> List[Tuple[str, TfExpression, List[TfExpression]]]: + """Returns a list of (layer_name, output_expr, trainable_vars) tuples corresponding to + individual layers of the network. Mainly intended to be used for reporting.""" + layers = [] + + def recurse(scope, parent_ops, parent_vars, level): + # Ignore specific patterns. + if any(p in scope for p in ["/Shape", "/strided_slice", "/Cast", "/concat", "/Assign"]): + return + + # Filter ops and vars by scope. + global_prefix = scope + "/" + local_prefix = global_prefix[len(self.scope) + 1:] + cur_ops = [op for op in parent_ops if op.name.startswith(global_prefix) or op.name == global_prefix[:-1]] + cur_vars = [(name, var) for name, var in parent_vars if name.startswith(local_prefix) or name == local_prefix[:-1]] + if not cur_ops and not cur_vars: + return + + # Filter out all ops related to variables. + for var in [op for op in cur_ops if op.type.startswith("Variable")]: + var_prefix = var.name + "/" + cur_ops = [op for op in cur_ops if not op.name.startswith(var_prefix)] + + # Scope does not contain ops as immediate children => recurse deeper. + contains_direct_ops = any("/" not in op.name[len(global_prefix):] and op.type not in ["Identity", "Cast", "Transpose"] for op in cur_ops) + if (level == 0 or not contains_direct_ops) and (len(cur_ops) + len(cur_vars)) > 1: + visited = set() + for rel_name in [op.name[len(global_prefix):] for op in cur_ops] + [name[len(local_prefix):] for name, _var in cur_vars]: + token = rel_name.split("/")[0] + if token not in visited: + recurse(global_prefix + token, cur_ops, cur_vars, level + 1) + visited.add(token) + return + + # Report layer. 
+ layer_name = scope[len(self.scope) + 1:] + layer_output = cur_ops[-1].outputs[0] if cur_ops else cur_vars[-1][1] + layer_trainables = [var for _name, var in cur_vars if var.trainable] + layers.append((layer_name, layer_output, layer_trainables)) + + recurse(self.scope, self.list_ops(), list(self.vars.items()), 0) + return layers + + def print_layers(self, title: str = None, hide_layers_with_no_params: bool = False) -> None: + """Print a summary table of the network structure.""" + rows = [[title if title is not None else self.name, "Params", "OutputShape", "WeightShape"]] + rows += [["---"] * 4] + total_params = 0 + + for layer_name, layer_output, layer_trainables in self.list_layers(): + num_params = sum(int(np.prod(var.shape.as_list())) for var in layer_trainables) + weights = [var for var in layer_trainables if var.name.endswith("/weight:0")] + weights.sort(key=lambda x: len(x.name)) + if len(weights) == 0 and len(layer_trainables) == 1: + weights = layer_trainables + total_params += num_params + + if not hide_layers_with_no_params or num_params != 0: + num_params_str = str(num_params) if num_params > 0 else "-" + output_shape_str = str(layer_output.shape) + weight_shape_str = str(weights[0].shape) if len(weights) >= 1 else "-" + rows += [[layer_name, num_params_str, output_shape_str, weight_shape_str]] + + rows += [["---"] * 4] + rows += [["Total", str(total_params), "", ""]] + + widths = [max(len(cell) for cell in column) for column in zip(*rows)] + print() + for row in rows: + print(" ".join(cell + " " * (width - len(cell)) for cell, width in zip(row, widths))) + print() + + def setup_weight_histograms(self, title: str = None) -> None: + """Construct summary ops to include histograms of all trainable parameters in TensorBoard.""" + if title is None: + title = self.name + + with tf.name_scope(None), tf.device(None), tf.control_dependencies(None): + for local_name, var in self.trainables.items(): + if "/" in local_name: + p = local_name.split("/") + name = title + "_" + p[-1] + "/" + "_".join(p[:-1]) + else: + name = title + "_toplevel/" + local_name + + tf.summary.histogram(name, var) + +#---------------------------------------------------------------------------- +# Backwards-compatible emulation of legacy output transformation in Network.run(). 
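The emulation below maps the deprecated keyword arguments onto an `output_transform`, so the two call styles named in the deprecation warning are interchangeable; a sketch, reusing `Gs`, `latents`, and `labels` from the `run()` example above:

```python
import numpy as np
import dnnlib.tflib as tflib

# Deprecated spelling, still accepted via _handle_legacy_output_transforms():
images_old = Gs.run(latents, labels, out_mul=127.5, out_add=127.5, out_dtype=np.uint8)

# Preferred spelling, per the warning message below:
images_new = Gs.run(latents, labels, output_transform=dict(func=tflib.convert_images_to_uint8))
```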
+ +_print_legacy_warning = True + +def _handle_legacy_output_transforms(output_transform, dynamic_kwargs): + global _print_legacy_warning + legacy_kwargs = ["out_mul", "out_add", "out_shrink", "out_dtype"] + if not any(kwarg in dynamic_kwargs for kwarg in legacy_kwargs): + return output_transform, dynamic_kwargs + + if _print_legacy_warning: + _print_legacy_warning = False + print() + print("WARNING: Old-style output transformations in Network.run() are deprecated.") + print("Consider using 'output_transform=dict(func=tflib.convert_images_to_uint8)'") + print("instead of 'out_mul=127.5, out_add=127.5, out_dtype=np.uint8'.") + print() + assert output_transform is None + + new_kwargs = dict(dynamic_kwargs) + new_transform = {kwarg: new_kwargs.pop(kwarg) for kwarg in legacy_kwargs if kwarg in dynamic_kwargs} + new_transform["func"] = _legacy_output_transform_func + return new_transform, new_kwargs + +def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None): + if out_mul != 1.0: + expr = [x * out_mul for x in expr] + + if out_add != 0.0: + expr = [x + out_add for x in expr] + + if out_shrink > 1: + ksize = [1, 1, out_shrink, out_shrink] + expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr] + + if out_dtype is not None: + if tf.as_dtype(out_dtype).is_integer: + expr = [tf.round(x) for x in expr] + expr = [tf.saturate_cast(x, out_dtype) for x in expr] + return expr diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9ab9908efa3cb38af52e8d5bcaa8acffde5a8875 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +# empty diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/fused_bias_act.cu b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/fused_bias_act.cu new file mode 100644 index 0000000000000000000000000000000000000000..1102f624fadd0b803bdfb99fecfe145d7ec8abc4 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/fused_bias_act.cu @@ -0,0 +1,188 @@ +// Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +// +// This work is made available under the Nvidia Source Code License-NC. +// To view a copy of this license, visit +// https://nvlabs.github.io/stylegan2/license.html + +#define EIGEN_USE_GPU +#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace tensorflow::shape_inference; + +#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) + +//------------------------------------------------------------------------ +// CUDA kernel. 
+ +template +struct FusedBiasActKernelParams +{ + const T* x; // [sizeX] + const T* b; // [sizeB] or NULL + const T* ref; // [sizeX] or NULL + T* y; // [sizeX] + + int grad; + int axis; + int act; + float alpha; + float gain; + + int sizeX; + int sizeB; + int stepB; + int loopX; +}; + +template +static __global__ void FusedBiasActKernel(const FusedBiasActKernelParams p) +{ + const float expRange = 80.0f; + const float halfExpRange = 40.0f; + const float seluScale = 1.0507009873554804934193349852946f; + const float seluAlpha = 1.6732632423543772848170429916717f; + + // Loop over elements. + int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x; + for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x) + { + // Load and apply bias. + float x = (float)p.x[xi]; + if (p.b) + x += (float)p.b[(xi / p.stepB) % p.sizeB]; + float ref = (p.ref) ? (float)p.ref[xi] : 0.0f; + if (p.gain != 0.0f & p.act != 9) + ref /= p.gain; + + // Evaluate activation func. + float y; + switch (p.act * 10 + p.grad) + { + // linear + default: + case 10: y = x; break; + case 11: y = x; break; + case 12: y = 0.0f; break; + + // relu + case 20: y = (x > 0.0f) ? x : 0.0f; break; + case 21: y = (ref > 0.0f) ? x : 0.0f; break; + case 22: y = 0.0f; break; + + // lrelu + case 30: y = (x > 0.0f) ? x : x * p.alpha; break; + case 31: y = (ref > 0.0f) ? x : x * p.alpha; break; + case 32: y = 0.0f; break; + + // tanh + case 40: { float c = expf(x); float d = 1.0f / c; y = (x < -expRange) ? -1.0f : (x > expRange) ? 1.0f : (c - d) / (c + d); } break; + case 41: y = x * (1.0f - ref * ref); break; + case 42: y = x * (1.0f - ref * ref) * (-2.0f * ref); break; + + // sigmoid + case 50: y = (x < -expRange) ? 0.0f : 1.0f / (expf(-x) + 1.0f); break; + case 51: y = x * ref * (1.0f - ref); break; + case 52: y = x * ref * (1.0f - ref) * (1.0f - 2.0f * ref); break; + + // elu + case 60: y = (x >= 0.0f) ? x : expf(x) - 1.0f; break; + case 61: y = (ref >= 0.0f) ? x : x * (ref + 1.0f); break; + case 62: y = (ref >= 0.0f) ? 0.0f : x * (ref + 1.0f); break; + + // selu + case 70: y = (x >= 0.0f) ? seluScale * x : (seluScale * seluAlpha) * (expf(x) - 1.0f); break; + case 71: y = (ref >= 0.0f) ? x * seluScale : x * (ref + seluScale * seluAlpha); break; + case 72: y = (ref >= 0.0f) ? 0.0f : x * (ref + seluScale * seluAlpha); break; + + // softplus + case 80: y = (x > expRange) ? x : logf(expf(x) + 1.0f); break; + case 81: y = x * (1.0f - expf(-ref)); break; + case 82: { float c = expf(-ref); y = x * c * (1.0f - c); } break; + + // swish + case 90: y = (x < -expRange) ? 0.0f : x / (expf(-x) + 1.0f); break; + case 91: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? x : x * c * (ref + d) / (d * d); } break; + case 92: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? 0.0f : x * c * (ref * (2.0f - d) + 2.0f * d) / (d * d * d); } break; + } + + // Apply gain and store. + p.y[xi] = (T)(y * p.gain); + } +} + +//------------------------------------------------------------------------ +// TensorFlow op. 
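The switch above encodes each case as `act * 10 + grad`, where `grad=0` is the forward pass and `grad=1`/`grad=2` are the first- and second-order gradient helpers evaluated against `ref`. A NumPy sketch of the forward path for the `lrelu` case (cuda_idx 3 in the Python wrapper later in this diff), illustration only:

```python
import numpy as np

def fused_bias_act_lrelu_ref(x, b, alpha=0.2, gain=np.sqrt(2)):
    # Case 30 in the kernel: y = gain * lrelu(x + b), with the bias broadcast over axis 1 (channels).
    x = x + b.reshape([1, -1] + [1] * (x.ndim - 2))
    y = np.where(x > 0.0, x, x * alpha)
    return y * gain

x = np.random.randn(2, 4, 8, 8).astype(np.float32)   # NCHW activations
b = np.random.randn(4).astype(np.float32)            # per-channel bias
print(fused_bias_act_lrelu_ref(x, b).shape)          # (2, 4, 8, 8)
```

The Python wrapper defined below exposes the same computation as `fused_bias_act(x, b, act='lrelu', gain=np.sqrt(2))`.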
+ +template +struct FusedBiasActOp : public OpKernel +{ + FusedBiasActKernelParams m_attribs; + + FusedBiasActOp(OpKernelConstruction* ctx) : OpKernel(ctx) + { + memset(&m_attribs, 0, sizeof(m_attribs)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("grad", &m_attribs.grad)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &m_attribs.axis)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("act", &m_attribs.act)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &m_attribs.alpha)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("gain", &m_attribs.gain)); + OP_REQUIRES(ctx, m_attribs.grad >= 0, errors::InvalidArgument("grad must be non-negative")); + OP_REQUIRES(ctx, m_attribs.axis >= 0, errors::InvalidArgument("axis must be non-negative")); + OP_REQUIRES(ctx, m_attribs.act >= 0, errors::InvalidArgument("act must be non-negative")); + } + + void Compute(OpKernelContext* ctx) + { + FusedBiasActKernelParams p = m_attribs; + cudaStream_t stream = ctx->eigen_device().stream(); + + const Tensor& x = ctx->input(0); // [...] + const Tensor& b = ctx->input(1); // [sizeB] or [0] + const Tensor& ref = ctx->input(2); // x.shape or [0] + p.x = x.flat().data(); + p.b = (b.NumElements()) ? b.flat().data() : NULL; + p.ref = (ref.NumElements()) ? ref.flat().data() : NULL; + OP_REQUIRES(ctx, b.NumElements() == 0 || m_attribs.axis < x.dims(), errors::InvalidArgument("axis out of bounds")); + OP_REQUIRES(ctx, b.dims() == 1, errors::InvalidArgument("b must have rank 1")); + OP_REQUIRES(ctx, b.NumElements() == 0 || b.NumElements() == x.dim_size(m_attribs.axis), errors::InvalidArgument("b has wrong number of elements")); + OP_REQUIRES(ctx, ref.NumElements() == ((p.grad == 0) ? 0 : x.NumElements()), errors::InvalidArgument("ref has wrong number of elements")); + OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("x is too large")); + + p.sizeX = (int)x.NumElements(); + p.sizeB = (int)b.NumElements(); + p.stepB = 1; + for (int i = m_attribs.axis + 1; i < x.dims(); i++) + p.stepB *= (int)x.dim_size(i); + + Tensor* y = NULL; // x.shape + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, x.shape(), &y)); + p.y = y->flat().data(); + + p.loopX = 4; + int blockSize = 4 * 32; + int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1; + void* args[] = {&p}; + OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel((void*)FusedBiasActKernel, gridSize, blockSize, args, 0, stream)); + } +}; + +REGISTER_OP("FusedBiasAct") + .Input ("x: T") + .Input ("b: T") + .Input ("ref: T") + .Output ("y: T") + .Attr ("T: {float, half}") + .Attr ("grad: int = 0") + .Attr ("axis: int = 1") + .Attr ("act: int = 0") + .Attr ("alpha: float = 0.0") + .Attr ("gain: float = 1.0"); +REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint("T"), FusedBiasActOp); +REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint("T"), FusedBiasActOp); + +//------------------------------------------------------------------------ diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/fused_bias_act.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/fused_bias_act.py new file mode 100644 index 0000000000000000000000000000000000000000..4521ba67e7f89fd028b50223afe973c23a86d0d7 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/fused_bias_act.py @@ -0,0 +1,196 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. 
+# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Custom TensorFlow ops for efficient bias and activation.""" + +import os +import numpy as np +import tensorflow as tf +from .. import custom_ops +from ...util import EasyDict + +def _get_plugin(): + return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu') + +#---------------------------------------------------------------------------- + +activation_funcs = { + 'linear': EasyDict(func=lambda x, **_: x, def_alpha=None, def_gain=1.0, cuda_idx=1, ref='y', zero_2nd_grad=True), + 'relu': EasyDict(func=lambda x, **_: tf.nn.relu(x), def_alpha=None, def_gain=np.sqrt(2), cuda_idx=2, ref='y', zero_2nd_grad=True), + 'lrelu': EasyDict(func=lambda x, alpha, **_: tf.nn.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', zero_2nd_grad=True), + 'tanh': EasyDict(func=lambda x, **_: tf.nn.tanh(x), def_alpha=None, def_gain=1.0, cuda_idx=4, ref='y', zero_2nd_grad=False), + 'sigmoid': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x), def_alpha=None, def_gain=1.0, cuda_idx=5, ref='y', zero_2nd_grad=False), + 'elu': EasyDict(func=lambda x, **_: tf.nn.elu(x), def_alpha=None, def_gain=1.0, cuda_idx=6, ref='y', zero_2nd_grad=False), + 'selu': EasyDict(func=lambda x, **_: tf.nn.selu(x), def_alpha=None, def_gain=1.0, cuda_idx=7, ref='y', zero_2nd_grad=False), + 'softplus': EasyDict(func=lambda x, **_: tf.nn.softplus(x), def_alpha=None, def_gain=1.0, cuda_idx=8, ref='y', zero_2nd_grad=False), + 'swish': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x) * x, def_alpha=None, def_gain=np.sqrt(2), cuda_idx=9, ref='x', zero_2nd_grad=False), +} + +#---------------------------------------------------------------------------- + +def fused_bias_act(x, b=None, axis=1, act='linear', alpha=None, gain=None, impl='ref'): + r"""Fused bias and activation function. + + Adds bias `b` to activation tensor `x`, evaluates activation function `act`, + and scales the result by `gain`. Each of the steps is optional. In most cases, + the fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports first and second order gradients, + but not third order gradients. + + Args: + x: Input activation tensor. Can have any shape, but if `b` is defined, the + dimension corresponding to `axis`, as well as the rank, must be known. + b: Bias vector, or `None` to disable. Must be a 1D tensor of the same type + as `x`. The shape must be known, and it must match the dimension of `x` + corresponding to `axis`. + axis: The dimension in `x` corresponding to the elements of `b`. + The value of `axis` is ignored if `b` is not specified. + act: Name of the activation function to evaluate, or `"linear"` to disable. + Can be e.g. `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`, etc. + See `activation_funcs` for a full list. `None` is not allowed. + alpha: Shape parameter for the activation function, or `None` to use the default. + gain: Scaling factor for the output tensor, or `None` to use default. + See `activation_funcs` for the default scaling of each activation function. + If unsure, consider specifying `1.0`. + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the same shape and datatype as `x`. 
+ """ + + impl_dict = { + 'ref': _fused_bias_act_ref, + 'cuda': _fused_bias_act_cuda, + } + return impl_dict[impl](x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain) + +#---------------------------------------------------------------------------- + +def _fused_bias_act_ref(x, b, axis, act, alpha, gain): + """Slow reference implementation of `fused_bias_act()` using standard TensorFlow ops.""" + + # Validate arguments. + x = tf.convert_to_tensor(x) + b = tf.convert_to_tensor(b) if b is not None else tf.constant([], dtype=x.dtype) + act_spec = activation_funcs[act] + assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) + assert b.shape[0] == 0 or 0 <= axis < x.shape.rank + if alpha is None: + alpha = act_spec.def_alpha + if gain is None: + gain = act_spec.def_gain + + # Add bias. + if b.shape[0] != 0: + x += tf.reshape(b, [-1 if i == axis else 1 for i in range(x.shape.rank)]) + + # Evaluate activation function. + x = act_spec.func(x, alpha=alpha) + + # Scale by gain. + if gain != 1: + x *= gain + return x + +#---------------------------------------------------------------------------- + +def _fused_bias_act_cuda(x, b, axis, act, alpha, gain): + """Fast CUDA implementation of `fused_bias_act()` using custom ops.""" + + # Validate arguments. + x = tf.convert_to_tensor(x) + empty_tensor = tf.constant([], dtype=x.dtype) + b = tf.convert_to_tensor(b) if b is not None else empty_tensor + act_spec = activation_funcs[act] + assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) + assert b.shape[0] == 0 or 0 <= axis < x.shape.rank + if alpha is None: + alpha = act_spec.def_alpha + if gain is None: + gain = act_spec.def_gain + + # Special cases. + if act == 'linear' and b is None and gain == 1.0: + return x + if act_spec.cuda_idx is None: + return _fused_bias_act_ref(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain) + + # CUDA kernel. + cuda_kernel = _get_plugin().fused_bias_act + cuda_kwargs = dict(axis=axis, act=act_spec.cuda_idx, alpha=alpha, gain=gain) + + # Forward pass: y = func(x, b). + def func_y(x, b): + y = cuda_kernel(x=x, b=b, ref=empty_tensor, grad=0, **cuda_kwargs) + y.set_shape(x.shape) + return y + + # Backward pass: dx, db = grad(dy, x, y) + def grad_dx(dy, x, y): + ref = {'x': x, 'y': y}[act_spec.ref] + dx = cuda_kernel(x=dy, b=empty_tensor, ref=ref, grad=1, **cuda_kwargs) + dx.set_shape(x.shape) + return dx + def grad_db(dx): + if b.shape[0] == 0: + return empty_tensor + db = dx + if axis < x.shape.rank - 1: + db = tf.reduce_sum(db, list(range(axis + 1, x.shape.rank))) + if axis > 0: + db = tf.reduce_sum(db, list(range(axis))) + db.set_shape(b.shape) + return db + + # Second order gradients: d_dy, d_x = grad2(d_dx, d_db, x, y) + def grad2_d_dy(d_dx, d_db, x, y): + ref = {'x': x, 'y': y}[act_spec.ref] + d_dy = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=1, **cuda_kwargs) + d_dy.set_shape(x.shape) + return d_dy + def grad2_d_x(d_dx, d_db, x, y): + ref = {'x': x, 'y': y}[act_spec.ref] + d_x = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=2, **cuda_kwargs) + d_x.set_shape(x.shape) + return d_x + + # Fast version for piecewise-linear activation funcs. + @tf.custom_gradient + def func_zero_2nd_grad(x, b): + y = func_y(x, b) + @tf.custom_gradient + def grad(dy): + dx = grad_dx(dy, x, y) + db = grad_db(dx) + def grad2(d_dx, d_db): + d_dy = grad2_d_dy(d_dx, d_db, x, y) + return d_dy + return (dx, db), grad2 + return y, grad + + # Slow version for general activation funcs. 
+ @tf.custom_gradient + def func_nonzero_2nd_grad(x, b): + y = func_y(x, b) + def grad_wrap(dy): + @tf.custom_gradient + def grad_impl(dy, x): + dx = grad_dx(dy, x, y) + db = grad_db(dx) + def grad2(d_dx, d_db): + d_dy = grad2_d_dy(d_dx, d_db, x, y) + d_x = grad2_d_x(d_dx, d_db, x, y) + return d_dy, d_x + return (dx, db), grad2 + return grad_impl(dy, x) + return y, grad_wrap + + # Which version to use? + if act_spec.zero_2nd_grad: + return func_zero_2nd_grad(x, b) + return func_nonzero_2nd_grad(x, b) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/upfirdn_2d.cu b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/upfirdn_2d.cu new file mode 100644 index 0000000000000000000000000000000000000000..b97ef36c9e5ba46a92a380dbc687e275235a1ccf --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/upfirdn_2d.cu @@ -0,0 +1,326 @@ +// Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +// +// This work is made available under the Nvidia Source Code License-NC. +// To view a copy of this license, visit +// https://nvlabs.github.io/stylegan2/license.html + +#define EIGEN_USE_GPU +#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace tensorflow::shape_inference; + +//------------------------------------------------------------------------ +// Helpers. + +#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) + +static __host__ __device__ __forceinline__ int floorDiv(int a, int b) +{ + int c = a / b; + if (c * b > a) + c--; + return c; +} + +//------------------------------------------------------------------------ +// CUDA kernel params. + +template +struct UpFirDn2DKernelParams +{ + const T* x; // [majorDim, inH, inW, minorDim] + const T* k; // [kernelH, kernelW] + T* y; // [majorDim, outH, outW, minorDim] + + int upx; + int upy; + int downx; + int downy; + int padx0; + int padx1; + int pady0; + int pady1; + + int majorDim; + int inH; + int inW; + int minorDim; + int kernelH; + int kernelW; + int outH; + int outW; + int loopMajor; + int loopX; +}; + +//------------------------------------------------------------------------ +// General CUDA implementation for large filter kernels. + +template +static __global__ void UpFirDn2DKernel_large(const UpFirDn2DKernelParams p) +{ + // Calculate thread index. + int minorIdx = blockIdx.x * blockDim.x + threadIdx.x; + int outY = minorIdx / p.minorDim; + minorIdx -= outY * p.minorDim; + int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y; + int majorIdxBase = blockIdx.z * p.loopMajor; + if (outXBase >= p.outW || outY >= p.outH || majorIdxBase >= p.majorDim) + return; + + // Setup Y receptive field. + int midY = outY * p.downy + p.upy - 1 - p.pady0; + int inY = min(max(floorDiv(midY, p.upy), 0), p.inH); + int h = min(max(floorDiv(midY + p.kernelH, p.upy), 0), p.inH) - inY; + int kernelY = midY + p.kernelH - (inY + 1) * p.upy; + + // Loop over majorDim and outX. 
+ for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor && majorIdx < p.majorDim; loopMajor++, majorIdx++) + for (int loopX = 0, outX = outXBase; loopX < p.loopX && outX < p.outW; loopX++, outX += blockDim.y) + { + // Setup X receptive field. + int midX = outX * p.downx + p.upx - 1 - p.padx0; + int inX = min(max(floorDiv(midX, p.upx), 0), p.inW); + int w = min(max(floorDiv(midX + p.kernelW, p.upx), 0), p.inW) - inX; + int kernelX = midX + p.kernelW - (inX + 1) * p.upx; + + // Initialize pointers. + const T* xp = &p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx]; + const T* kp = &p.k[kernelY * p.kernelW + kernelX]; + int xpx = p.minorDim; + int kpx = -p.upx; + int xpy = p.inW * p.minorDim; + int kpy = -p.upy * p.kernelW; + + // Inner loop. + float v = 0.0f; + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++) + { + v += (float)(*xp) * (float)(*kp); + xp += xpx; + kp += kpx; + } + xp += xpy - w * xpx; + kp += kpy - w * kpx; + } + + // Store result. + p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v; + } +} + +//------------------------------------------------------------------------ +// Specialized CUDA implementation for small filter kernels. + +template +static __global__ void UpFirDn2DKernel_small(const UpFirDn2DKernelParams p) +{ + //assert(kernelW % upx == 0); + //assert(kernelH % upy == 0); + const int tileInW = ((tileOutW - 1) * downx + kernelW - 1) / upx + 1; + const int tileInH = ((tileOutH - 1) * downy + kernelH - 1) / upy + 1; + __shared__ volatile float sk[kernelH][kernelW]; + __shared__ volatile float sx[tileInH][tileInW]; + + // Calculate tile index. + int minorIdx = blockIdx.x; + int tileOutY = minorIdx / p.minorDim; + minorIdx -= tileOutY * p.minorDim; + tileOutY *= tileOutH; + int tileOutXBase = blockIdx.y * p.loopX * tileOutW; + int majorIdxBase = blockIdx.z * p.loopMajor; + if (tileOutXBase >= p.outW | tileOutY >= p.outH | majorIdxBase >= p.majorDim) + return; + + // Load filter kernel (flipped). + for (int tapIdx = threadIdx.x; tapIdx < kernelH * kernelW; tapIdx += blockDim.x) + { + int ky = tapIdx / kernelW; + int kx = tapIdx - ky * kernelW; + float v = 0.0f; + if (kx < p.kernelW & ky < p.kernelH) + v = (float)p.k[(p.kernelH - 1 - ky) * p.kernelW + (p.kernelW - 1 - kx)]; + sk[ky][kx] = v; + } + + // Loop over majorDim and outX. + for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor & majorIdx < p.majorDim; loopMajor++, majorIdx++) + for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outW; loopX++, tileOutX += tileOutW) + { + // Load input pixels. + int tileMidX = tileOutX * downx + upx - 1 - p.padx0; + int tileMidY = tileOutY * downy + upy - 1 - p.pady0; + int tileInX = floorDiv(tileMidX, upx); + int tileInY = floorDiv(tileMidY, upy); + __syncthreads(); + for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW; inIdx += blockDim.x) + { + int relInY = inIdx / tileInW; + int relInX = inIdx - relInY * tileInW; + int inX = relInX + tileInX; + int inY = relInY + tileInY; + float v = 0.0f; + if (inX >= 0 & inY >= 0 & inX < p.inW & inY < p.inH) + v = (float)p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx]; + sx[relInY][relInX] = v; + } + + // Loop over output pixels. 
+ __syncthreads(); + for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW; outIdx += blockDim.x) + { + int relOutY = outIdx / tileOutW; + int relOutX = outIdx - relOutY * tileOutW; + int outX = relOutX + tileOutX; + int outY = relOutY + tileOutY; + + // Setup receptive field. + int midX = tileMidX + relOutX * downx; + int midY = tileMidY + relOutY * downy; + int inX = floorDiv(midX, upx); + int inY = floorDiv(midY, upy); + int relInX = inX - tileInX; + int relInY = inY - tileInY; + int kernelX = (inX + 1) * upx - midX - 1; // flipped + int kernelY = (inY + 1) * upy - midY - 1; // flipped + + // Inner loop. + float v = 0.0f; + #pragma unroll + for (int y = 0; y < kernelH / upy; y++) + #pragma unroll + for (int x = 0; x < kernelW / upx; x++) + v += sx[relInY + y][relInX + x] * sk[kernelY + y * upy][kernelX + x * upx]; + + // Store result. + if (outX < p.outW & outY < p.outH) + p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v; + } + } +} + +//------------------------------------------------------------------------ +// TensorFlow op. + +template +struct UpFirDn2DOp : public OpKernel +{ + UpFirDn2DKernelParams m_attribs; + + UpFirDn2DOp(OpKernelConstruction* ctx) : OpKernel(ctx) + { + memset(&m_attribs, 0, sizeof(m_attribs)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("upx", &m_attribs.upx)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("upy", &m_attribs.upy)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("downx", &m_attribs.downx)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("downy", &m_attribs.downy)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("padx0", &m_attribs.padx0)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("padx1", &m_attribs.padx1)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("pady0", &m_attribs.pady0)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("pady1", &m_attribs.pady1)); + OP_REQUIRES(ctx, m_attribs.upx >= 1 && m_attribs.upy >= 1, errors::InvalidArgument("upx and upy must be at least 1x1")); + OP_REQUIRES(ctx, m_attribs.downx >= 1 && m_attribs.downy >= 1, errors::InvalidArgument("downx and downy must be at least 1x1")); + } + + void Compute(OpKernelContext* ctx) + { + UpFirDn2DKernelParams p = m_attribs; + cudaStream_t stream = ctx->eigen_device().stream(); + + const Tensor& x = ctx->input(0); // [majorDim, inH, inW, minorDim] + const Tensor& k = ctx->input(1); // [kernelH, kernelW] + p.x = x.flat().data(); + p.k = k.flat().data(); + OP_REQUIRES(ctx, x.dims() == 4, errors::InvalidArgument("input must have rank 4")); + OP_REQUIRES(ctx, k.dims() == 2, errors::InvalidArgument("kernel must have rank 2")); + OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("input too large")); + OP_REQUIRES(ctx, k.NumElements() <= kint32max, errors::InvalidArgument("kernel too large")); + + p.majorDim = (int)x.dim_size(0); + p.inH = (int)x.dim_size(1); + p.inW = (int)x.dim_size(2); + p.minorDim = (int)x.dim_size(3); + p.kernelH = (int)k.dim_size(0); + p.kernelW = (int)k.dim_size(1); + OP_REQUIRES(ctx, p.kernelW >= 1 && p.kernelH >= 1, errors::InvalidArgument("kernel must be at least 1x1")); + + p.outW = (p.inW * p.upx + p.padx0 + p.padx1 - p.kernelW + p.downx) / p.downx; + p.outH = (p.inH * p.upy + p.pady0 + p.pady1 - p.kernelH + p.downy) / p.downy; + OP_REQUIRES(ctx, p.outW >= 1 && p.outH >= 1, errors::InvalidArgument("output must be at least 1x1")); + + Tensor* y = NULL; // [majorDim, outH, outW, minorDim] + TensorShape ys; + ys.AddDim(p.majorDim); + ys.AddDim(p.outH); + ys.AddDim(p.outW); + ys.AddDim(p.minorDim); + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, ys, &y)); + p.y = y->flat().data(); + 
OP_REQUIRES(ctx, y->NumElements() <= kint32max, errors::InvalidArgument("output too large")); + + // Choose CUDA kernel to use. + void* cudaKernel = (void*)UpFirDn2DKernel_large; + int tileOutW = -1; + int tileOutH = -1; + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 7 && p.kernelH <= 7) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 5 && p.kernelH <= 5) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 3 && p.kernelH <= 3) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + + // Choose launch params. + dim3 blockSize; + dim3 gridSize; + if (tileOutW > 0 && tileOutH > 0) // small + { + p.loopMajor = (p.majorDim - 1) / 16384 + 1; + p.loopX = 1; + blockSize = dim3(32 * 8, 1, 1); + gridSize = dim3(((p.outH - 1) / tileOutH + 1) * p.minorDim, (p.outW - 1) / (p.loopX * tileOutW) + 1, (p.majorDim - 1) / p.loopMajor + 1); + } + else // large + { + p.loopMajor = (p.majorDim - 1) / 16384 + 1; + p.loopX = 4; + blockSize = dim3(4, 32, 1); + gridSize = dim3((p.outH * p.minorDim - 1) / blockSize.x + 1, (p.outW - 1) / (p.loopX * blockSize.y) + 1, (p.majorDim - 1) / p.loopMajor + 1); + } + + // Launch CUDA kernel. 
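+        // The loopMajor/loopX values chosen above make each thread block iterate
+        // over several batch items / output tiles, keeping the grid dimensions
+        // within CUDA launch limits for large inputs.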
+ void* args[] = {&p}; + OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel(cudaKernel, gridSize, blockSize, args, 0, stream)); + } +}; + +REGISTER_OP("UpFirDn2D") + .Input ("x: T") + .Input ("k: T") + .Output ("y: T") + .Attr ("T: {float, half}") + .Attr ("upx: int = 1") + .Attr ("upy: int = 1") + .Attr ("downx: int = 1") + .Attr ("downy: int = 1") + .Attr ("padx0: int = 0") + .Attr ("padx1: int = 0") + .Attr ("pady0: int = 0") + .Attr ("pady1: int = 0"); +REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint("T"), UpFirDn2DOp); +REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint("T"), UpFirDn2DOp); + +//------------------------------------------------------------------------ diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/upfirdn_2d.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/upfirdn_2d.py new file mode 100644 index 0000000000000000000000000000000000000000..c0759ca252d58efa75616a36209b15d4b745ecf1 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/ops/upfirdn_2d.py @@ -0,0 +1,364 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Custom TensorFlow ops for efficient resampling of 2D images.""" + +import os +import numpy as np +import tensorflow as tf +from .. import custom_ops + +def _get_plugin(): + return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu') + +#---------------------------------------------------------------------------- + +def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1, padx0=0, padx1=0, pady0=0, pady1=0, impl='ref'): + r"""Pad, upsample, FIR filter, and downsample a batch of 2D images. + + Accepts a batch of 2D images of the shape `[majorDim, inH, inW, minorDim]` + and performs the following operations for each image, batched across + `majorDim` and `minorDim`: + + 1. Pad the image with zeros by the specified number of pixels on each side + (`padx0`, `padx1`, `pady0`, `pady1`). Specifying a negative value + corresponds to cropping the image. + + 2. Upsample the image by inserting the zeros after each pixel (`upx`, `upy`). + + 3. Convolve the image with the specified 2D FIR filter (`k`), shrinking the + image so that the footprint of all output pixels lies within the input image. + + 4. Downsample the image by throwing away pixels (`downx`, `downy`). + + This sequence of operations bears close resemblance to scipy.signal.upfirdn(). + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[majorDim, inH, inW, minorDim]`. + k: 2D FIR filter of the shape `[firH, firW]`. + upx: Integer upsampling factor along the X-axis (default: 1). + upy: Integer upsampling factor along the Y-axis (default: 1). + downx: Integer downsampling factor along the X-axis (default: 1). + downy: Integer downsampling factor along the Y-axis (default: 1). + padx0: Number of pixels to pad on the left side (default: 0). + padx1: Number of pixels to pad on the right side (default: 0). + pady0: Number of pixels to pad on the top side (default: 0). + pady1: Number of pixels to pad on the bottom side (default: 0). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). 
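+
+        Example (illustrative): nearest-neighbour 2x upsampling of a batch of
+        shape `[majorDim, inH, inW, 1]` can be expressed directly as
+        `upfirdn_2d(x, k=np.ones([2, 2]), upx=2, upy=2, padx0=1, pady0=1)`,
+        which is the combination used internally by `upsample_2d(..., factor=2)`
+        with its default filter.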
+ + Returns: + Tensor of the shape `[majorDim, outH, outW, minorDim]`, and same datatype as `x`. + """ + + impl_dict = { + 'ref': _upfirdn_2d_ref, + 'cuda': _upfirdn_2d_cuda, + } + return impl_dict[impl](x=x, k=k, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1) + +#---------------------------------------------------------------------------- + +def _upfirdn_2d_ref(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): + """Slow reference implementation of `upfirdn_2d()` using standard TensorFlow ops.""" + + x = tf.convert_to_tensor(x) + k = np.asarray(k, dtype=np.float32) + assert x.shape.rank == 4 + inH = x.shape[1].value + inW = x.shape[2].value + minorDim = _shape(x, 3) + kernelH, kernelW = k.shape + assert inW >= 1 and inH >= 1 + assert kernelW >= 1 and kernelH >= 1 + assert isinstance(upx, int) and isinstance(upy, int) + assert isinstance(downx, int) and isinstance(downy, int) + assert isinstance(padx0, int) and isinstance(padx1, int) + assert isinstance(pady0, int) and isinstance(pady1, int) + + # Upsample (insert zeros). + x = tf.reshape(x, [-1, inH, 1, inW, 1, minorDim]) + x = tf.pad(x, [[0, 0], [0, 0], [0, upy - 1], [0, 0], [0, upx - 1], [0, 0]]) + x = tf.reshape(x, [-1, inH * upy, inW * upx, minorDim]) + + # Pad (crop if negative). + x = tf.pad(x, [[0, 0], [max(pady0, 0), max(pady1, 0)], [max(padx0, 0), max(padx1, 0)], [0, 0]]) + x = x[:, max(-pady0, 0) : x.shape[1].value - max(-pady1, 0), max(-padx0, 0) : x.shape[2].value - max(-padx1, 0), :] + + # Convolve with filter. + x = tf.transpose(x, [0, 3, 1, 2]) + x = tf.reshape(x, [-1, 1, inH * upy + pady0 + pady1, inW * upx + padx0 + padx1]) + w = tf.constant(k[::-1, ::-1, np.newaxis, np.newaxis], dtype=x.dtype) + x = tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='VALID', data_format='NCHW') + x = tf.reshape(x, [-1, minorDim, inH * upy + pady0 + pady1 - kernelH + 1, inW * upx + padx0 + padx1 - kernelW + 1]) + x = tf.transpose(x, [0, 2, 3, 1]) + + # Downsample (throw away pixels). 
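+    # (Strided slicing keeps every downy-th row and every downx-th column.)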
+ return x[:, ::downy, ::downx, :] + +#---------------------------------------------------------------------------- + +def _upfirdn_2d_cuda(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): + """Fast CUDA implementation of `upfirdn_2d()` using custom ops.""" + + x = tf.convert_to_tensor(x) + k = np.asarray(k, dtype=np.float32) + majorDim, inH, inW, minorDim = x.shape.as_list() + kernelH, kernelW = k.shape + assert inW >= 1 and inH >= 1 + assert kernelW >= 1 and kernelH >= 1 + assert isinstance(upx, int) and isinstance(upy, int) + assert isinstance(downx, int) and isinstance(downy, int) + assert isinstance(padx0, int) and isinstance(padx1, int) + assert isinstance(pady0, int) and isinstance(pady1, int) + + outW = (inW * upx + padx0 + padx1 - kernelW) // downx + 1 + outH = (inH * upy + pady0 + pady1 - kernelH) // downy + 1 + assert outW >= 1 and outH >= 1 + + kc = tf.constant(k, dtype=x.dtype) + gkc = tf.constant(k[::-1, ::-1], dtype=x.dtype) + gpadx0 = kernelW - padx0 - 1 + gpady0 = kernelH - pady0 - 1 + gpadx1 = inW * upx - outW * downx + padx0 - upx + 1 + gpady1 = inH * upy - outH * downy + pady0 - upy + 1 + + @tf.custom_gradient + def func(x): + y = _get_plugin().up_fir_dn2d(x=x, k=kc, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1) + y.set_shape([majorDim, outH, outW, minorDim]) + @tf.custom_gradient + def grad(dy): + dx = _get_plugin().up_fir_dn2d(x=dy, k=gkc, upx=downx, upy=downy, downx=upx, downy=upy, padx0=gpadx0, padx1=gpadx1, pady0=gpady0, pady1=gpady1) + dx.set_shape([majorDim, inH, inW, minorDim]) + return dx, func + return y, grad + return func(x) + +#---------------------------------------------------------------------------- + +def filter_2d(x, k, gain=1, data_format='NCHW', impl='ref'): + r"""Filter a batch of 2D images with the given FIR filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and filters each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the same shape and datatype as `x`. + """ + + k = _setup_kernel(k) * gain + p = k.shape[0] - 1 + return _simple_upfirdn_2d(x, k, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def upsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='ref'): + r"""Upsample a batch of 2D images with the given filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and upsamples each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero, and the filter is padded with + zeros so that its shape is a multiple of the upsampling factor. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to nearest-neighbor + upsampling. 
+ factor: Integer upsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or + `[N, H * factor, W * factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + k = _setup_kernel(k) * (gain * (factor ** 2)) + p = k.shape[0] - factor + return _simple_upfirdn_2d(x, k, up=factor, pad0=(p+1)//2+factor-1, pad1=p//2, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def downsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='ref'): + r"""Downsample a batch of 2D images with the given filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and downsamples each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero, and the filter is padded with + zeros so that its shape is a multiple of the downsampling factor. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or + `[N, H // factor, W // factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + k = _setup_kernel(k) * gain + p = k.shape[0] - factor + return _simple_upfirdn_2d(x, k, down=factor, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def upsample_conv_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='ref'): + r"""Fused `upsample_2d()` followed by `tf.nn.conv2d()`. + + Padding is performed only once at the beginning, not between the operations. + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. + Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to nearest-neighbor + upsampling. + factor: Integer upsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or + `[N, H * factor, W * factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + + # Check weight shape. 
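+    # The spatial kernel must be square; the channel dimensions inferred below
+    # are used to derive the group count and the transposed-conv output shape.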
+ w = tf.convert_to_tensor(w) + assert w.shape.rank == 4 + convH = w.shape[0].value + convW = w.shape[1].value + inC = _shape(w, 2) + outC = _shape(w, 3) + assert convW == convH + + # Setup filter kernel. + if k is None: + k = [1] * factor + k = _setup_kernel(k) * (gain * (factor ** 2)) + p = (k.shape[0] - factor) - (convW - 1) + + # Determine data dimensions. + if data_format == 'NCHW': + stride = [1, 1, factor, factor] + output_shape = [_shape(x, 0), outC, (_shape(x, 2) - 1) * factor + convH, (_shape(x, 3) - 1) * factor + convW] + num_groups = _shape(x, 1) // inC + else: + stride = [1, factor, factor, 1] + output_shape = [_shape(x, 0), (_shape(x, 1) - 1) * factor + convH, (_shape(x, 2) - 1) * factor + convW, outC] + num_groups = _shape(x, 3) // inC + + # Transpose weights. + w = tf.reshape(w, [convH, convW, inC, num_groups, -1]) + w = tf.transpose(w[::-1, ::-1], [0, 1, 4, 3, 2]) + w = tf.reshape(w, [convH, convW, -1, num_groups * inC]) + + # Execute. + x = tf.nn.conv2d_transpose(x, w, output_shape=output_shape, strides=stride, padding='VALID', data_format=data_format) + return _simple_upfirdn_2d(x, k, pad0=(p+1)//2+factor-1, pad1=p//2+1, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def conv_downsample_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='ref'): + r"""Fused `tf.nn.conv2d()` followed by `downsample_2d()`. + + Padding is performed only once at the beginning, not between the operations. + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. + Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or + `[N, H // factor, W // factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + w = tf.convert_to_tensor(w) + convH, convW, _inC, _outC = w.shape.as_list() + assert convW == convH + if k is None: + k = [1] * factor + k = _setup_kernel(k) * gain + p = (k.shape[0] - factor) + (convW - 1) + if data_format == 'NCHW': + s = [1, 1, factor, factor] + else: + s = [1, factor, factor, 1] + x = _simple_upfirdn_2d(x, k, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) + return tf.nn.conv2d(x, w, strides=s, padding='VALID', data_format=data_format) + +#---------------------------------------------------------------------------- +# Internal helper funcs. 
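+#
+# _shape() returns a dimension statically when it is known at graph-construction
+# time and falls back to tf.shape() otherwise; _setup_kernel() turns 1D separable
+# taps into a normalized 2D FIR kernel; _simple_upfirdn_2d() reshapes NCHW input
+# into the [majorDim, H, W, minorDim] layout expected by upfirdn_2d() and undoes
+# the reshape afterwards.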
+ +def _shape(tf_expr, dim_idx): + if tf_expr.shape.rank is not None: + dim = tf_expr.shape[dim_idx].value + if dim is not None: + return dim + return tf.shape(tf_expr)[dim_idx] + +def _setup_kernel(k): + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + assert k.ndim == 2 + assert k.shape[0] == k.shape[1] + return k + +def _simple_upfirdn_2d(x, k, up=1, down=1, pad0=0, pad1=0, data_format='NCHW', impl='ref'): + assert data_format in ['NCHW', 'NHWC'] + assert x.shape.rank == 4 + y = x + if data_format == 'NCHW': + y = tf.reshape(y, [-1, _shape(y, 2), _shape(y, 3), 1]) + y = upfirdn_2d(y, k, upx=up, upy=up, downx=down, downy=down, padx0=pad0, padx1=pad1, pady0=pad0, pady1=pad1, impl=impl) + if data_format == 'NCHW': + y = tf.reshape(y, [-1, _shape(x, 1), _shape(y, 1), _shape(y, 2)]) + return y + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/optimizer.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..9a1b1b833e218902ef145c59a03128e2fba73baf --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/optimizer.py @@ -0,0 +1,336 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Helper wrapper for a Tensorflow optimizer.""" + +import numpy as np +import tensorflow as tf + +from collections import OrderedDict +from typing import List, Union + +from . import autosummary +from . import tfutil +from .. import util + +from .tfutil import TfExpression, TfExpressionEx + +try: + # TensorFlow 1.13 + from tensorflow.python.ops import nccl_ops +except: + # Older TensorFlow versions + import tensorflow.contrib.nccl as nccl_ops + +class Optimizer: + """A Wrapper for tf.train.Optimizer. + + Automatically takes care of: + - Gradient averaging for multi-GPU training. + - Gradient accumulation for arbitrarily large minibatches. + - Dynamic loss scaling and typecasts for FP16 training. + - Ignoring corrupted gradients that contain NaNs/Infs. + - Reporting statistics. + - Well-chosen default settings. + """ + + def __init__(self, + name: str = "Train", # Name string that will appear in TensorFlow graph. + tf_optimizer: str = "tf.train.AdamOptimizer", # Underlying optimizer class. + learning_rate: TfExpressionEx = 0.001, # Learning rate. Can vary over time. + minibatch_multiplier: TfExpressionEx = None, # Treat N consecutive minibatches as one by accumulating gradients. + share: "Optimizer" = None, # Share internal state with a previously created optimizer? + use_loss_scaling: bool = False, # Enable dynamic loss scaling for robust mixed-precision training? + loss_scaling_init: float = 64.0, # Log2 of initial loss scaling factor. + loss_scaling_inc: float = 0.0005, # Log2 of per-minibatch loss scaling increment when there is no overflow. + loss_scaling_dec: float = 1.0, # Log2 of per-minibatch loss scaling decrement when there is an overflow. + report_mem_usage: bool = False, # Report fine-grained memory usage statistics in TensorBoard? + **kwargs): + + # Public fields. 
+ self.name = name + self.learning_rate = learning_rate + self.minibatch_multiplier = minibatch_multiplier + self.id = self.name.replace("/", ".") + self.scope = tf.get_default_graph().unique_name(self.id) + self.optimizer_class = util.get_obj_by_name(tf_optimizer) + self.optimizer_kwargs = dict(kwargs) + self.use_loss_scaling = use_loss_scaling + self.loss_scaling_init = loss_scaling_init + self.loss_scaling_inc = loss_scaling_inc + self.loss_scaling_dec = loss_scaling_dec + + # Private fields. + self._updates_applied = False + self._devices = OrderedDict() # device_name => EasyDict() + self._shared_optimizers = OrderedDict() # device_name => optimizer_class + self._gradient_shapes = None # [shape, ...] + self._report_mem_usage = report_mem_usage + + # Validate arguments. + assert callable(self.optimizer_class) + + # Share internal state if requested. + if share is not None: + assert isinstance(share, Optimizer) + assert self.optimizer_class is share.optimizer_class + assert self.learning_rate is share.learning_rate + assert self.optimizer_kwargs == share.optimizer_kwargs + self._shared_optimizers = share._shared_optimizers # pylint: disable=protected-access + + def _get_device(self, device_name: str): + """Get internal state for the given TensorFlow device.""" + tfutil.assert_tf_initialized() + if device_name in self._devices: + return self._devices[device_name] + + # Initialize fields. + device = util.EasyDict() + device.name = device_name + device.optimizer = None # Underlying optimizer: optimizer_class + device.loss_scaling_var = None # Log2 of loss scaling: tf.Variable + device.grad_raw = OrderedDict() # Raw gradients: var => [grad, ...] + device.grad_clean = OrderedDict() # Clean gradients: var => grad + device.grad_acc_vars = OrderedDict() # Accumulation sums: var => tf.Variable + device.grad_acc_count = None # Accumulation counter: tf.Variable + device.grad_acc = OrderedDict() # Accumulated gradients: var => grad + + # Setup TensorFlow objects. + with tfutil.absolute_name_scope(self.scope + "/Devices"), tf.device(device_name), tf.control_dependencies(None): + if device_name not in self._shared_optimizers: + optimizer_name = self.scope.replace("/", "_") + "_opt%d" % len(self._shared_optimizers) + self._shared_optimizers[device_name] = self.optimizer_class(name=optimizer_name, learning_rate=self.learning_rate, **self.optimizer_kwargs) + device.optimizer = self._shared_optimizers[device_name] + if self.use_loss_scaling: + device.loss_scaling_var = tf.Variable(np.float32(self.loss_scaling_init), trainable=False, name="loss_scaling_var") + + # Register device. + self._devices[device_name] = device + return device + + def register_gradients(self, loss: TfExpression, trainable_vars: Union[List, dict]) -> None: + """Register the gradients of the given loss function with respect to the given variables. + Intended to be called once per GPU.""" + tfutil.assert_tf_initialized() + assert not self._updates_applied + device = self._get_device(loss.device) + + # Validate trainables. + if isinstance(trainable_vars, dict): + trainable_vars = list(trainable_vars.values()) # allow passing in Network.trainables as vars + assert isinstance(trainable_vars, list) and len(trainable_vars) >= 1 + assert all(tfutil.is_tf_expression(expr) for expr in trainable_vars + [loss]) + assert all(var.device == device.name for var in trainable_vars) + + # Validate shapes. 
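+        # The first call records the trainables' shapes; subsequent calls
+        # (typically one per GPU) must register variables of identical shapes.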
+ if self._gradient_shapes is None: + self._gradient_shapes = [var.shape.as_list() for var in trainable_vars] + assert len(trainable_vars) == len(self._gradient_shapes) + assert all(var.shape.as_list() == var_shape for var, var_shape in zip(trainable_vars, self._gradient_shapes)) + + # Report memory usage if requested. + deps = [] + if self._report_mem_usage: + self._report_mem_usage = False + try: + with tf.name_scope(self.id + '_mem'), tf.device(device.name), tf.control_dependencies([loss]): + deps.append(autosummary.autosummary(self.id + "/mem_usage_gb", tf.contrib.memory_stats.BytesInUse() / 2**30)) + except tf.errors.NotFoundError: + pass + + # Compute gradients. + with tf.name_scope(self.id + "_grad"), tf.device(device.name), tf.control_dependencies(deps): + loss = self.apply_loss_scaling(tf.cast(loss, tf.float32)) + gate = tf.train.Optimizer.GATE_NONE # disable gating to reduce memory usage + grad_list = device.optimizer.compute_gradients(loss=loss, var_list=trainable_vars, gate_gradients=gate) + + # Register gradients. + for grad, var in grad_list: + if var not in device.grad_raw: + device.grad_raw[var] = [] + device.grad_raw[var].append(grad) + + def apply_updates(self, allow_no_op: bool = False) -> tf.Operation: + """Construct training op to update the registered variables based on their gradients.""" + tfutil.assert_tf_initialized() + assert not self._updates_applied + self._updates_applied = True + all_ops = [] + + # Check for no-op. + if allow_no_op and len(self._devices) == 0: + with tfutil.absolute_name_scope(self.scope): + return tf.no_op(name='TrainingOp') + + # Clean up gradients. + for device_idx, device in enumerate(self._devices.values()): + with tfutil.absolute_name_scope(self.scope + "/Clean%d" % device_idx), tf.device(device.name): + for var, grad in device.grad_raw.items(): + + # Filter out disconnected gradients and convert to float32. + grad = [g for g in grad if g is not None] + grad = [tf.cast(g, tf.float32) for g in grad] + + # Sum within the device. + if len(grad) == 0: + grad = tf.zeros(var.shape) # No gradients => zero. + elif len(grad) == 1: + grad = grad[0] # Single gradient => use as is. + else: + grad = tf.add_n(grad) # Multiple gradients => sum. + + # Scale as needed. + scale = 1.0 / len(device.grad_raw[var]) / len(self._devices) + scale = tf.constant(scale, dtype=tf.float32, name="scale") + if self.minibatch_multiplier is not None: + scale /= tf.cast(self.minibatch_multiplier, tf.float32) + scale = self.undo_loss_scaling(scale) + device.grad_clean[var] = grad * scale + + # Sum gradients across devices. + if len(self._devices) > 1: + with tfutil.absolute_name_scope(self.scope + "/Broadcast"), tf.device(None): + for all_vars in zip(*[device.grad_clean.keys() for device in self._devices.values()]): + if len(all_vars) > 0 and all(dim > 0 for dim in all_vars[0].shape.as_list()): # NCCL does not support zero-sized tensors. + all_grads = [device.grad_clean[var] for device, var in zip(self._devices.values(), all_vars)] + all_grads = nccl_ops.all_sum(all_grads) + for device, var, grad in zip(self._devices.values(), all_vars, all_grads): + device.grad_clean[var] = grad + + # Apply updates separately on each device. + for device_idx, device in enumerate(self._devices.values()): + with tfutil.absolute_name_scope(self.scope + "/Apply%d" % device_idx), tf.device(device.name): + # pylint: disable=cell-var-from-loop + + # Accumulate gradients over time. 
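+            # With a minibatch multiplier, gradients are summed into per-variable
+            # accumulator variables across N consecutive minibatches; the optimizer
+            # step is taken only once the counter reaches the multiplier, after
+            # which both counter and accumulators are reset.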
+ if self.minibatch_multiplier is None: + acc_ok = tf.constant(True, name='acc_ok') + device.grad_acc = OrderedDict(device.grad_clean) + else: + # Create variables. + with tf.control_dependencies(None): + for var in device.grad_clean.keys(): + device.grad_acc_vars[var] = tf.Variable(tf.zeros(var.shape), trainable=False, name="grad_acc_var") + device.grad_acc_count = tf.Variable(tf.zeros([]), trainable=False, name="grad_acc_count") + + # Track counter. + count_cur = device.grad_acc_count + 1.0 + count_inc_op = lambda: tf.assign(device.grad_acc_count, count_cur) + count_reset_op = lambda: tf.assign(device.grad_acc_count, tf.zeros([])) + acc_ok = (count_cur >= tf.cast(self.minibatch_multiplier, tf.float32)) + all_ops.append(tf.cond(acc_ok, count_reset_op, count_inc_op)) + + # Track gradients. + for var, grad in device.grad_clean.items(): + acc_var = device.grad_acc_vars[var] + acc_cur = acc_var + grad + device.grad_acc[var] = acc_cur + with tf.control_dependencies([acc_cur]): + acc_inc_op = lambda: tf.assign(acc_var, acc_cur) + acc_reset_op = lambda: tf.assign(acc_var, tf.zeros(var.shape)) + all_ops.append(tf.cond(acc_ok, acc_reset_op, acc_inc_op)) + + # No overflow => apply gradients. + all_ok = tf.reduce_all(tf.stack([acc_ok] + [tf.reduce_all(tf.is_finite(g)) for g in device.grad_acc.values()])) + apply_op = lambda: device.optimizer.apply_gradients([(tf.cast(grad, var.dtype), var) for var, grad in device.grad_acc.items()]) + all_ops.append(tf.cond(all_ok, apply_op, tf.no_op)) + + # Adjust loss scaling. + if self.use_loss_scaling: + ls_inc_op = lambda: tf.assign_add(device.loss_scaling_var, self.loss_scaling_inc) + ls_dec_op = lambda: tf.assign_sub(device.loss_scaling_var, self.loss_scaling_dec) + ls_update_op = lambda: tf.group(tf.cond(all_ok, ls_inc_op, ls_dec_op)) + all_ops.append(tf.cond(acc_ok, ls_update_op, tf.no_op)) + + # Last device => report statistics. + if device_idx == len(self._devices) - 1: + all_ops.append(autosummary.autosummary(self.id + "/learning_rate", self.learning_rate)) + all_ops.append(autosummary.autosummary(self.id + "/overflow_frequency", tf.where(all_ok, 0, 1), condition=acc_ok)) + if self.use_loss_scaling: + all_ops.append(autosummary.autosummary(self.id + "/loss_scaling_log2", device.loss_scaling_var)) + + # Initialize variables. + self.reset_optimizer_state() + if self.use_loss_scaling: + tfutil.init_uninitialized_vars([device.loss_scaling_var for device in self._devices.values()]) + if self.minibatch_multiplier is not None: + tfutil.run([var.initializer for device in self._devices.values() for var in list(device.grad_acc_vars.values()) + [device.grad_acc_count]]) + + # Group everything into a single op. 
+ with tfutil.absolute_name_scope(self.scope): + return tf.group(*all_ops, name="TrainingOp") + + def reset_optimizer_state(self) -> None: + """Reset internal state of the underlying optimizer.""" + tfutil.assert_tf_initialized() + tfutil.run([var.initializer for device in self._devices.values() for var in device.optimizer.variables()]) + + def get_loss_scaling_var(self, device: str) -> Union[tf.Variable, None]: + """Get or create variable representing log2 of the current dynamic loss scaling factor.""" + return self._get_device(device).loss_scaling_var + + def apply_loss_scaling(self, value: TfExpression) -> TfExpression: + """Apply dynamic loss scaling for the given expression.""" + assert tfutil.is_tf_expression(value) + if not self.use_loss_scaling: + return value + return value * tfutil.exp2(self.get_loss_scaling_var(value.device)) + + def undo_loss_scaling(self, value: TfExpression) -> TfExpression: + """Undo the effect of dynamic loss scaling for the given expression.""" + assert tfutil.is_tf_expression(value) + if not self.use_loss_scaling: + return value + return value * tfutil.exp2(-self.get_loss_scaling_var(value.device)) # pylint: disable=invalid-unary-operand-type + + +class SimpleAdam: + """Simplified version of tf.train.AdamOptimizer that behaves identically when used with dnnlib.tflib.Optimizer.""" + + def __init__(self, name="Adam", learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8): + self.name = name + self.learning_rate = learning_rate + self.beta1 = beta1 + self.beta2 = beta2 + self.epsilon = epsilon + self.all_state_vars = [] + + def variables(self): + return self.all_state_vars + + def compute_gradients(self, loss, var_list, gate_gradients=tf.train.Optimizer.GATE_NONE): + assert gate_gradients == tf.train.Optimizer.GATE_NONE + return list(zip(tf.gradients(loss, var_list), var_list)) + + def apply_gradients(self, grads_and_vars): + with tf.name_scope(self.name): + state_vars = [] + update_ops = [] + + # Adjust learning rate to deal with startup bias. + with tf.control_dependencies(None): + b1pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False) + b2pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False) + state_vars += [b1pow_var, b2pow_var] + b1pow_new = b1pow_var * self.beta1 + b2pow_new = b2pow_var * self.beta2 + update_ops += [tf.assign(b1pow_var, b1pow_new), tf.assign(b2pow_var, b2pow_new)] + lr_new = self.learning_rate * tf.sqrt(1 - b2pow_new) / (1 - b1pow_new) + + # Construct ops to update each variable. + for grad, var in grads_and_vars: + with tf.control_dependencies(None): + m_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False) + v_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False) + state_vars += [m_var, v_var] + m_new = self.beta1 * m_var + (1 - self.beta1) * grad + v_new = self.beta2 * v_var + (1 - self.beta2) * tf.square(grad) + var_delta = lr_new * m_new / (tf.sqrt(v_new) + self.epsilon) + update_ops += [tf.assign(m_var, m_new), tf.assign(v_var, v_new), tf.assign_sub(var, var_delta)] + + # Group everything together. 
+ self.all_state_vars += state_vars + return tf.group(*update_ops) diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/tfutil.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/tfutil.py new file mode 100644 index 0000000000000000000000000000000000000000..1127c7beecfe526b459b3b99ee34e1c431e19e1c --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/tflib/tfutil.py @@ -0,0 +1,252 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Miscellaneous helper utils for Tensorflow.""" + +import os +import numpy as np +import tensorflow as tf + +# Silence deprecation warnings from TensorFlow 1.13 onwards +import logging +logging.getLogger('tensorflow').setLevel(logging.ERROR) +import tensorflow.contrib # requires TensorFlow 1.x! +tf.contrib = tensorflow.contrib + +from typing import Any, Iterable, List, Union + +TfExpression = Union[tf.Tensor, tf.Variable, tf.Operation] +"""A type that represents a valid Tensorflow expression.""" + +TfExpressionEx = Union[TfExpression, int, float, np.ndarray] +"""A type that can be converted to a valid Tensorflow expression.""" + + +def run(*args, **kwargs) -> Any: + """Run the specified ops in the default session.""" + assert_tf_initialized() + return tf.get_default_session().run(*args, **kwargs) + + +def is_tf_expression(x: Any) -> bool: + """Check whether the input is a valid Tensorflow expression, i.e., Tensorflow Tensor, Variable, or Operation.""" + return isinstance(x, (tf.Tensor, tf.Variable, tf.Operation)) + + +def shape_to_list(shape: Iterable[tf.Dimension]) -> List[Union[int, None]]: + """Convert a Tensorflow shape to a list of ints. Retained for backwards compatibility -- use TensorShape.as_list() in new code.""" + return [dim.value for dim in shape] + + +def flatten(x: TfExpressionEx) -> TfExpression: + """Shortcut function for flattening a tensor.""" + with tf.name_scope("Flatten"): + return tf.reshape(x, [-1]) + + +def log2(x: TfExpressionEx) -> TfExpression: + """Logarithm in base 2.""" + with tf.name_scope("Log2"): + return tf.log(x) * np.float32(1.0 / np.log(2.0)) + + +def exp2(x: TfExpressionEx) -> TfExpression: + """Exponent in base 2.""" + with tf.name_scope("Exp2"): + return tf.exp(x * np.float32(np.log(2.0))) + + +def lerp(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpressionEx: + """Linear interpolation.""" + with tf.name_scope("Lerp"): + return a + (b - a) * t + + +def lerp_clip(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpression: + """Linear interpolation with clip.""" + with tf.name_scope("LerpClip"): + return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0) + + +def absolute_name_scope(scope: str) -> tf.name_scope: + """Forcefully enter the specified name scope, ignoring any surrounding scopes.""" + return tf.name_scope(scope + "/") + + +def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope: + """Forcefully enter the specified variable scope, ignoring any surrounding scopes.""" + return tf.variable_scope(tf.VariableScope(name=scope, **kwargs), auxiliary_name_scope=False) + + +def _sanitize_tf_config(config_dict: dict = None) -> dict: + # Defaults. + cfg = dict() + cfg["rnd.np_random_seed"] = None # Random seed for NumPy. None = keep as is. + cfg["rnd.tf_random_seed"] = "auto" # Random seed for TensorFlow. 
'auto' = derive from NumPy random state. None = keep as is. + cfg["env.TF_CPP_MIN_LOG_LEVEL"] = "1" # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info. + cfg["graph_options.place_pruned_graph"] = True # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used. + cfg["gpu_options.allow_growth"] = True # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed. + + # Remove defaults for environment variables that are already set. + for key in list(cfg): + fields = key.split(".") + if fields[0] == "env": + assert len(fields) == 2 + if fields[1] in os.environ: + del cfg[key] + + # User overrides. + if config_dict is not None: + cfg.update(config_dict) + return cfg + + +def init_tf(config_dict: dict = None) -> None: + """Initialize TensorFlow session using good default settings.""" + # Skip if already initialized. + if tf.get_default_session() is not None: + return + + # Setup config dict and random seeds. + cfg = _sanitize_tf_config(config_dict) + np_random_seed = cfg["rnd.np_random_seed"] + if np_random_seed is not None: + np.random.seed(np_random_seed) + tf_random_seed = cfg["rnd.tf_random_seed"] + if tf_random_seed == "auto": + tf_random_seed = np.random.randint(1 << 31) + if tf_random_seed is not None: + tf.set_random_seed(tf_random_seed) + + # Setup environment variables. + for key, value in cfg.items(): + fields = key.split(".") + if fields[0] == "env": + assert len(fields) == 2 + os.environ[fields[1]] = str(value) + + # Create default TensorFlow session. + create_session(cfg, force_as_default=True) + + +def assert_tf_initialized(): + """Check that TensorFlow session has been initialized.""" + if tf.get_default_session() is None: + raise RuntimeError("No default TensorFlow session found. Please call dnnlib.tflib.init_tf().") + + +def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session: + """Create tf.Session based on config dict.""" + # Setup TensorFlow config proto. + cfg = _sanitize_tf_config(config_dict) + config_proto = tf.ConfigProto() + for key, value in cfg.items(): + fields = key.split(".") + if fields[0] not in ["rnd", "env"]: + obj = config_proto + for field in fields[:-1]: + obj = getattr(obj, field) + setattr(obj, fields[-1], value) + + # Create session. + session = tf.Session(config=config_proto) + if force_as_default: + # pylint: disable=protected-access + session._default_session = session.as_default() + session._default_session.enforce_nesting = False + session._default_session.__enter__() + return session + + +def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None: + """Initialize all tf.Variables that have not already been initialized. + + Equivalent to the following, but more efficient and does not bloat the tf graph: + tf.variables_initializer(tf.report_uninitialized_variables()).run() + """ + assert_tf_initialized() + if target_vars is None: + target_vars = tf.global_variables() + + test_vars = [] + test_ops = [] + + with tf.control_dependencies(None): # ignore surrounding control_dependencies + for var in target_vars: + assert is_tf_expression(var) + + try: + tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/IsVariableInitialized:0")) + except KeyError: + # Op does not exist => variable may be uninitialized. 
+ test_vars.append(var) + + with absolute_name_scope(var.name.split(":")[0]): + test_ops.append(tf.is_variable_initialized(var)) + + init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited] + run([var.initializer for var in init_vars]) + + +def set_vars(var_to_value_dict: dict) -> None: + """Set the values of given tf.Variables. + + Equivalent to the following, but more efficient and does not bloat the tf graph: + tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()] + """ + assert_tf_initialized() + ops = [] + feed_dict = {} + + for var, value in var_to_value_dict.items(): + assert is_tf_expression(var) + + try: + setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/setter:0")) # look for existing op + except KeyError: + with absolute_name_scope(var.name.split(":")[0]): + with tf.control_dependencies(None): # ignore surrounding control_dependencies + setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, "new_value"), name="setter") # create new setter + + ops.append(setter) + feed_dict[setter.op.inputs[1]] = value + + run(ops, feed_dict) + + +def create_var_with_large_initial_value(initial_value: np.ndarray, *args, **kwargs): + """Create tf.Variable with large initial value without bloating the tf graph.""" + assert_tf_initialized() + assert isinstance(initial_value, np.ndarray) + zeros = tf.zeros(initial_value.shape, initial_value.dtype) + var = tf.Variable(zeros, *args, **kwargs) + set_vars({var: initial_value}) + return var + + +def convert_images_from_uint8(images, drange=[-1,1], nhwc_to_nchw=False): + """Convert a minibatch of images from uint8 to float32 with configurable dynamic range. + Can be used as an input transformation for Network.run(). + """ + images = tf.cast(images, tf.float32) + if nhwc_to_nchw: + images = tf.transpose(images, [0, 3, 1, 2]) + return images * ((drange[1] - drange[0]) / 255) + drange[0] + + +def convert_images_to_uint8(images, drange=[-1,1], nchw_to_nhwc=False, shrink=1): + """Convert a minibatch of images from float32 to uint8 with configurable dynamic range. + Can be used as an output transformation for Network.run(). + """ + images = tf.cast(images, tf.float32) + if shrink > 1: + ksize = [1, 1, shrink, shrink] + images = tf.nn.avg_pool(images, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") + if nchw_to_nhwc: + images = tf.transpose(images, [0, 2, 3, 1]) + scale = 255 / (drange[1] - drange[0]) + images = images * scale + (0.5 - drange[0] * scale) + return tf.saturate_cast(images, tf.uint8) diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/util.py b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/util.py new file mode 100644 index 0000000000000000000000000000000000000000..4d5d5b59f78b1e35ce1453da99778a7548532a35 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/dnnlib/util.py @@ -0,0 +1,410 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. 
+# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Miscellaneous utility classes and functions.""" + +import ctypes +import fnmatch +import importlib +import inspect +import numpy as np +import os +import shutil +import sys +import types +import io +import pickle +import re +import requests +import html +import hashlib +import glob +import uuid + +from distutils.util import strtobool +from typing import Any, List, Tuple, Union + + +# Util classes +# ------------------------------------------------------------------------------------------ + + +class EasyDict(dict): + """Convenience class that behaves like a dict but allows access with the attribute syntax.""" + + def __getattr__(self, name: str) -> Any: + try: + return self[name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name: str, value: Any) -> None: + self[name] = value + + def __delattr__(self, name: str) -> None: + del self[name] + + +class Logger(object): + """Redirect stderr to stdout, optionally print stdout to a file, and optionally force flushing on both stdout and the file.""" + + def __init__(self, file_name: str = None, file_mode: str = "w", should_flush: bool = True): + self.file = None + + if file_name is not None: + self.file = open(file_name, file_mode) + + self.should_flush = should_flush + self.stdout = sys.stdout + self.stderr = sys.stderr + + sys.stdout = self + sys.stderr = self + + def __enter__(self) -> "Logger": + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.close() + + def write(self, text: str) -> None: + """Write text to stdout (and a file) and optionally flush.""" + if len(text) == 0: # workaround for a bug in VSCode debugger: sys.stdout.write(''); sys.stdout.flush() => crash + return + + if self.file is not None: + self.file.write(text) + + self.stdout.write(text) + + if self.should_flush: + self.flush() + + def flush(self) -> None: + """Flush written text to both stdout and a file, if open.""" + if self.file is not None: + self.file.flush() + + self.stdout.flush() + + def close(self) -> None: + """Flush, close possible files, and remove stdout/stderr mirroring.""" + self.flush() + + # if using multiple loggers, prevent closing in wrong order + if sys.stdout is self: + sys.stdout = self.stdout + if sys.stderr is self: + sys.stderr = self.stderr + + if self.file is not None: + self.file.close() + + +# Small util functions +# ------------------------------------------------------------------------------------------ + + +def format_time(seconds: Union[int, float]) -> str: + """Convert the seconds to human readable string with days, hours, minutes and seconds.""" + s = int(np.rint(seconds)) + + if s < 60: + return "{0}s".format(s) + elif s < 60 * 60: + return "{0}m {1:02}s".format(s // 60, s % 60) + elif s < 24 * 60 * 60: + return "{0}h {1:02}m {2:02}s".format(s // (60 * 60), (s // 60) % 60, s % 60) + else: + return "{0}d {1:02}h {2:02}m".format(s // (24 * 60 * 60), (s // (60 * 60)) % 24, (s // 60) % 60) + + +def ask_yes_no(question: str) -> bool: + """Ask the user the question until the user inputs a valid answer.""" + while True: + try: + print("{0} [y/n]".format(question)) + return strtobool(input().lower()) + except ValueError: + pass + + +def tuple_product(t: Tuple) -> Any: + """Calculate the product of the tuple elements.""" + result = 1 + + for v in t: + result *= v + + return result + + +_str_to_ctype = { + "uint8": ctypes.c_ubyte, + "uint16": ctypes.c_uint16, + "uint32": 
ctypes.c_uint32, + "uint64": ctypes.c_uint64, + "int8": ctypes.c_byte, + "int16": ctypes.c_int16, + "int32": ctypes.c_int32, + "int64": ctypes.c_int64, + "float32": ctypes.c_float, + "float64": ctypes.c_double +} + + +def get_dtype_and_ctype(type_obj: Any) -> Tuple[np.dtype, Any]: + """Given a type name string (or an object having a __name__ attribute), return matching Numpy and ctypes types that have the same size in bytes.""" + type_str = None + + if isinstance(type_obj, str): + type_str = type_obj + elif hasattr(type_obj, "__name__"): + type_str = type_obj.__name__ + elif hasattr(type_obj, "name"): + type_str = type_obj.name + else: + raise RuntimeError("Cannot infer type name from input") + + assert type_str in _str_to_ctype.keys() + + my_dtype = np.dtype(type_str) + my_ctype = _str_to_ctype[type_str] + + assert my_dtype.itemsize == ctypes.sizeof(my_ctype) + + return my_dtype, my_ctype + + +def is_pickleable(obj: Any) -> bool: + try: + with io.BytesIO() as stream: + pickle.dump(obj, stream) + return True + except: + return False + + +# Functionality to import modules/objects by name, and call functions by name +# ------------------------------------------------------------------------------------------ + +def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]: + """Searches for the underlying module behind the name to some python object. + Returns the module and the object name (original name with module part removed).""" + + # allow convenience shorthands, substitute them by full names + obj_name = re.sub("^np.", "numpy.", obj_name) + obj_name = re.sub("^tf.", "tensorflow.", obj_name) + + # list alternatives for (module_name, local_obj_name) + parts = obj_name.split(".") + name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)] + + # try each alternative in turn + for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + return module, local_obj_name + except: + pass + + # maybe some of the modules themselves contain errors? + for module_name, _local_obj_name in name_pairs: + try: + importlib.import_module(module_name) # may raise ImportError + except ImportError: + if not str(sys.exc_info()[1]).startswith("No module named '" + module_name + "'"): + raise + + # maybe the requested attribute is missing? 
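+    # (AttributeError raised by get_obj_from_module() below is not caught, so the
+    #  original error about the missing attribute propagates to the caller)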
+ for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + except ImportError: + pass + + # we are out of luck, but we have no idea why + raise ImportError(obj_name) + + +def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any: + """Traverses the object name and returns the last (rightmost) python object.""" + if obj_name == '': + return module + obj = module + for part in obj_name.split("."): + obj = getattr(obj, part) + return obj + + +def get_obj_by_name(name: str) -> Any: + """Finds the python object with the given name.""" + module, obj_name = get_module_from_obj_name(name) + return get_obj_from_module(module, obj_name) + + +def call_func_by_name(*args, func_name: str = None, **kwargs) -> Any: + """Finds the python object with the given name and calls it as a function.""" + assert func_name is not None + func_obj = get_obj_by_name(func_name) + assert callable(func_obj) + return func_obj(*args, **kwargs) + + +def get_module_dir_by_obj_name(obj_name: str) -> str: + """Get the directory path of the module containing the given object name.""" + module, _ = get_module_from_obj_name(obj_name) + return os.path.dirname(inspect.getfile(module)) + + +def is_top_level_function(obj: Any) -> bool: + """Determine whether the given object is a top-level function, i.e., defined at module scope using 'def'.""" + return callable(obj) and obj.__name__ in sys.modules[obj.__module__].__dict__ + + +def get_top_level_function_name(obj: Any) -> str: + """Return the fully-qualified name of a top-level function.""" + assert is_top_level_function(obj) + return obj.__module__ + "." + obj.__name__ + + +# File system helpers +# ------------------------------------------------------------------------------------------ + +def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]: + """List all files recursively in a given directory while ignoring given file and directory names. + Returns list of tuples containing both absolute and relative paths.""" + assert os.path.isdir(dir_path) + base_name = os.path.basename(os.path.normpath(dir_path)) + + if ignores is None: + ignores = [] + + result = [] + + for root, dirs, files in os.walk(dir_path, topdown=True): + for ignore_ in ignores: + dirs_to_remove = [d for d in dirs if fnmatch.fnmatch(d, ignore_)] + + # dirs need to be edited in-place + for d in dirs_to_remove: + dirs.remove(d) + + files = [f for f in files if not fnmatch.fnmatch(f, ignore_)] + + absolute_paths = [os.path.join(root, f) for f in files] + relative_paths = [os.path.relpath(p, dir_path) for p in absolute_paths] + + if add_base_to_relative: + relative_paths = [os.path.join(base_name, p) for p in relative_paths] + + assert len(absolute_paths) == len(relative_paths) + result += zip(absolute_paths, relative_paths) + + return result + + +def copy_files_and_create_dirs(files: List[Tuple[str, str]]) -> None: + """Takes in a list of tuples of (src, dst) paths and copies files. 
+ Will create all necessary directories.""" + for file in files: + target_dir_name = os.path.dirname(file[1]) + + # will create all intermediate-level directories + if not os.path.exists(target_dir_name): + os.makedirs(target_dir_name) + + shutil.copyfile(file[0], file[1]) + + +# URL helpers +# ------------------------------------------------------------------------------------------ + +def is_url(obj: Any, allow_file_urls: bool = False) -> bool: + """Determine whether the given object is a valid URL string.""" + if not isinstance(obj, str) or not "://" in obj: + return False + if allow_file_urls and obj.startswith('file:///'): + return True + try: + res = requests.compat.urlparse(obj) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + res = requests.compat.urlparse(requests.compat.urljoin(obj, "/")) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + except: + return False + return True + + +def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True) -> Any: + """Download the given URL and return a binary-mode file object to access the data.""" + assert is_url(url, allow_file_urls=True) + assert num_attempts >= 1 + + # Handle file URLs. + if url.startswith('file:///'): + return open(url[len('file:///'):], "rb") + + # Lookup from cache. + url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest() + if cache_dir is not None: + cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*")) + if len(cache_files) == 1: + return open(cache_files[0], "rb") + + # Download. + url_name = None + url_data = None + with requests.Session() as session: + if verbose: + print("Downloading %s ..." % url, end="", flush=True) + for attempts_left in reversed(range(num_attempts)): + try: + with session.get(url) as res: + res.raise_for_status() + if len(res.content) == 0: + raise IOError("No data received") + + if len(res.content) < 8192: + content_str = res.content.decode("utf-8") + if "download_warning" in res.headers.get("Set-Cookie", ""): + links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link] + if len(links) == 1: + url = requests.compat.urljoin(url, links[0]) + raise IOError("Google Drive virus checker nag") + if "Google Drive - Quota exceeded" in content_str: + raise IOError("Google Drive download quota exceeded -- please try again later") + + match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", "")) + url_name = match[1] if match else url + url_data = res.content + if verbose: + print(" done") + break + except: + if not attempts_left: + if verbose: + print(" failed") + raise + if verbose: + print(".", end="", flush=True) + + # Save to cache. + if cache_dir is not None: + safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name) + cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name) + temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name) + os.makedirs(cache_dir, exist_ok=True) + with open(temp_file, "wb") as f: + f.write(url_data) + os.replace(temp_file, cache_file) # atomic + + # Return data as file object. 
+ return io.BytesIO(url_data) diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9ab9908efa3cb38af52e8d5bcaa8acffde5a8875 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +# empty diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/frechet_inception_distance.py b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/frechet_inception_distance.py new file mode 100644 index 0000000000000000000000000000000000000000..a4797c67e9374963727066731b1faa6bf313155b --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/frechet_inception_distance.py @@ -0,0 +1,73 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Frechet Inception Distance (FID).""" + +import os +import numpy as np +import scipy +import tensorflow as tf +import dnnlib.tflib as tflib + +from metrics import metric_base +from training import misc + +#---------------------------------------------------------------------------- + +class FID(metric_base.MetricBase): + def __init__(self, num_images, minibatch_per_gpu, **kwargs): + super().__init__(**kwargs) + self.num_images = num_images + self.minibatch_per_gpu = minibatch_per_gpu + + def _evaluate(self, Gs, Gs_kwargs, num_gpus): + minibatch_size = num_gpus * self.minibatch_per_gpu + inception = misc.load_pkl('http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/inception_v3_features.pkl') + activations = np.empty([self.num_images, inception.output_shape[1]], dtype=np.float32) + + # Calculate statistics for reals. + cache_file = self._get_cache_file_for_reals(num_images=self.num_images) + os.makedirs(os.path.dirname(cache_file), exist_ok=True) + if os.path.isfile(cache_file): + mu_real, sigma_real = misc.load_pkl(cache_file) + else: + for idx, images in enumerate(self._iterate_reals(minibatch_size=minibatch_size)): + begin = idx * minibatch_size + end = min(begin + minibatch_size, self.num_images) + activations[begin:end] = inception.run(images[:end-begin], num_gpus=num_gpus, assume_frozen=True) + if end == self.num_images: + break + mu_real = np.mean(activations, axis=0) + sigma_real = np.cov(activations, rowvar=False) + misc.save_pkl((mu_real, sigma_real), cache_file) + + # Construct TensorFlow graph. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device('/gpu:%d' % gpu_idx): + Gs_clone = Gs.clone() + inception_clone = inception.clone() + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs) + images = tflib.convert_images_to_uint8(images) + result_expr.append(inception_clone.get_output_for(images)) + + # Calculate statistics for fakes. 
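+        # The loop below collects Inception features for the generated images and fits a Gaussian
+        # (mu_fake, sigma_fake) to them; FID is then the Frechet distance between the real and fake
+        # Gaussians: ||mu_real - mu_fake||^2 + Tr(sigma_real + sigma_fake - 2*sqrtm(sigma_fake @ sigma_real)).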
+ for begin in range(0, self.num_images, minibatch_size): + self._report_progress(begin, self.num_images) + end = min(begin + minibatch_size, self.num_images) + activations[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin] + mu_fake = np.mean(activations, axis=0) + sigma_fake = np.cov(activations, rowvar=False) + + # Calculate FID. + m = np.square(mu_fake - mu_real).sum() + s, _ = scipy.linalg.sqrtm(np.dot(sigma_fake, sigma_real), disp=False) # pylint: disable=no-member + dist = m + np.trace(sigma_fake + sigma_real - 2*s) + self._report_result(np.real(dist)) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/inception_score.py b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/inception_score.py new file mode 100644 index 0000000000000000000000000000000000000000..e5ec29fa5665860b56976bf6389a149634c2a413 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/inception_score.py @@ -0,0 +1,58 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Inception Score (IS).""" + +import numpy as np +import tensorflow as tf +import dnnlib.tflib as tflib + +from metrics import metric_base +from training import misc + +#---------------------------------------------------------------------------- + +class IS(metric_base.MetricBase): + def __init__(self, num_images, num_splits, minibatch_per_gpu, **kwargs): + super().__init__(**kwargs) + self.num_images = num_images + self.num_splits = num_splits + self.minibatch_per_gpu = minibatch_per_gpu + + def _evaluate(self, Gs, Gs_kwargs, num_gpus): + minibatch_size = num_gpus * self.minibatch_per_gpu + inception = misc.load_pkl('http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/inception_v3_softmax.pkl') + activations = np.empty([self.num_images, inception.output_shape[1]], dtype=np.float32) + + # Construct TensorFlow graph. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device('/gpu:%d' % gpu_idx): + Gs_clone = Gs.clone() + inception_clone = inception.clone() + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs) + images = tflib.convert_images_to_uint8(images) + result_expr.append(inception_clone.get_output_for(images)) + + # Calculate activations for fakes. + for begin in range(0, self.num_images, minibatch_size): + self._report_progress(begin, self.num_images) + end = min(begin + minibatch_size, self.num_images) + activations[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin] + + # Calculate IS. 
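+        # Each split below yields IS = exp( E_x[ KL( p(y|x) || p(y) ) ] ), where p(y|x) are the
+        # Inception softmax outputs stored in `activations` and p(y) is their mean over the split.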
+ scores = [] + for i in range(self.num_splits): + part = activations[i * self.num_images // self.num_splits : (i + 1) * self.num_images // self.num_splits] + kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0))) + kl = np.mean(np.sum(kl, 1)) + scores.append(np.exp(kl)) + self._report_result(np.mean(scores), suffix='_mean') + self._report_result(np.std(scores), suffix='_std') + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/linear_separability.py b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/linear_separability.py new file mode 100644 index 0000000000000000000000000000000000000000..a690aa9d36d0dd3d22af89ca6a1898edf8bc6958 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/linear_separability.py @@ -0,0 +1,178 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Linear Separability (LS).""" + +from collections import defaultdict +import numpy as np +import sklearn.svm +import tensorflow as tf +import dnnlib.tflib as tflib + +from metrics import metric_base +from training import misc + +#---------------------------------------------------------------------------- + +classifier_urls = [ + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-00-male.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-01-smiling.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-02-attractive.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-03-wavy-hair.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-04-young.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-05-5-o-clock-shadow.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-06-arched-eyebrows.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-07-bags-under-eyes.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-08-bald.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-09-bangs.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-10-big-lips.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-11-big-nose.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-12-black-hair.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-13-blond-hair.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-14-blurry.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-15-brown-hair.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-16-bushy-eyebrows.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-17-chubby.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-18-double-chin.pkl', + 
'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-19-eyeglasses.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-20-goatee.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-21-gray-hair.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-22-heavy-makeup.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-23-high-cheekbones.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-24-mouth-slightly-open.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-25-mustache.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-26-narrow-eyes.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-27-no-beard.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-28-oval-face.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-29-pale-skin.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-30-pointy-nose.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-31-receding-hairline.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-32-rosy-cheeks.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-33-sideburns.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-34-straight-hair.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-35-wearing-earrings.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-36-wearing-hat.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-37-wearing-lipstick.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-38-wearing-necklace.pkl', + 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-39-wearing-necktie.pkl', +] + +#---------------------------------------------------------------------------- + +def prob_normalize(p): + p = np.asarray(p).astype(np.float32) + assert len(p.shape) == 2 + return p / np.sum(p) + +def mutual_information(p): + p = prob_normalize(p) + px = np.sum(p, axis=1) + py = np.sum(p, axis=0) + result = 0.0 + for x in range(p.shape[0]): + p_x = px[x] + for y in range(p.shape[1]): + p_xy = p[x][y] + p_y = py[y] + if p_xy > 0.0: + result += p_xy * np.log2(p_xy / (p_x * p_y)) # get bits as output + return result + +def entropy(p): + p = prob_normalize(p) + result = 0.0 + for x in range(p.shape[0]): + for y in range(p.shape[1]): + p_xy = p[x][y] + if p_xy > 0.0: + result -= p_xy * np.log2(p_xy) + return result + +def conditional_entropy(p): + # H(Y|X) where X corresponds to axis 0, Y to axis 1 + # i.e., How many bits of additional information are needed to where we are on axis 1 if we know where we are on axis 0? + p = prob_normalize(p) + y = np.sum(p, axis=0, keepdims=True) # marginalize to calculate H(Y) + return max(0.0, entropy(y) - mutual_information(p)) # can slip just below 0 due to FP inaccuracies, clean those up. 
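+# The helpers above operate on the 2x2 joint histogram of (SVM prediction, attribute-classifier
+# label) built per attribute in LS below; conditional_entropy() returns H(label | prediction) in
+# bits, and the final separability score is 2 raised to the sum of these entropies over attributes.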
+ +#---------------------------------------------------------------------------- + +class LS(metric_base.MetricBase): + def __init__(self, num_samples, num_keep, attrib_indices, minibatch_per_gpu, **kwargs): + assert num_keep <= num_samples + super().__init__(**kwargs) + self.num_samples = num_samples + self.num_keep = num_keep + self.attrib_indices = attrib_indices + self.minibatch_per_gpu = minibatch_per_gpu + + def _evaluate(self, Gs, Gs_kwargs, num_gpus): + minibatch_size = num_gpus * self.minibatch_per_gpu + + # Construct TensorFlow graph for each GPU. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device('/gpu:%d' % gpu_idx): + Gs_clone = Gs.clone() + + # Generate images. + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + dlatents = Gs_clone.components.mapping.get_output_for(latents, labels, **Gs_kwargs) + images = Gs_clone.get_output_for(latents, None, **Gs_kwargs) + + # Downsample to 256x256. The attribute classifiers were built for 256x256. + if images.shape[2] > 256: + factor = images.shape[2] // 256 + images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor]) + images = tf.reduce_mean(images, axis=[3, 5]) + + # Run classifier for each attribute. + result_dict = dict(latents=latents, dlatents=dlatents[:,-1]) + for attrib_idx in self.attrib_indices: + classifier = misc.load_pkl(classifier_urls[attrib_idx]) + logits = classifier.get_output_for(images, None) + predictions = tf.nn.softmax(tf.concat([logits, -logits], axis=1)) + result_dict[attrib_idx] = predictions + result_expr.append(result_dict) + + # Sampling loop. + results = [] + for begin in range(0, self.num_samples, minibatch_size): + self._report_progress(begin, self.num_samples) + results += tflib.run(result_expr) + results = {key: np.concatenate([value[key] for value in results], axis=0) for key in results[0].keys()} + + # Calculate conditional entropy for each attribute. + conditional_entropies = defaultdict(list) + for attrib_idx in self.attrib_indices: + # Prune the least confident samples. + pruned_indices = list(range(self.num_samples)) + pruned_indices = sorted(pruned_indices, key=lambda i: -np.max(results[attrib_idx][i])) + pruned_indices = pruned_indices[:self.num_keep] + + # Fit SVM to the remaining samples. + svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1) + for space in ['latents', 'dlatents']: + svm_inputs = results[space][pruned_indices] + try: + svm = sklearn.svm.LinearSVC() + svm.fit(svm_inputs, svm_targets) + svm.score(svm_inputs, svm_targets) + svm_outputs = svm.predict(svm_inputs) + except: + svm_outputs = svm_targets # assume perfect prediction + + # Calculate conditional entropy. + p = [[np.mean([case == (row, col) for case in zip(svm_outputs, svm_targets)]) for col in (0, 1)] for row in (0, 1)] + conditional_entropies[space].append(conditional_entropy(p)) + + # Calculate separability scores. 
+ scores = {key: 2**np.sum(values) for key, values in conditional_entropies.items()} + self._report_result(scores['latents'], suffix='_z') + self._report_result(scores['dlatents'], suffix='_w') + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/metric_base.py b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/metric_base.py new file mode 100644 index 0000000000000000000000000000000000000000..cbd0276d28d31ce3f3cec99b2142ed11d9bef340 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/metric_base.py @@ -0,0 +1,168 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Common definitions for GAN metrics.""" + +import os +import time +import hashlib +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from training import misc +from training import dataset + +#---------------------------------------------------------------------------- +# Base class for metrics. + +class MetricBase: + def __init__(self, name): + self.name = name + self._dataset_obj = None + self._progress_lo = None + self._progress_hi = None + self._progress_max = None + self._progress_sec = None + self._progress_time = None + self._reset() + + def close(self): + self._reset() + + def _reset(self, network_pkl=None, run_dir=None, data_dir=None, dataset_args=None, mirror_augment=None): + if self._dataset_obj is not None: + self._dataset_obj.close() + + self._network_pkl = network_pkl + self._data_dir = data_dir + self._dataset_args = dataset_args + self._dataset_obj = None + self._mirror_augment = mirror_augment + self._eval_time = 0 + self._results = [] + + if (dataset_args is None or mirror_augment is None) and run_dir is not None: + run_config = misc.parse_config_for_previous_run(run_dir) + self._dataset_args = dict(run_config['dataset']) + self._dataset_args['shuffle_mb'] = 0 + self._mirror_augment = run_config['train'].get('mirror_augment', False) + + def configure_progress_reports(self, plo, phi, pmax, psec=15): + self._progress_lo = plo + self._progress_hi = phi + self._progress_max = pmax + self._progress_sec = psec + + def run(self, network_pkl, run_dir=None, data_dir=None, dataset_args=None, mirror_augment=None, num_gpus=1, tf_config=None, log_results=True, Gs_kwargs=dict(is_validation=True)): + self._reset(network_pkl=network_pkl, run_dir=run_dir, data_dir=data_dir, dataset_args=dataset_args, mirror_augment=mirror_augment) + time_begin = time.time() + with tf.Graph().as_default(), tflib.create_session(tf_config).as_default(): # pylint: disable=not-context-manager + self._report_progress(0, 1) + _G, _D, Gs = misc.load_pkl(self._network_pkl) + self._evaluate(Gs, Gs_kwargs=Gs_kwargs, num_gpus=num_gpus) + self._report_progress(1, 1) + self._eval_time = time.time() - time_begin # pylint: disable=attribute-defined-outside-init + + if log_results: + if run_dir is not None: + log_file = os.path.join(run_dir, 'metric-%s.txt' % self.name) + with dnnlib.util.Logger(log_file, 'a'): + print(self.get_result_str().strip()) + else: + print(self.get_result_str().strip()) + + def get_result_str(self): + network_name = os.path.splitext(os.path.basename(self._network_pkl))[0] + if len(network_name) > 29: + network_name = '...' 
+ network_name[-26:] + result_str = '%-30s' % network_name + result_str += ' time %-12s' % dnnlib.util.format_time(self._eval_time) + for res in self._results: + result_str += ' ' + self.name + res.suffix + ' ' + result_str += res.fmt % res.value + return result_str + + def update_autosummaries(self): + for res in self._results: + tflib.autosummary.autosummary('Metrics/' + self.name + res.suffix, res.value) + + def _evaluate(self, Gs, Gs_kwargs, num_gpus): + raise NotImplementedError # to be overridden by subclasses + + def _report_result(self, value, suffix='', fmt='%-10.4f'): + self._results += [dnnlib.EasyDict(value=value, suffix=suffix, fmt=fmt)] + + def _report_progress(self, pcur, pmax, status_str=''): + if self._progress_lo is None or self._progress_hi is None or self._progress_max is None: + return + t = time.time() + if self._progress_sec is not None and self._progress_time is not None and t < self._progress_time + self._progress_sec: + return + self._progress_time = t + val = self._progress_lo + (pcur / pmax) * (self._progress_hi - self._progress_lo) + dnnlib.RunContext.get().update(status_str, int(val), self._progress_max) + + def _get_cache_file_for_reals(self, extension='pkl', **kwargs): + all_args = dnnlib.EasyDict(metric_name=self.name, mirror_augment=self._mirror_augment) + all_args.update(self._dataset_args) + all_args.update(kwargs) + md5 = hashlib.md5(repr(sorted(all_args.items())).encode('utf-8')) + dataset_name = self._dataset_args.get('tfrecord_dir', None) or self._dataset_args.get('h5_file', None) + dataset_name = os.path.splitext(os.path.basename(dataset_name))[0] + return os.path.join('.stylegan2-cache', '%s-%s-%s.%s' % (md5.hexdigest(), self.name, dataset_name, extension)) + + def _get_dataset_obj(self): + if self._dataset_obj is None: + self._dataset_obj = dataset.load_dataset(data_dir=self._data_dir, **self._dataset_args) + return self._dataset_obj + + def _iterate_reals(self, minibatch_size): + dataset_obj = self._get_dataset_obj() + while True: + images, _labels = dataset_obj.get_minibatch_np(minibatch_size) + if self._mirror_augment: + images = misc.apply_mirror_augment(images) + yield images + + def _iterate_fakes(self, Gs, minibatch_size, num_gpus): + while True: + latents = np.random.randn(minibatch_size, *Gs.input_shape[1:]) + fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True) + images = Gs.run(latents, None, output_transform=fmt, is_validation=True, num_gpus=num_gpus, assume_frozen=True) + yield images + + def _get_random_labels_tf(self, minibatch_size): + return self._get_dataset_obj().get_random_labels_tf(minibatch_size) + +#---------------------------------------------------------------------------- +# Group of multiple metrics. + +class MetricGroup: + def __init__(self, metric_kwarg_list): + self.metrics = [dnnlib.util.call_func_by_name(**kwargs) for kwargs in metric_kwarg_list] + + def run(self, *args, **kwargs): + for metric in self.metrics: + metric.run(*args, **kwargs) + + def get_result_str(self): + return ' '.join(metric.get_result_str() for metric in self.metrics) + + def update_autosummaries(self): + for metric in self.metrics: + metric.update_autosummaries() + +#---------------------------------------------------------------------------- +# Dummy metric for debugging purposes. 
+ +class DummyMetric(MetricBase): + def _evaluate(self, Gs, Gs_kwargs, num_gpus): + _ = Gs, Gs_kwargs, num_gpus + self._report_result(0.0) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/metric_defaults.py b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/metric_defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..4371db8180ab71a625eb0f0520522816e4bd93b4 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/metric_defaults.py @@ -0,0 +1,25 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Default metric definitions.""" + +from dnnlib import EasyDict + +#---------------------------------------------------------------------------- + +metric_defaults = EasyDict([(args.name, args) for args in [ + EasyDict(name='fid50k', func_name='metrics.frechet_inception_distance.FID', num_images=50000, minibatch_per_gpu=8), + EasyDict(name='is50k', func_name='metrics.inception_score.IS', num_images=50000, num_splits=10, minibatch_per_gpu=8), + EasyDict(name='ppl_zfull', func_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='z', sampling='full', crop=True, minibatch_per_gpu=4, Gs_overrides=dict(dtype='float32', mapping_dtype='float32')), + EasyDict(name='ppl_wfull', func_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='full', crop=True, minibatch_per_gpu=4, Gs_overrides=dict(dtype='float32', mapping_dtype='float32')), + EasyDict(name='ppl_zend', func_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='z', sampling='end', crop=True, minibatch_per_gpu=4, Gs_overrides=dict(dtype='float32', mapping_dtype='float32')), + EasyDict(name='ppl_wend', func_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=True, minibatch_per_gpu=4, Gs_overrides=dict(dtype='float32', mapping_dtype='float32')), + EasyDict(name='ppl2_wend', func_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=False, minibatch_per_gpu=4, Gs_overrides=dict(dtype='float32', mapping_dtype='float32')), + EasyDict(name='ls', func_name='metrics.linear_separability.LS', num_samples=200000, num_keep=100000, attrib_indices=range(40), minibatch_per_gpu=4), + EasyDict(name='pr50k3', func_name='metrics.precision_recall.PR', num_images=50000, nhood_size=3, minibatch_per_gpu=8, row_batch_size=10000, col_batch_size=10000), +]]) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/perceptual_path_length.py b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/perceptual_path_length.py new file mode 100644 index 0000000000000000000000000000000000000000..e0fcb27389f60e484f6cd3dd6dfcc060add798ad --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/perceptual_path_length.py @@ -0,0 +1,116 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. 
+# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Perceptual Path Length (PPL).""" + +import numpy as np +import tensorflow as tf +import dnnlib.tflib as tflib + +from metrics import metric_base +from training import misc + +#---------------------------------------------------------------------------- + +# Normalize batch of vectors. +def normalize(v): + return v / tf.sqrt(tf.reduce_sum(tf.square(v), axis=-1, keepdims=True)) + +# Spherical interpolation of a batch of vectors. +def slerp(a, b, t): + a = normalize(a) + b = normalize(b) + d = tf.reduce_sum(a * b, axis=-1, keepdims=True) + p = t * tf.math.acos(d) + c = normalize(b - d * a) + d = a * tf.math.cos(p) + c * tf.math.sin(p) + return normalize(d) + +#---------------------------------------------------------------------------- + +class PPL(metric_base.MetricBase): + def __init__(self, num_samples, epsilon, space, sampling, crop, minibatch_per_gpu, Gs_overrides, **kwargs): + assert space in ['z', 'w'] + assert sampling in ['full', 'end'] + super().__init__(**kwargs) + self.num_samples = num_samples + self.epsilon = epsilon + self.space = space + self.sampling = sampling + self.crop = crop + self.minibatch_per_gpu = minibatch_per_gpu + self.Gs_overrides = Gs_overrides + + def _evaluate(self, Gs, Gs_kwargs, num_gpus): + Gs_kwargs = dict(Gs_kwargs) + Gs_kwargs.update(self.Gs_overrides) + minibatch_size = num_gpus * self.minibatch_per_gpu + + # Construct TensorFlow graph. + distance_expr = [] + for gpu_idx in range(num_gpus): + with tf.device('/gpu:%d' % gpu_idx): + Gs_clone = Gs.clone() + noise_vars = [var for name, var in Gs_clone.components.synthesis.vars.items() if name.startswith('noise')] + + # Generate random latents and interpolation t-values. + lat_t01 = tf.random_normal([self.minibatch_per_gpu * 2] + Gs_clone.input_shape[1:]) + lerp_t = tf.random_uniform([self.minibatch_per_gpu], 0.0, 1.0 if self.sampling == 'full' else 0.0) + labels = tf.reshape(tf.tile(self._get_random_labels_tf(self.minibatch_per_gpu), [1, 2]), [self.minibatch_per_gpu * 2, -1]) + + # Interpolate in W or Z. + if self.space == 'w': + dlat_t01 = Gs_clone.components.mapping.get_output_for(lat_t01, labels, **Gs_kwargs) + dlat_t01 = tf.cast(dlat_t01, tf.float32) + dlat_t0, dlat_t1 = dlat_t01[0::2], dlat_t01[1::2] + dlat_e0 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis]) + dlat_e1 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis] + self.epsilon) + dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis=1), dlat_t01.shape) + else: # space == 'z' + lat_t0, lat_t1 = lat_t01[0::2], lat_t01[1::2] + lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis]) + lat_e1 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis] + self.epsilon) + lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis=1), lat_t01.shape) + dlat_e01 = Gs_clone.components.mapping.get_output_for(lat_e01, labels, **Gs_kwargs) + + # Synthesize images. + with tf.control_dependencies([var.initializer for var in noise_vars]): # use same noise inputs for the entire minibatch + images = Gs_clone.components.synthesis.get_output_for(dlat_e01, randomize_noise=False, **Gs_kwargs) + images = tf.cast(images, tf.float32) + + # Crop only the face region. + if self.crop: + c = int(images.shape[2] // 8) + images = images[:, :, c*3 : c*7, c*2 : c*6] + + # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images. 
+ factor = images.shape[2] // 256 + if factor > 1: + images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor]) + images = tf.reduce_mean(images, axis=[3,5]) + + # Scale dynamic range from [-1,1] to [0,255] for VGG. + images = (images + 1) * (255 / 2) + + # Evaluate perceptual distance. + img_e0, img_e1 = images[0::2], images[1::2] + distance_measure = misc.load_pkl('http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl') + distance_expr.append(distance_measure.get_output_for(img_e0, img_e1) * (1 / self.epsilon**2)) + + # Sampling loop. + all_distances = [] + for begin in range(0, self.num_samples, minibatch_size): + self._report_progress(begin, self.num_samples) + all_distances += tflib.run(distance_expr) + all_distances = np.concatenate(all_distances, axis=0) + + # Reject outliers. + lo = np.percentile(all_distances, 1, interpolation='lower') + hi = np.percentile(all_distances, 99, interpolation='higher') + filtered_distances = np.extract(np.logical_and(lo <= all_distances, all_distances <= hi), all_distances) + self._report_result(np.mean(filtered_distances)) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/precision_recall.py b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/precision_recall.py new file mode 100644 index 0000000000000000000000000000000000000000..629d88d0b86f019b98f887fc4ff3cff83cadf5d2 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/metrics/precision_recall.py @@ -0,0 +1,224 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Precision/Recall (PR).""" + +import os +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_base +from training import misc + +#---------------------------------------------------------------------------- + +def batch_pairwise_distances(U, V): + """ Compute pairwise distances between two batches of feature vectors.""" + with tf.variable_scope('pairwise_dist_block'): + # Squared norms of each row in U and V. + norm_u = tf.reduce_sum(tf.square(U), 1) + norm_v = tf.reduce_sum(tf.square(V), 1) + + # norm_u as a row and norm_v as a column vectors. + norm_u = tf.reshape(norm_u, [-1, 1]) + norm_v = tf.reshape(norm_v, [1, -1]) + + # Pairwise squared Euclidean distances. + D = tf.maximum(norm_u - 2*tf.matmul(U, V, False, True) + norm_v, 0.0) + + return D + +#---------------------------------------------------------------------------- + +class DistanceBlock(): + """Distance block.""" + def __init__(self, num_features, num_gpus): + self.num_features = num_features + self.num_gpus = num_gpus + + # Initialize TF graph to calculate pairwise distances. 
+ with tf.device('/cpu:0'): + self._features_batch1 = tf.placeholder(tf.float16, shape=[None, self.num_features]) + self._features_batch2 = tf.placeholder(tf.float16, shape=[None, self.num_features]) + features_split2 = tf.split(self._features_batch2, self.num_gpus, axis=0) + distances_split = [] + for gpu_idx in range(self.num_gpus): + with tf.device('/gpu:%d' % gpu_idx): + distances_split.append(batch_pairwise_distances(self._features_batch1, features_split2[gpu_idx])) + self._distance_block = tf.concat(distances_split, axis=1) + + def pairwise_distances(self, U, V): + """Evaluate pairwise distances between two batches of feature vectors.""" + return self._distance_block.eval(feed_dict={self._features_batch1: U, self._features_batch2: V}) + +#---------------------------------------------------------------------------- + +class ManifoldEstimator(): + """Finds an estimate for the manifold of given feature vectors.""" + def __init__(self, distance_block, features, row_batch_size, col_batch_size, nhood_sizes, clamp_to_percentile=None): + """Find an estimate of the manifold of given feature vectors.""" + num_images = features.shape[0] + self.nhood_sizes = nhood_sizes + self.num_nhoods = len(nhood_sizes) + self.row_batch_size = row_batch_size + self.col_batch_size = col_batch_size + self._ref_features = features + self._distance_block = distance_block + + # Estimate manifold of features by calculating distances to kth nearest neighbor of each sample. + self.D = np.zeros([num_images, self.num_nhoods], dtype=np.float16) + distance_batch = np.zeros([row_batch_size, num_images], dtype=np.float16) + seq = np.arange(max(self.nhood_sizes) + 1, dtype=np.int32) + + for begin1 in range(0, num_images, row_batch_size): + end1 = min(begin1 + row_batch_size, num_images) + row_batch = features[begin1:end1] + + for begin2 in range(0, num_images, col_batch_size): + end2 = min(begin2 + col_batch_size, num_images) + col_batch = features[begin2:end2] + + # Compute distances between batches. + distance_batch[0:end1-begin1, begin2:end2] = self._distance_block.pairwise_distances(row_batch, col_batch) + + # Find the kth nearest neighbor from the current batch. 
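+            # np.partition with kth=seq puts the first max(k)+1 columns in sorted order, so column k
+            # holds the distance to the k-th nearest neighbour (column 0 is the sample itself); these
+            # radii are the hypersphere sizes that define the estimated manifold.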
+ self.D[begin1:end1, :] = np.partition(distance_batch[0:end1-begin1, :], seq, axis=1)[:, self.nhood_sizes] + + if clamp_to_percentile is not None: + max_distances = np.percentile(self.D, clamp_to_percentile, axis=0) + self.D[self.D > max_distances] = 0 #max_distances # 0 + + def evaluate(self, eval_features, return_realism=False, return_neighbors=False): + """Evaluate if new feature vectors are in the estimated manifold.""" + num_eval_images = eval_features.shape[0] + num_ref_images = self.D.shape[0] + distance_batch = np.zeros([self.row_batch_size, num_ref_images], dtype=np.float16) + batch_predictions = np.zeros([num_eval_images, self.num_nhoods], dtype=np.int32) + #max_realism_score = np.zeros([num_eval_images,], dtype=np.float32) + realism_score = np.zeros([num_eval_images,], dtype=np.float32) + nearest_indices = np.zeros([num_eval_images,], dtype=np.int32) + + for begin1 in range(0, num_eval_images, self.row_batch_size): + end1 = min(begin1 + self.row_batch_size, num_eval_images) + feature_batch = eval_features[begin1:end1] + + for begin2 in range(0, num_ref_images, self.col_batch_size): + end2 = min(begin2 + self.col_batch_size, num_ref_images) + ref_batch = self._ref_features[begin2:end2] + + distance_batch[0:end1-begin1, begin2:end2] = self._distance_block.pairwise_distances(feature_batch, ref_batch) + + # From the minibatch of new feature vectors, determine if they are in the estimated manifold. + # If a feature vector is inside a hypersphere of some reference sample, then the new sample lies on the estimated manifold. + # The radii of the hyperspheres are determined from distances of neighborhood size k. + samples_in_manifold = distance_batch[0:end1-begin1, :, None] <= self.D + batch_predictions[begin1:end1] = np.any(samples_in_manifold, axis=1).astype(np.int32) + + #max_realism_score[begin1:end1] = np.max(self.D[:, 0] / (distance_batch[0:end1-begin1, :] + 1e-18), axis=1) + #nearest_indices[begin1:end1] = np.argmax(self.D[:, 0] / (distance_batch[0:end1-begin1, :] + 1e-18), axis=1) + nearest_indices[begin1:end1] = np.argmin(distance_batch[0:end1-begin1, :], axis=1) + realism_score[begin1:end1] = self.D[nearest_indices[begin1:end1], 0] / np.min(distance_batch[0:end1-begin1, :], axis=1) + + if return_realism and return_neighbors: + return batch_predictions, realism_score, nearest_indices + elif return_realism: + return batch_predictions, realism_score + elif return_neighbors: + return batch_predictions, nearest_indices + + return batch_predictions + +#---------------------------------------------------------------------------- + +def knn_precision_recall_features(ref_features, eval_features, feature_net, nhood_sizes, + row_batch_size, col_batch_size, num_gpus): + """Calculates k-NN precision and recall for two sets of feature vectors.""" + state = dnnlib.EasyDict() + #num_images = ref_features.shape[0] + num_features = feature_net.output_shape[1] + state.ref_features = ref_features + state.eval_features = eval_features + + # Initialize DistanceBlock and ManifoldEstimators. + distance_block = DistanceBlock(num_features, num_gpus) + state.ref_manifold = ManifoldEstimator(distance_block, state.ref_features, row_batch_size, col_batch_size, nhood_sizes) + state.eval_manifold = ManifoldEstimator(distance_block, state.eval_features, row_batch_size, col_batch_size, nhood_sizes) + + # Evaluate precision and recall using k-nearest neighbors. + #print('Evaluating k-NN precision and recall with %i samples...' 
% num_images) + #start = time.time() + + # Precision: How many points from eval_features are in ref_features manifold. + state.precision, state.realism_scores, state.nearest_neighbors = state.ref_manifold.evaluate(state.eval_features, return_realism=True, return_neighbors=True) + state.knn_precision = state.precision.mean(axis=0) + + # Recall: How many points from ref_features are in eval_features manifold. + state.recall = state.eval_manifold.evaluate(state.ref_features) + state.knn_recall = state.recall.mean(axis=0) + + #elapsed_time = time.time() - start + #print('Done evaluation in: %gs' % elapsed_time) + + return state + +#---------------------------------------------------------------------------- + +class PR(metric_base.MetricBase): + def __init__(self, num_images, nhood_size, minibatch_per_gpu, row_batch_size, col_batch_size, **kwargs): + super().__init__(**kwargs) + self.num_images = num_images + self.nhood_size = nhood_size + self.minibatch_per_gpu = minibatch_per_gpu + self.row_batch_size = row_batch_size + self.col_batch_size = col_batch_size + + def _evaluate(self, Gs, Gs_kwargs, num_gpus): + minibatch_size = num_gpus * self.minibatch_per_gpu + feature_net = misc.load_pkl('http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16.pkl') + + # Calculate features for reals. + cache_file = self._get_cache_file_for_reals(num_images=self.num_images) + os.makedirs(os.path.dirname(cache_file), exist_ok=True) + if os.path.isfile(cache_file): + ref_features = misc.load_pkl(cache_file) + else: + ref_features = np.empty([self.num_images, feature_net.output_shape[1]], dtype=np.float32) + for idx, images in enumerate(self._iterate_reals(minibatch_size=minibatch_size)): + begin = idx * minibatch_size + end = min(begin + minibatch_size, self.num_images) + ref_features[begin:end] = feature_net.run(images[:end-begin], num_gpus=num_gpus, assume_frozen=True) + if end == self.num_images: + break + misc.save_pkl(ref_features, cache_file) + + # Construct TensorFlow graph. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device('/gpu:%d' % gpu_idx): + Gs_clone = Gs.clone() + feature_net_clone = feature_net.clone() + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs) + images = tflib.convert_images_to_uint8(images) + result_expr.append(feature_net_clone.get_output_for(images)) + + # Calculate features for fakes. + eval_features = np.empty([self.num_images, feature_net.output_shape[1]], dtype=np.float32) + for begin in range(0, self.num_images, minibatch_size): + self._report_progress(begin, self.num_images) + end = min(begin + minibatch_size, self.num_images) + eval_features[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin] + + # Calculate precision and recall. 
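+        # A single neighbourhood size is used below, so knn_precision/knn_recall are length-1 arrays:
+        # precision is the fraction of fake features inside the real manifold, recall the fraction of
+        # real features inside the fake manifold.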
+ state = knn_precision_recall_features(ref_features=ref_features, eval_features=eval_features, feature_net=feature_net, + nhood_sizes=[self.nhood_size], row_batch_size=self.row_batch_size, col_batch_size=self.row_batch_size, num_gpus=num_gpus) + self._report_result(state.knn_precision[0], suffix='_precision') + self._report_result(state.knn_recall[0], suffix='_recall') + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/pretrained_networks.py b/ContraCLIP/models/genforce/converters/stylegan2_official/pretrained_networks.py new file mode 100644 index 0000000000000000000000000000000000000000..40ccfd9eb70417149dd7e8ee77735b26c2f919d5 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/pretrained_networks.py @@ -0,0 +1,80 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""List of pre-trained StyleGAN2 networks located on Google Drive.""" + +import pickle +import dnnlib +import dnnlib.tflib as tflib + +#---------------------------------------------------------------------------- +# StyleGAN2 Google Drive root: https://drive.google.com/open?id=1QHc-yF5C3DChRwSdZKcx1w6K8JvSxQi7 + +gdrive_urls = { + 'gdrive:networks/stylegan2-car-config-a.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-car-config-a.pkl', + 'gdrive:networks/stylegan2-car-config-b.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-car-config-b.pkl', + 'gdrive:networks/stylegan2-car-config-c.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-car-config-c.pkl', + 'gdrive:networks/stylegan2-car-config-d.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-car-config-d.pkl', + 'gdrive:networks/stylegan2-car-config-e.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-car-config-e.pkl', + 'gdrive:networks/stylegan2-car-config-f.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-car-config-f.pkl', + 'gdrive:networks/stylegan2-cat-config-a.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-cat-config-a.pkl', + 'gdrive:networks/stylegan2-cat-config-f.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-cat-config-f.pkl', + 'gdrive:networks/stylegan2-church-config-a.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-church-config-a.pkl', + 'gdrive:networks/stylegan2-church-config-f.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-church-config-f.pkl', + 'gdrive:networks/stylegan2-ffhq-config-a.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-ffhq-config-a.pkl', + 'gdrive:networks/stylegan2-ffhq-config-b.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-ffhq-config-b.pkl', + 'gdrive:networks/stylegan2-ffhq-config-c.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-ffhq-config-c.pkl', + 'gdrive:networks/stylegan2-ffhq-config-d.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-ffhq-config-d.pkl', + 'gdrive:networks/stylegan2-ffhq-config-e.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-ffhq-config-e.pkl', + 'gdrive:networks/stylegan2-ffhq-config-f.pkl': 
'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-ffhq-config-f.pkl', + 'gdrive:networks/stylegan2-horse-config-a.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-horse-config-a.pkl', + 'gdrive:networks/stylegan2-horse-config-f.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-horse-config-f.pkl', + 'gdrive:networks/table2/stylegan2-car-config-e-Gorig-Dorig.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-car-config-e-Gorig-Dorig.pkl', + 'gdrive:networks/table2/stylegan2-car-config-e-Gorig-Dresnet.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-car-config-e-Gorig-Dresnet.pkl', + 'gdrive:networks/table2/stylegan2-car-config-e-Gorig-Dskip.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-car-config-e-Gorig-Dskip.pkl', + 'gdrive:networks/table2/stylegan2-car-config-e-Gresnet-Dorig.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-car-config-e-Gresnet-Dorig.pkl', + 'gdrive:networks/table2/stylegan2-car-config-e-Gresnet-Dresnet.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-car-config-e-Gresnet-Dresnet.pkl', + 'gdrive:networks/table2/stylegan2-car-config-e-Gresnet-Dskip.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-car-config-e-Gresnet-Dskip.pkl', + 'gdrive:networks/table2/stylegan2-car-config-e-Gskip-Dorig.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-car-config-e-Gskip-Dorig.pkl', + 'gdrive:networks/table2/stylegan2-car-config-e-Gskip-Dresnet.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-car-config-e-Gskip-Dresnet.pkl', + 'gdrive:networks/table2/stylegan2-car-config-e-Gskip-Dskip.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-car-config-e-Gskip-Dskip.pkl', + 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gorig-Dorig.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-ffhq-config-e-Gorig-Dorig.pkl', + 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gorig-Dresnet.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-ffhq-config-e-Gorig-Dresnet.pkl', + 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gorig-Dskip.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-ffhq-config-e-Gorig-Dskip.pkl', + 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gresnet-Dorig.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-ffhq-config-e-Gresnet-Dorig.pkl', + 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gresnet-Dresnet.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-ffhq-config-e-Gresnet-Dresnet.pkl', + 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gresnet-Dskip.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-ffhq-config-e-Gresnet-Dskip.pkl', + 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gskip-Dorig.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-ffhq-config-e-Gskip-Dorig.pkl', + 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gskip-Dresnet.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-ffhq-config-e-Gskip-Dresnet.pkl', + 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gskip-Dskip.pkl': 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/table2/stylegan2-ffhq-config-e-Gskip-Dskip.pkl', +} + 
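+# The 'gdrive:networks/...' keys above are aliases for CloudFront mirrors of the Google Drive
+# pickles; get_path_or_url() below returns its argument unchanged when it is not in the table,
+# so local paths and direct URLs can also be passed to load_networks().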
+#---------------------------------------------------------------------------- + +def get_path_or_url(path_or_gdrive_path): + return gdrive_urls.get(path_or_gdrive_path, path_or_gdrive_path) + +#---------------------------------------------------------------------------- + +_cached_networks = dict() + +def load_networks(path_or_gdrive_path): + path_or_url = get_path_or_url(path_or_gdrive_path) + if path_or_url in _cached_networks: + return _cached_networks[path_or_url] + + if dnnlib.util.is_url(path_or_url): + stream = dnnlib.util.open_url(path_or_url, cache_dir='.stylegan2-cache') + else: + stream = open(path_or_url, 'rb') + + tflib.init_tf() + with stream: + G, D, Gs = pickle.load(stream, encoding='latin1') + _cached_networks[path_or_url] = G, D, Gs + return G, D, Gs + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/projector.py b/ContraCLIP/models/genforce/converters/stylegan2_official/projector.py new file mode 100644 index 0000000000000000000000000000000000000000..584df5aa26323bbb915a87d4cf5c8ebdfa815f2f --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/projector.py @@ -0,0 +1,206 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from training import misc + +#---------------------------------------------------------------------------- + +class Projector: + def __init__(self): + self.num_steps = 1000 + self.dlatent_avg_samples = 10000 + self.initial_learning_rate = 0.1 + self.initial_noise_factor = 0.05 + self.lr_rampdown_length = 0.25 + self.lr_rampup_length = 0.05 + self.noise_ramp_length = 0.75 + self.regularize_noise_weight = 1e5 + self.verbose = False + self.clone_net = True + + self._Gs = None + self._minibatch_size = None + self._dlatent_avg = None + self._dlatent_std = None + self._noise_vars = None + self._noise_init_op = None + self._noise_normalize_op = None + self._dlatents_var = None + self._noise_in = None + self._dlatents_expr = None + self._images_expr = None + self._target_images_var = None + self._lpips = None + self._dist = None + self._loss = None + self._reg_sizes = None + self._lrate_in = None + self._opt = None + self._opt_step = None + self._cur_step = None + + def _info(self, *args): + if self.verbose: + print('Projector:', *args) + + def set_network(self, Gs, minibatch_size=1): + assert minibatch_size == 1 + self._Gs = Gs + self._minibatch_size = minibatch_size + if self._Gs is None: + return + if self.clone_net: + self._Gs = self._Gs.clone() + + # Find dlatent stats. + self._info('Finding W midpoint and stddev using %d samples...' % self.dlatent_avg_samples) + latent_samples = np.random.RandomState(123).randn(self.dlatent_avg_samples, *self._Gs.input_shapes[0][1:]) + dlatent_samples = self._Gs.components.mapping.run(latent_samples, None)[:, :1, :] # [N, 1, 512] + self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 1, 512] + self._dlatent_std = (np.sum((dlatent_samples - self._dlatent_avg) ** 2) / self.dlatent_avg_samples) ** 0.5 + self._info('std = %g' % self._dlatent_std) + + # Find noise inputs. 
+ self._info('Setting up noise inputs...') + self._noise_vars = [] + noise_init_ops = [] + noise_normalize_ops = [] + while True: + n = 'G_synthesis/noise%d' % len(self._noise_vars) + if not n in self._Gs.vars: + break + v = self._Gs.vars[n] + self._noise_vars.append(v) + noise_init_ops.append(tf.assign(v, tf.random_normal(tf.shape(v), dtype=tf.float32))) + noise_mean = tf.reduce_mean(v) + noise_std = tf.reduce_mean((v - noise_mean)**2)**0.5 + noise_normalize_ops.append(tf.assign(v, (v - noise_mean) / noise_std)) + self._info(n, v) + self._noise_init_op = tf.group(*noise_init_ops) + self._noise_normalize_op = tf.group(*noise_normalize_ops) + + # Image output graph. + self._info('Building image output graph...') + self._dlatents_var = tf.Variable(tf.zeros([self._minibatch_size] + list(self._dlatent_avg.shape[1:])), name='dlatents_var') + self._noise_in = tf.placeholder(tf.float32, [], name='noise_in') + dlatents_noise = tf.random.normal(shape=self._dlatents_var.shape) * self._noise_in + self._dlatents_expr = tf.tile(self._dlatents_var + dlatents_noise, [1, self._Gs.components.synthesis.input_shape[1], 1]) + self._images_expr = self._Gs.components.synthesis.get_output_for(self._dlatents_expr, randomize_noise=False) + + # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images. + proc_images_expr = (self._images_expr + 1) * (255 / 2) + sh = proc_images_expr.shape.as_list() + if sh[2] > 256: + factor = sh[2] // 256 + proc_images_expr = tf.reduce_mean(tf.reshape(proc_images_expr, [-1, sh[1], sh[2] // factor, factor, sh[2] // factor, factor]), axis=[3,5]) + + # Loss graph. + self._info('Building loss graph...') + self._target_images_var = tf.Variable(tf.zeros(proc_images_expr.shape), name='target_images_var') + if self._lpips is None: + self._lpips = misc.load_pkl('http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl') + self._dist = self._lpips.get_output_for(proc_images_expr, self._target_images_var) + self._loss = tf.reduce_sum(self._dist) + + # Noise regularization graph. + self._info('Building noise regularization graph...') + reg_loss = 0.0 + for v in self._noise_vars: + sz = v.shape[2] + while True: + reg_loss += tf.reduce_mean(v * tf.roll(v, shift=1, axis=3))**2 + tf.reduce_mean(v * tf.roll(v, shift=1, axis=2))**2 + if sz <= 8: + break # Small enough already + v = tf.reshape(v, [1, 1, sz//2, 2, sz//2, 2]) # Downscale + v = tf.reduce_mean(v, axis=[3, 5]) + sz = sz // 2 + self._loss += reg_loss * self.regularize_noise_weight + + # Optimizer. + self._info('Setting up optimizer...') + self._lrate_in = tf.placeholder(tf.float32, [], name='lrate_in') + self._opt = dnnlib.tflib.Optimizer(learning_rate=self._lrate_in) + self._opt.register_gradients(self._loss, [self._dlatents_var] + self._noise_vars) + self._opt_step = self._opt.apply_updates() + + def run(self, target_images): + # Run to completion. + self.start(target_images) + while self._cur_step < self.num_steps: + self.step() + + # Collect results. + pres = dnnlib.EasyDict() + pres.dlatents = self.get_dlatents() + pres.noises = self.get_noises() + pres.images = self.get_images() + return pres + + def start(self, target_images): + assert self._Gs is not None + + # Prepare target images. 
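+        # Targets are rescaled from the generator's [-1, 1] output range to [0, 255] and, if larger
+        # than the LPIPS comparison graph, box-downsampled (mean over factor x factor blocks) to match
+        # self._target_images_var built in set_network().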
+ self._info('Preparing target images...') + target_images = np.asarray(target_images, dtype='float32') + target_images = (target_images + 1) * (255 / 2) + sh = target_images.shape + assert sh[0] == self._minibatch_size + if sh[2] > self._target_images_var.shape[2]: + factor = sh[2] // self._target_images_var.shape[2] + target_images = np.reshape(target_images, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]).mean((3, 5)) + + # Initialize optimization state. + self._info('Initializing optimization state...') + tflib.set_vars({self._target_images_var: target_images, self._dlatents_var: np.tile(self._dlatent_avg, [self._minibatch_size, 1, 1])}) + tflib.run(self._noise_init_op) + self._opt.reset_optimizer_state() + self._cur_step = 0 + + def step(self): + assert self._cur_step is not None + if self._cur_step >= self.num_steps: + return + if self._cur_step == 0: + self._info('Running...') + + # Hyperparameters. + t = self._cur_step / self.num_steps + noise_strength = self._dlatent_std * self.initial_noise_factor * max(0.0, 1.0 - t / self.noise_ramp_length) ** 2 + lr_ramp = min(1.0, (1.0 - t) / self.lr_rampdown_length) + lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi) + lr_ramp = lr_ramp * min(1.0, t / self.lr_rampup_length) + learning_rate = self.initial_learning_rate * lr_ramp + + # Train. + feed_dict = {self._noise_in: noise_strength, self._lrate_in: learning_rate} + _, dist_value, loss_value = tflib.run([self._opt_step, self._dist, self._loss], feed_dict) + tflib.run(self._noise_normalize_op) + + # Print status. + self._cur_step += 1 + if self._cur_step == self.num_steps or self._cur_step % 10 == 0: + self._info('%-8d%-12g%-12g' % (self._cur_step, dist_value, loss_value)) + if self._cur_step == self.num_steps: + self._info('Done.') + + def get_cur_step(self): + return self._cur_step + + def get_dlatents(self): + return tflib.run(self._dlatents_expr, {self._noise_in: 0}) + + def get_noises(self): + return tflib.run(self._noise_vars) + + def get_images(self): + return tflib.run(self._images_expr, {self._noise_in: 0}) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/run_generator.py b/ContraCLIP/models/genforce/converters/stylegan2_official/run_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..339796c99b557507a53923d8974ac10b7f3d145b --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/run_generator.py @@ -0,0 +1,170 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +import argparse +import numpy as np +import PIL.Image +import dnnlib +import dnnlib.tflib as tflib +import re +import sys + +import pretrained_networks + +#---------------------------------------------------------------------------- + +def generate_images(network_pkl, seeds, truncation_psi): + print('Loading networks from "%s"...' 
% network_pkl) + _G, _D, Gs = pretrained_networks.load_networks(network_pkl) + noise_vars = [var for name, var in Gs.components.synthesis.vars.items() if name.startswith('noise')] + + Gs_kwargs = dnnlib.EasyDict() + Gs_kwargs.output_transform = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True) + Gs_kwargs.randomize_noise = False + if truncation_psi is not None: + Gs_kwargs.truncation_psi = truncation_psi + + for seed_idx, seed in enumerate(seeds): + print('Generating image for seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds))) + rnd = np.random.RandomState(seed) + z = rnd.randn(1, *Gs.input_shape[1:]) # [minibatch, component] + tflib.set_vars({var: rnd.randn(*var.shape.as_list()) for var in noise_vars}) # [height, width] + images = Gs.run(z, None, **Gs_kwargs) # [minibatch, height, width, channel] + PIL.Image.fromarray(images[0], 'RGB').save(dnnlib.make_run_dir_path('seed%04d.png' % seed)) + +#---------------------------------------------------------------------------- + +def style_mixing_example(network_pkl, row_seeds, col_seeds, truncation_psi, col_styles, minibatch_size=4): + print('Loading networks from "%s"...' % network_pkl) + _G, _D, Gs = pretrained_networks.load_networks(network_pkl) + w_avg = Gs.get_var('dlatent_avg') # [component] + + Gs_syn_kwargs = dnnlib.EasyDict() + Gs_syn_kwargs.output_transform = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True) + Gs_syn_kwargs.randomize_noise = False + Gs_syn_kwargs.minibatch_size = minibatch_size + + print('Generating W vectors...') + all_seeds = list(set(row_seeds + col_seeds)) + all_z = np.stack([np.random.RandomState(seed).randn(*Gs.input_shape[1:]) for seed in all_seeds]) # [minibatch, component] + all_w = Gs.components.mapping.run(all_z, None) # [minibatch, layer, component] + all_w = w_avg + (all_w - w_avg) * truncation_psi # [minibatch, layer, component] + w_dict = {seed: w for seed, w in zip(all_seeds, list(all_w))} # [layer, component] + + print('Generating images...') + all_images = Gs.components.synthesis.run(all_w, **Gs_syn_kwargs) # [minibatch, height, width, channel] + image_dict = {(seed, seed): image for seed, image in zip(all_seeds, list(all_images))} + + print('Generating style-mixed images...') + for row_seed in row_seeds: + for col_seed in col_seeds: + w = w_dict[row_seed].copy() + w[col_styles] = w_dict[col_seed][col_styles] + image = Gs.components.synthesis.run(w[np.newaxis], **Gs_syn_kwargs)[0] + image_dict[(row_seed, col_seed)] = image + + print('Saving images...') + for (row_seed, col_seed), image in image_dict.items(): + PIL.Image.fromarray(image, 'RGB').save(dnnlib.make_run_dir_path('%d-%d.png' % (row_seed, col_seed))) + + print('Saving image grid...') + _N, _C, H, W = Gs.output_shape + canvas = PIL.Image.new('RGB', (W * (len(col_seeds) + 1), H * (len(row_seeds) + 1)), 'black') + for row_idx, row_seed in enumerate([None] + row_seeds): + for col_idx, col_seed in enumerate([None] + col_seeds): + if row_seed is None and col_seed is None: + continue + key = (row_seed, col_seed) + if row_seed is None: + key = (col_seed, col_seed) + if col_seed is None: + key = (row_seed, row_seed) + canvas.paste(PIL.Image.fromarray(image_dict[key], 'RGB'), (W * col_idx, H * row_idx)) + canvas.save(dnnlib.make_run_dir_path('grid.png')) + +#---------------------------------------------------------------------------- + +def _parse_num_range(s): + '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' + + range_re = re.compile(r'^(\d+)-(\d+)$') + m = 
range_re.match(s) + if m: + return list(range(int(m.group(1)), int(m.group(2))+1)) + vals = s.split(',') + return [int(x) for x in vals] + +#---------------------------------------------------------------------------- + +_examples = '''examples: + + # Generate ffhq uncurated images (matches paper Figure 12) + python %(prog)s generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --seeds=6600-6625 --truncation-psi=0.5 + + # Generate ffhq curated images (matches paper Figure 11) + python %(prog)s generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --seeds=66,230,389,1518 --truncation-psi=1.0 + + # Generate uncurated car images (matches paper Figure 12) + python %(prog)s generate-images --network=gdrive:networks/stylegan2-car-config-f.pkl --seeds=6000-6025 --truncation-psi=0.5 + + # Generate style mixing example (matches style mixing video clip) + python %(prog)s style-mixing-example --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --row-seeds=85,100,75,458,1500 --col-seeds=55,821,1789,293 --truncation-psi=1.0 +''' + +#---------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description='''StyleGAN2 generator. + +Run 'python %(prog)s --help' for subcommand help.''', + epilog=_examples, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + subparsers = parser.add_subparsers(help='Sub-commands', dest='command') + + parser_generate_images = subparsers.add_parser('generate-images', help='Generate images') + parser_generate_images.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) + parser_generate_images.add_argument('--seeds', type=_parse_num_range, help='List of random seeds', required=True) + parser_generate_images.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5) + parser_generate_images.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR') + + parser_style_mixing_example = subparsers.add_parser('style-mixing-example', help='Generate style mixing video') + parser_style_mixing_example.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) + parser_style_mixing_example.add_argument('--row-seeds', type=_parse_num_range, help='Random seeds to use for image rows', required=True) + parser_style_mixing_example.add_argument('--col-seeds', type=_parse_num_range, help='Random seeds to use for image columns', required=True) + parser_style_mixing_example.add_argument('--col-styles', type=_parse_num_range, help='Style layer range (default: %(default)s)', default='0-6') + parser_style_mixing_example.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5) + parser_style_mixing_example.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR') + + args = parser.parse_args() + kwargs = vars(args) + subcmd = kwargs.pop('command') + + if subcmd is None: + print ('Error: missing subcommand. 
Re-run with --help for usage.') + sys.exit(1) + + sc = dnnlib.SubmitConfig() + sc.num_gpus = 1 + sc.submit_target = dnnlib.SubmitTarget.LOCAL + sc.local.do_not_copy_source_files = True + sc.run_dir_root = kwargs.pop('result_dir') + sc.run_desc = subcmd + + func_name_map = { + 'generate-images': 'run_generator.generate_images', + 'style-mixing-example': 'run_generator.style_mixing_example' + } + dnnlib.submit_run(sc, func_name_map[subcmd], **kwargs) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/run_metrics.py b/ContraCLIP/models/genforce/converters/stylegan2_official/run_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..5043b100faf3f58273cdf00239611d950962324c --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/run_metrics.py @@ -0,0 +1,86 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +import argparse +import os +import sys + +import dnnlib +import dnnlib.tflib as tflib + +import pretrained_networks +from metrics import metric_base +from metrics.metric_defaults import metric_defaults + +#---------------------------------------------------------------------------- + +def run(network_pkl, metrics, dataset, data_dir, mirror_augment): + print('Evaluating metrics "%s" for "%s"...' % (','.join(metrics), network_pkl)) + tflib.init_tf() + network_pkl = pretrained_networks.get_path_or_url(network_pkl) + dataset_args = dnnlib.EasyDict(tfrecord_dir=dataset, shuffle_mb=0) + num_gpus = dnnlib.submit_config.num_gpus + metric_group = metric_base.MetricGroup([metric_defaults[metric] for metric in metrics]) + metric_group.run(network_pkl, data_dir=data_dir, dataset_args=dataset_args, mirror_augment=mirror_augment, num_gpus=num_gpus) + +#---------------------------------------------------------------------------- + +def _str_to_bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +#---------------------------------------------------------------------------- + +_examples = '''examples: + + python %(prog)s --data-dir=~/datasets --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --metrics=fid50k,ppl_wend --dataset=ffhq --mirror-augment=true + +valid metrics: + + ''' + ', '.join(sorted([x for x in metric_defaults.keys()])) + ''' +''' + +def main(): + parser = argparse.ArgumentParser( + description='Run StyleGAN2 metrics.', + epilog=_examples, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR') + parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) + parser.add_argument('--metrics', help='Metrics to compute (default: %(default)s)', default='fid50k', type=lambda x: x.split(',')) + parser.add_argument('--dataset', help='Training dataset', required=True) + parser.add_argument('--data-dir', help='Dataset root directory', required=True) + parser.add_argument('--mirror-augment', 
help='Mirror augment (default: %(default)s)', default=False, type=_str_to_bool, metavar='BOOL') + parser.add_argument('--num-gpus', help='Number of GPUs to use', type=int, default=1, metavar='N') + + args = parser.parse_args() + + if not os.path.exists(args.data_dir): + print ('Error: dataset root directory does not exist.') + sys.exit(1) + + kwargs = vars(args) + sc = dnnlib.SubmitConfig() + sc.num_gpus = kwargs.pop('num_gpus') + sc.submit_target = dnnlib.SubmitTarget.LOCAL + sc.local.do_not_copy_source_files = True + sc.run_dir_root = kwargs.pop('result_dir') + sc.run_desc = 'run-metrics' + dnnlib.submit_run(sc, 'run_metrics.run', **kwargs) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/run_projector.py b/ContraCLIP/models/genforce/converters/stylegan2_official/run_projector.py new file mode 100644 index 0000000000000000000000000000000000000000..5fd89ed7304e6736a4b2f5b4a17e8463adf9539a --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/run_projector.py @@ -0,0 +1,148 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +import argparse +import numpy as np +import dnnlib +import dnnlib.tflib as tflib +import re +import sys + +import projector +import pretrained_networks +from training import dataset +from training import misc + +#---------------------------------------------------------------------------- + +def project_image(proj, targets, png_prefix, num_snapshots): + snapshot_steps = set(proj.num_steps - np.linspace(0, proj.num_steps, num_snapshots, endpoint=False, dtype=int)) + misc.save_image_grid(targets, png_prefix + 'target.png', drange=[-1,1]) + proj.start(targets) + while proj.get_cur_step() < proj.num_steps: + print('\r%d / %d ... ' % (proj.get_cur_step(), proj.num_steps), end='', flush=True) + proj.step() + if proj.get_cur_step() in snapshot_steps: + misc.save_image_grid(proj.get_images(), png_prefix + 'step%04d.png' % proj.get_cur_step(), drange=[-1,1]) + print('\r%-30s\r' % '', end='', flush=True) + +#---------------------------------------------------------------------------- + +def project_generated_images(network_pkl, seeds, num_snapshots, truncation_psi): + print('Loading networks from "%s"...' % network_pkl) + _G, _D, Gs = pretrained_networks.load_networks(network_pkl) + proj = projector.Projector() + proj.set_network(Gs) + noise_vars = [var for name, var in Gs.components.synthesis.vars.items() if name.startswith('noise')] + + Gs_kwargs = dnnlib.EasyDict() + Gs_kwargs.randomize_noise = False + Gs_kwargs.truncation_psi = truncation_psi + + for seed_idx, seed in enumerate(seeds): + print('Projecting seed %d (%d/%d) ...' 
% (seed, seed_idx, len(seeds))) + rnd = np.random.RandomState(seed) + z = rnd.randn(1, *Gs.input_shape[1:]) + tflib.set_vars({var: rnd.randn(*var.shape.as_list()) for var in noise_vars}) + images = Gs.run(z, None, **Gs_kwargs) + project_image(proj, targets=images, png_prefix=dnnlib.make_run_dir_path('seed%04d-' % seed), num_snapshots=num_snapshots) + +#---------------------------------------------------------------------------- + +def project_real_images(network_pkl, dataset_name, data_dir, num_images, num_snapshots): + print('Loading networks from "%s"...' % network_pkl) + _G, _D, Gs = pretrained_networks.load_networks(network_pkl) + proj = projector.Projector() + proj.set_network(Gs) + + print('Loading images from "%s"...' % dataset_name) + dataset_obj = dataset.load_dataset(data_dir=data_dir, tfrecord_dir=dataset_name, max_label_size=0, repeat=False, shuffle_mb=0) + assert dataset_obj.shape == Gs.output_shape[1:] + + for image_idx in range(num_images): + print('Projecting image %d/%d ...' % (image_idx, num_images)) + images, _labels = dataset_obj.get_minibatch_np(1) + images = misc.adjust_dynamic_range(images, [0, 255], [-1, 1]) + project_image(proj, targets=images, png_prefix=dnnlib.make_run_dir_path('image%04d-' % image_idx), num_snapshots=num_snapshots) + +#---------------------------------------------------------------------------- + +def _parse_num_range(s): + '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' + + range_re = re.compile(r'^(\d+)-(\d+)$') + m = range_re.match(s) + if m: + return list(range(int(m.group(1)), int(m.group(2))+1)) + vals = s.split(',') + return [int(x) for x in vals] + +#---------------------------------------------------------------------------- + +_examples = '''examples: + + # Project generated images + python %(prog)s project-generated-images --network=gdrive:networks/stylegan2-car-config-f.pkl --seeds=0,1,5 + + # Project real images + python %(prog)s project-real-images --network=gdrive:networks/stylegan2-car-config-f.pkl --dataset=car --data-dir=~/datasets + +''' + +#---------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description='''StyleGAN2 projector. 
+ +Run 'python %(prog)s --help' for subcommand help.''', + epilog=_examples, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + subparsers = parser.add_subparsers(help='Sub-commands', dest='command') + + project_generated_images_parser = subparsers.add_parser('project-generated-images', help='Project generated images') + project_generated_images_parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) + project_generated_images_parser.add_argument('--seeds', type=_parse_num_range, help='List of random seeds', default=range(3)) + project_generated_images_parser.add_argument('--num-snapshots', type=int, help='Number of snapshots (default: %(default)s)', default=5) + project_generated_images_parser.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=1.0) + project_generated_images_parser.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR') + + project_real_images_parser = subparsers.add_parser('project-real-images', help='Project real images') + project_real_images_parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) + project_real_images_parser.add_argument('--data-dir', help='Dataset root directory', required=True) + project_real_images_parser.add_argument('--dataset', help='Training dataset', dest='dataset_name', required=True) + project_real_images_parser.add_argument('--num-snapshots', type=int, help='Number of snapshots (default: %(default)s)', default=5) + project_real_images_parser.add_argument('--num-images', type=int, help='Number of images to project (default: %(default)s)', default=3) + project_real_images_parser.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR') + + args = parser.parse_args() + subcmd = args.command + if subcmd is None: + print ('Error: missing subcommand. Re-run with --help for usage.') + sys.exit(1) + + kwargs = vars(args) + sc = dnnlib.SubmitConfig() + sc.num_gpus = 1 + sc.submit_target = dnnlib.SubmitTarget.LOCAL + sc.local.do_not_copy_source_files = True + sc.run_dir_root = kwargs.pop('result_dir') + sc.run_desc = kwargs.pop('command') + + func_name_map = { + 'project-generated-images': 'run_projector.project_generated_images', + 'project-real-images': 'run_projector.project_real_images' + } + dnnlib.submit_run(sc, func_name_map[subcmd], **kwargs) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/run_training.py b/ContraCLIP/models/genforce/converters/stylegan2_official/run_training.py new file mode 100644 index 0000000000000000000000000000000000000000..bc4c0a2bd414d8ddfe0edaa4b29db5532c440ef1 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/run_training.py @@ -0,0 +1,195 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. 
+# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +import argparse +import copy +import os +import sys + +import dnnlib +from dnnlib import EasyDict + +from metrics.metric_defaults import metric_defaults + +#---------------------------------------------------------------------------- + +_valid_configs = [ + # Table 1 + 'config-a', # Baseline StyleGAN + 'config-b', # + Weight demodulation + 'config-c', # + Lazy regularization + 'config-d', # + Path length regularization + 'config-e', # + No growing, new G & D arch. + 'config-f', # + Large networks (default) + + # Table 2 + 'config-e-Gorig-Dorig', 'config-e-Gorig-Dresnet', 'config-e-Gorig-Dskip', + 'config-e-Gresnet-Dorig', 'config-e-Gresnet-Dresnet', 'config-e-Gresnet-Dskip', + 'config-e-Gskip-Dorig', 'config-e-Gskip-Dresnet', 'config-e-Gskip-Dskip', +] + +#---------------------------------------------------------------------------- + +def run(dataset, data_dir, result_dir, config_id, num_gpus, total_kimg, gamma, mirror_augment, metrics): + train = EasyDict(run_func_name='training.training_loop.training_loop') # Options for training loop. + G = EasyDict(func_name='training.networks_stylegan2.G_main') # Options for generator network. + D = EasyDict(func_name='training.networks_stylegan2.D_stylegan2') # Options for discriminator network. + G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for generator optimizer. + D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for discriminator optimizer. + G_loss = EasyDict(func_name='training.loss.G_logistic_ns_pathreg') # Options for generator loss. + D_loss = EasyDict(func_name='training.loss.D_logistic_r1') # Options for discriminator loss. + sched = EasyDict() # Options for TrainingSchedule. + grid = EasyDict(size='8k', layout='random') # Options for setup_snapshot_image_grid(). + sc = dnnlib.SubmitConfig() # Options for dnnlib.submit_run(). + tf_config = {'rnd.np_random_seed': 1000} # Options for tflib.init_tf(). + + train.data_dir = data_dir + train.total_kimg = total_kimg + train.mirror_augment = mirror_augment + train.image_snapshot_ticks = train.network_snapshot_ticks = 10 + sched.G_lrate_base = sched.D_lrate_base = 0.002 + sched.minibatch_size_base = 32 + sched.minibatch_gpu_base = 4 + D_loss.gamma = 10 + metrics = [metric_defaults[x] for x in metrics] + desc = 'stylegan2' + + desc += '-' + dataset + dataset_args = EasyDict(tfrecord_dir=dataset) + + assert num_gpus in [1, 2, 4, 8] + sc.num_gpus = num_gpus + desc += '-%dgpu' % num_gpus + + assert config_id in _valid_configs + desc += '-' + config_id + + # Configs A-E: Shrink networks to match original StyleGAN. + if config_id != 'config-f': + G.fmap_base = D.fmap_base = 8 << 10 + + # Config E: Set gamma to 100 and override G & D architecture. + if config_id.startswith('config-e'): + D_loss.gamma = 100 + if 'Gorig' in config_id: G.architecture = 'orig' + if 'Gskip' in config_id: G.architecture = 'skip' # (default) + if 'Gresnet' in config_id: G.architecture = 'resnet' + if 'Dorig' in config_id: D.architecture = 'orig' + if 'Dskip' in config_id: D.architecture = 'skip' + if 'Dresnet' in config_id: D.architecture = 'resnet' # (default) + + # Configs A-D: Enable progressive growing and switch to networks that support it. 
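Configs A-E halve `fmap_base` relative to config-f, which is what makes config-f the "large networks" row of Table 1. As a rough guide to what that means in channel counts, here is the usual StyleGAN-family feature-map rule evaluated for both settings (a sketch: the `fmap_decay`/`fmap_max` defaults are assumptions taken from the upstream network code, not from this file):

```python
import numpy as np

def nf(stage, fmap_base, fmap_decay=1.0, fmap_min=1, fmap_max=512):
    # Channel count per stage in the StyleGAN-family synthesis/discriminator networks.
    return int(np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max))

for label, fmap_base in [('config-f', 16 << 10), ('configs a-e', 8 << 10)]:
    print(label, [nf(stage, fmap_base) for stage in range(1, 10)])
# config-f     [512, 512, 512, 512, 512, 256, 128, 64, 32]
# configs a-e  [512, 512, 512, 512, 256, 128, 64, 32, 16]
```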
+ if config_id in ['config-a', 'config-b', 'config-c', 'config-d']: + sched.lod_initial_resolution = 8 + sched.G_lrate_base = sched.D_lrate_base = 0.001 + sched.G_lrate_dict = sched.D_lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003} + sched.minibatch_size_base = 32 # (default) + sched.minibatch_size_dict = {8: 256, 16: 128, 32: 64, 64: 32} + sched.minibatch_gpu_base = 4 # (default) + sched.minibatch_gpu_dict = {8: 32, 16: 16, 32: 8, 64: 4} + G.synthesis_func = 'G_synthesis_stylegan_revised' + D.func_name = 'training.networks_stylegan2.D_stylegan' + + # Configs A-C: Disable path length regularization. + if config_id in ['config-a', 'config-b', 'config-c']: + G_loss = EasyDict(func_name='training.loss.G_logistic_ns') + + # Configs A-B: Disable lazy regularization. + if config_id in ['config-a', 'config-b']: + train.lazy_regularization = False + + # Config A: Switch to original StyleGAN networks. + if config_id == 'config-a': + G = EasyDict(func_name='training.networks_stylegan.G_style') + D = EasyDict(func_name='training.networks_stylegan.D_basic') + + if gamma is not None: + D_loss.gamma = gamma + + sc.submit_target = dnnlib.SubmitTarget.LOCAL + sc.local.do_not_copy_source_files = True + kwargs = EasyDict(train) + kwargs.update(G_args=G, D_args=D, G_opt_args=G_opt, D_opt_args=D_opt, G_loss_args=G_loss, D_loss_args=D_loss) + kwargs.update(dataset_args=dataset_args, sched_args=sched, grid_args=grid, metric_arg_list=metrics, tf_config=tf_config) + kwargs.submit_config = copy.deepcopy(sc) + kwargs.submit_config.run_dir_root = result_dir + kwargs.submit_config.run_desc = desc + dnnlib.submit_run(**kwargs) + +#---------------------------------------------------------------------------- + +def _str_to_bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +def _parse_comma_sep(s): + if s is None or s.lower() == 'none' or s == '': + return [] + return s.split(',') + +#---------------------------------------------------------------------------- + +_examples = '''examples: + + # Train StyleGAN2 using the FFHQ dataset + python %(prog)s --num-gpus=8 --data-dir=~/datasets --config=config-f --dataset=ffhq --mirror-augment=true + +valid configs: + + ''' + ', '.join(_valid_configs) + ''' + +valid metrics: + + ''' + ', '.join(sorted([x for x in metric_defaults.keys()])) + ''' + +''' + +def main(): + parser = argparse.ArgumentParser( + description='Train StyleGAN2.', + epilog=_examples, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR') + parser.add_argument('--data-dir', help='Dataset root directory', required=True) + parser.add_argument('--dataset', help='Training dataset', required=True) + parser.add_argument('--config', help='Training config (default: %(default)s)', default='config-f', required=True, dest='config_id', metavar='CONFIG') + parser.add_argument('--num-gpus', help='Number of GPUs (default: %(default)s)', default=1, type=int, metavar='N') + parser.add_argument('--total-kimg', help='Training length in thousands of images (default: %(default)s)', metavar='KIMG', default=25000, type=int) + parser.add_argument('--gamma', help='R1 regularization weight (default is config dependent)', default=None, type=float) + parser.add_argument('--mirror-augment', 
help='Mirror augment (default: %(default)s)', default=False, metavar='BOOL', type=_str_to_bool) + parser.add_argument('--metrics', help='Comma-separated list of metrics or "none" (default: %(default)s)', default='fid50k', type=_parse_comma_sep) + + args = parser.parse_args() + + if not os.path.exists(args.data_dir): + print ('Error: dataset root directory does not exist.') + sys.exit(1) + + if args.config_id not in _valid_configs: + print ('Error: --config value must be one of: ', ', '.join(_valid_configs)) + sys.exit(1) + + for metric in args.metrics: + if metric not in metric_defaults: + print ('Error: unknown metric \'%s\'' % metric) + sys.exit(1) + + run(**vars(args)) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- + diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/test_nvcc.cu b/ContraCLIP/models/genforce/converters/stylegan2_official/test_nvcc.cu new file mode 100644 index 0000000000000000000000000000000000000000..8b09bbfe01b0a404f32558b4708efa6ece6ddf9f --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/test_nvcc.cu @@ -0,0 +1,29 @@ +// Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +// +// This work is made available under the Nvidia Source Code License-NC. +// To view a copy of this license, visit +// https://nvlabs.github.io/stylegan2/license.html + +#include + +void checkCudaError(cudaError_t err) +{ + if (err != cudaSuccess) + { + printf("%s: %s\n", cudaGetErrorName(err), cudaGetErrorString(err)); + exit(1); + } +} + +__global__ void cudaKernel(void) +{ + printf("GPU says hello.\n"); +} + +int main(void) +{ + printf("CPU says hello.\n"); + checkCudaError(cudaLaunchKernel((void*)cudaKernel, 1, 1, NULL, 0, NULL)); + checkCudaError(cudaDeviceSynchronize()); + return 0; +} diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/training/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2_official/training/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9ab9908efa3cb38af52e8d5bcaa8acffde5a8875 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/training/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +# empty diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/training/dataset.py b/ContraCLIP/models/genforce/converters/stylegan2_official/training/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..2d1059838ab5ec95d5acce62abac6dc93313ba4a --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/training/dataset.py @@ -0,0 +1,199 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Multi-resolution input data pipeline.""" + +import os +import glob +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +#---------------------------------------------------------------------------- +# Dataset class that loads data from tfrecords files. 
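The `TFRecordDataset` class that follows expects each record to carry a `shape` field (three int64s, CHW) and a `data` field with the raw uint8 pixels, as `parse_tfrecord_tf`/`parse_tfrecord_np` below show. A hypothetical writer for that schema, handy for building a toy dataset to smoke-test the pipeline (the helper name, file name, and toy images are made up; the upstream StyleGAN2 dataset tooling is the canonical way to produce real datasets):

```python
import numpy as np
import tensorflow as tf

def write_images_tfrecord(path, images):
    """Write CHW uint8 images in the record schema expected by TFRecordDataset."""
    with tf.io.TFRecordWriter(path) as writer:
        for img in images:
            ex = tf.train.Example(features=tf.train.Features(feature={
                'shape': tf.train.Feature(int64_list=tf.train.Int64List(value=img.shape)),
                'data': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img.tobytes()])),
            }))
            writer.write(ex.SerializeToString())

toy = np.random.randint(0, 256, size=(4, 3, 8, 8), dtype=np.uint8)  # 4 tiny CHW images
write_images_tfrecord('toy-r03.tfrecords', toy)
```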
+ +class TFRecordDataset: + def __init__(self, + tfrecord_dir, # Directory containing a collection of tfrecords files. + resolution = None, # Dataset resolution, None = autodetect. + label_file = None, # Relative path of the labels file, None = autodetect. + max_label_size = 0, # 0 = no labels, 'full' = full labels, = N first label components. + max_images = None, # Maximum number of images to use, None = use all images. + repeat = True, # Repeat dataset indefinitely? + shuffle_mb = 4096, # Shuffle data within specified window (megabytes), 0 = disable shuffling. + prefetch_mb = 2048, # Amount of data to prefetch (megabytes), 0 = disable prefetching. + buffer_mb = 256, # Read buffer size (megabytes). + num_threads = 2): # Number of concurrent threads. + + self.tfrecord_dir = tfrecord_dir + self.resolution = None + self.resolution_log2 = None + self.shape = [] # [channels, height, width] + self.dtype = 'uint8' + self.dynamic_range = [0, 255] + self.label_file = label_file + self.label_size = None # components + self.label_dtype = None + self._np_labels = None + self._tf_minibatch_in = None + self._tf_labels_var = None + self._tf_labels_dataset = None + self._tf_datasets = dict() + self._tf_iterator = None + self._tf_init_ops = dict() + self._tf_minibatch_np = None + self._cur_minibatch = -1 + self._cur_lod = -1 + + # List tfrecords files and inspect their shapes. + assert os.path.isdir(self.tfrecord_dir) + tfr_files = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.tfrecords'))) + assert len(tfr_files) >= 1 + tfr_shapes = [] + for tfr_file in tfr_files: + tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE) + for record in tf.python_io.tf_record_iterator(tfr_file, tfr_opt): + tfr_shapes.append(self.parse_tfrecord_np(record).shape) + break + + # Autodetect label filename. + if self.label_file is None: + guess = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.labels'))) + if len(guess): + self.label_file = guess[0] + elif not os.path.isfile(self.label_file): + guess = os.path.join(self.tfrecord_dir, self.label_file) + if os.path.isfile(guess): + self.label_file = guess + + # Determine shape and resolution. + max_shape = max(tfr_shapes, key=np.prod) + self.resolution = resolution if resolution is not None else max_shape[1] + self.resolution_log2 = int(np.log2(self.resolution)) + self.shape = [max_shape[0], self.resolution, self.resolution] + tfr_lods = [self.resolution_log2 - int(np.log2(shape[1])) for shape in tfr_shapes] + assert all(shape[0] == max_shape[0] for shape in tfr_shapes) + assert all(shape[1] == shape[2] for shape in tfr_shapes) + assert all(shape[1] == self.resolution // (2**lod) for shape, lod in zip(tfr_shapes, tfr_lods)) + assert all(lod in tfr_lods for lod in range(self.resolution_log2 - 1)) + + # Load labels. + assert max_label_size == 'full' or max_label_size >= 0 + self._np_labels = np.zeros([1<<30, 0], dtype=np.float32) + if self.label_file is not None and max_label_size != 0: + self._np_labels = np.load(self.label_file) + assert self._np_labels.ndim == 2 + if max_label_size != 'full' and self._np_labels.shape[1] > max_label_size: + self._np_labels = self._np_labels[:, :max_label_size] + if max_images is not None and self._np_labels.shape[0] > max_images: + self._np_labels = self._np_labels[:max_images] + self.label_size = self._np_labels.shape[1] + self.label_dtype = self._np_labels.dtype.name + + # Build TF expressions. 
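The shape inspection above derives one level-of-detail (lod) per tfrecords file: lod 0 is the full resolution and each further level is one 2x downscaling, which is what the progressive-growing schedule in `run_training.py` indexes into. A small worked example with made-up shapes:

```python
import numpy as np

tfr_shapes = [(3, 1024, 1024), (3, 512, 512), (3, 256, 256), (3, 8, 8)]  # toy values
resolution = max(shape[1] for shape in tfr_shapes)
resolution_log2 = int(np.log2(resolution))
lods = [resolution_log2 - int(np.log2(shape[1])) for shape in tfr_shapes]
print(resolution, lods)   # 1024 [0, 1, 2, 7]
```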
+ with tf.name_scope('Dataset'), tf.device('/cpu:0'): + self._tf_minibatch_in = tf.placeholder(tf.int64, name='minibatch_in', shape=[]) + self._tf_labels_var = tflib.create_var_with_large_initial_value(self._np_labels, name='labels_var') + self._tf_labels_dataset = tf.data.Dataset.from_tensor_slices(self._tf_labels_var) + for tfr_file, tfr_shape, tfr_lod in zip(tfr_files, tfr_shapes, tfr_lods): + if tfr_lod < 0: + continue + dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_mb<<20) + if max_images is not None: + dset = dset.take(max_images) + dset = dset.map(self.parse_tfrecord_tf, num_parallel_calls=num_threads) + dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset)) + bytes_per_item = np.prod(tfr_shape) * np.dtype(self.dtype).itemsize + if shuffle_mb > 0: + dset = dset.shuffle(((shuffle_mb << 20) - 1) // bytes_per_item + 1) + if repeat: + dset = dset.repeat() + if prefetch_mb > 0: + dset = dset.prefetch(((prefetch_mb << 20) - 1) // bytes_per_item + 1) + dset = dset.batch(self._tf_minibatch_in) + self._tf_datasets[tfr_lod] = dset + self._tf_iterator = tf.data.Iterator.from_structure(self._tf_datasets[0].output_types, self._tf_datasets[0].output_shapes) + self._tf_init_ops = {lod: self._tf_iterator.make_initializer(dset) for lod, dset in self._tf_datasets.items()} + + def close(self): + pass + + # Use the given minibatch size and level-of-detail for the data returned by get_minibatch_tf(). + def configure(self, minibatch_size, lod=0): + lod = int(np.floor(lod)) + assert minibatch_size >= 1 and lod in self._tf_datasets + if self._cur_minibatch != minibatch_size or self._cur_lod != lod: + self._tf_init_ops[lod].run({self._tf_minibatch_in: minibatch_size}) + self._cur_minibatch = minibatch_size + self._cur_lod = lod + + # Get next minibatch as TensorFlow expressions. + def get_minibatch_tf(self): # => images, labels + return self._tf_iterator.get_next() + + # Get next minibatch as NumPy arrays. + def get_minibatch_np(self, minibatch_size, lod=0): # => images, labels + self.configure(minibatch_size, lod) + with tf.name_scope('Dataset'): + if self._tf_minibatch_np is None: + self._tf_minibatch_np = self.get_minibatch_tf() + return tflib.run(self._tf_minibatch_np) + + # Get random labels as TensorFlow expression. + def get_random_labels_tf(self, minibatch_size): # => labels + with tf.name_scope('Dataset'): + if self.label_size > 0: + with tf.device('/cpu:0'): + return tf.gather(self._tf_labels_var, tf.random_uniform([minibatch_size], 0, self._np_labels.shape[0], dtype=tf.int32)) + return tf.zeros([minibatch_size, 0], self.label_dtype) + + # Get random labels as NumPy array. + def get_random_labels_np(self, minibatch_size): # => labels + if self.label_size > 0: + return self._np_labels[np.random.randint(self._np_labels.shape[0], size=[minibatch_size])] + return np.zeros([minibatch_size, 0], self.label_dtype) + + # Parse individual image from a tfrecords file into TensorFlow expression. + @staticmethod + def parse_tfrecord_tf(record): + features = tf.parse_single_example(record, features={ + 'shape': tf.FixedLenFeature([3], tf.int64), + 'data': tf.FixedLenFeature([], tf.string)}) + data = tf.decode_raw(features['data'], tf.uint8) + return tf.reshape(data, features['shape']) + + # Parse individual image from a tfrecords file into NumPy array. 
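The shuffle and prefetch buffers above are specified in megabytes and converted to item counts with a ceiling division by the per-image byte size, so the same defaults work at any resolution. The conversion, spelled out for an assumed 256x256 RGB uint8 dataset and the constructor defaults:

```python
import numpy as np

shape = (3, 256, 256)                               # CHW, uint8 (toy value)
bytes_per_item = int(np.prod(shape)) * np.dtype('uint8').itemsize
shuffle_mb, prefetch_mb = 4096, 2048                # constructor defaults above
shuffle_items = ((shuffle_mb << 20) - 1) // bytes_per_item + 1    # ceil(bytes / bytes_per_item)
prefetch_items = ((prefetch_mb << 20) - 1) // bytes_per_item + 1
print(shuffle_items, prefetch_items)                # 21846 10923
```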
+ @staticmethod + def parse_tfrecord_np(record): + ex = tf.train.Example() + ex.ParseFromString(record) + shape = ex.features.feature['shape'].int64_list.value # pylint: disable=no-member + data = ex.features.feature['data'].bytes_list.value[0] # pylint: disable=no-member + return np.fromstring(data, np.uint8).reshape(shape) + +#---------------------------------------------------------------------------- +# Helper func for constructing a dataset object using the given options. + +def load_dataset(class_name=None, data_dir=None, verbose=False, **kwargs): + kwargs = dict(kwargs) + if 'tfrecord_dir' in kwargs: + if class_name is None: + class_name = __name__ + '.TFRecordDataset' + if data_dir is not None: + kwargs['tfrecord_dir'] = os.path.join(data_dir, kwargs['tfrecord_dir']) + + assert class_name is not None + if verbose: + print('Streaming data using %s...' % class_name) + dataset = dnnlib.util.get_obj_by_name(class_name)(**kwargs) + if verbose: + print('Dataset shape =', np.int32(dataset.shape).tolist()) + print('Dynamic range =', dataset.dynamic_range) + print('Label size =', dataset.label_size) + return dataset + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/training/loss.py b/ContraCLIP/models/genforce/converters/stylegan2_official/training/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..7ad2fe16fb657627bfaa547f28653a615ec395c2 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/training/loss.py @@ -0,0 +1,197 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Loss functions.""" + +import numpy as np +import tensorflow as tf +import dnnlib.tflib as tflib +from dnnlib.tflib.autosummary import autosummary + +#---------------------------------------------------------------------------- +# Logistic loss from the paper +# "Generative Adversarial Nets", Goodfellow et al. 
2014 + +def G_logistic(G, D, opt, training_set, minibatch_size): + _ = opt + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + labels = training_set.get_random_labels_tf(minibatch_size) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True) + loss = -tf.nn.softplus(fake_scores_out) # log(1-sigmoid(fake_scores_out)) # pylint: disable=invalid-unary-operand-type + return loss, None + +def G_logistic_ns(G, D, opt, training_set, minibatch_size): + _ = opt + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + labels = training_set.get_random_labels_tf(minibatch_size) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True) + loss = tf.nn.softplus(-fake_scores_out) # -log(sigmoid(fake_scores_out)) + return loss, None + +def D_logistic(G, D, opt, training_set, minibatch_size, reals, labels): + _ = opt, training_set + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + real_scores_out = D.get_output_for(reals, labels, is_training=True) + fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True) + real_scores_out = autosummary('Loss/scores/real', real_scores_out) + fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out) + loss = tf.nn.softplus(fake_scores_out) # -log(1-sigmoid(fake_scores_out)) + loss += tf.nn.softplus(-real_scores_out) # -log(sigmoid(real_scores_out)) # pylint: disable=invalid-unary-operand-type + return loss, None + +#---------------------------------------------------------------------------- +# R1 and R2 regularizers from the paper +# "Which Training Methods for GANs do actually Converge?", Mescheder et al. 
2018 + +def D_logistic_r1(G, D, opt, training_set, minibatch_size, reals, labels, gamma=10.0): + _ = opt, training_set + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + real_scores_out = D.get_output_for(reals, labels, is_training=True) + fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True) + real_scores_out = autosummary('Loss/scores/real', real_scores_out) + fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out) + loss = tf.nn.softplus(fake_scores_out) # -log(1-sigmoid(fake_scores_out)) + loss += tf.nn.softplus(-real_scores_out) # -log(sigmoid(real_scores_out)) # pylint: disable=invalid-unary-operand-type + + with tf.name_scope('GradientPenalty'): + real_grads = tf.gradients(tf.reduce_sum(real_scores_out), [reals])[0] + gradient_penalty = tf.reduce_sum(tf.square(real_grads), axis=[1,2,3]) + gradient_penalty = autosummary('Loss/gradient_penalty', gradient_penalty) + reg = gradient_penalty * (gamma * 0.5) + return loss, reg + +def D_logistic_r2(G, D, opt, training_set, minibatch_size, reals, labels, gamma=10.0): + _ = opt, training_set + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + real_scores_out = D.get_output_for(reals, labels, is_training=True) + fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True) + real_scores_out = autosummary('Loss/scores/real', real_scores_out) + fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out) + loss = tf.nn.softplus(fake_scores_out) # -log(1-sigmoid(fake_scores_out)) + loss += tf.nn.softplus(-real_scores_out) # -log(sigmoid(real_scores_out)) # pylint: disable=invalid-unary-operand-type + + with tf.name_scope('GradientPenalty'): + fake_grads = tf.gradients(tf.reduce_sum(fake_scores_out), [fake_images_out])[0] + gradient_penalty = tf.reduce_sum(tf.square(fake_grads), axis=[1,2,3]) + gradient_penalty = autosummary('Loss/gradient_penalty', gradient_penalty) + reg = gradient_penalty * (gamma * 0.5) + return loss, reg + +#---------------------------------------------------------------------------- +# WGAN loss from the paper +# "Wasserstein Generative Adversarial Networks", Arjovsky et al. 
2017 + +def G_wgan(G, D, opt, training_set, minibatch_size): + _ = opt + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + labels = training_set.get_random_labels_tf(minibatch_size) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True) + loss = -fake_scores_out + return loss, None + +def D_wgan(G, D, opt, training_set, minibatch_size, reals, labels, wgan_epsilon=0.001): + _ = opt, training_set + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + real_scores_out = D.get_output_for(reals, labels, is_training=True) + fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True) + real_scores_out = autosummary('Loss/scores/real', real_scores_out) + fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out) + loss = fake_scores_out - real_scores_out + with tf.name_scope('EpsilonPenalty'): + epsilon_penalty = autosummary('Loss/epsilon_penalty', tf.square(real_scores_out)) + loss += epsilon_penalty * wgan_epsilon + return loss, None + +#---------------------------------------------------------------------------- +# WGAN-GP loss from the paper +# "Improved Training of Wasserstein GANs", Gulrajani et al. 2017 + +def D_wgan_gp(G, D, opt, training_set, minibatch_size, reals, labels, wgan_lambda=10.0, wgan_epsilon=0.001, wgan_target=1.0): + _ = opt, training_set + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + real_scores_out = D.get_output_for(reals, labels, is_training=True) + fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True) + real_scores_out = autosummary('Loss/scores/real', real_scores_out) + fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out) + loss = fake_scores_out - real_scores_out + with tf.name_scope('EpsilonPenalty'): + epsilon_penalty = autosummary('Loss/epsilon_penalty', tf.square(real_scores_out)) + loss += epsilon_penalty * wgan_epsilon + + with tf.name_scope('GradientPenalty'): + mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fake_images_out.dtype) + mixed_images_out = tflib.lerp(tf.cast(reals, fake_images_out.dtype), fake_images_out, mixing_factors) + mixed_scores_out = D.get_output_for(mixed_images_out, labels, is_training=True) + mixed_scores_out = autosummary('Loss/scores/mixed', mixed_scores_out) + mixed_grads = tf.gradients(tf.reduce_sum(mixed_scores_out), [mixed_images_out])[0] + mixed_norms = tf.sqrt(tf.reduce_sum(tf.square(mixed_grads), axis=[1,2,3])) + mixed_norms = autosummary('Loss/mixed_norms', mixed_norms) + gradient_penalty = tf.square(mixed_norms - wgan_target) + reg = gradient_penalty * (wgan_lambda / (wgan_target**2)) + return loss, reg + +#---------------------------------------------------------------------------- +# Non-saturating logistic loss with path length regularizer from the paper +# "Analyzing and Improving the Image Quality of StyleGAN", Karras et al. 
2019 + +def G_logistic_ns_pathreg(G, D, opt, training_set, minibatch_size, pl_minibatch_shrink=2, pl_decay=0.01, pl_weight=2.0): + _ = opt + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + labels = training_set.get_random_labels_tf(minibatch_size) + fake_images_out, fake_dlatents_out = G.get_output_for(latents, labels, is_training=True, return_dlatents=True) + fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True) + loss = tf.nn.softplus(-fake_scores_out) # -log(sigmoid(fake_scores_out)) + + # Path length regularization. + with tf.name_scope('PathReg'): + + # Evaluate the regularization term using a smaller minibatch to conserve memory. + if pl_minibatch_shrink > 1: + pl_minibatch = minibatch_size // pl_minibatch_shrink + pl_latents = tf.random_normal([pl_minibatch] + G.input_shapes[0][1:]) + pl_labels = training_set.get_random_labels_tf(pl_minibatch) + fake_images_out, fake_dlatents_out = G.get_output_for(pl_latents, pl_labels, is_training=True, return_dlatents=True) + + # Compute |J*y|. + pl_noise = tf.random_normal(tf.shape(fake_images_out)) / np.sqrt(np.prod(G.output_shape[2:])) + pl_grads = tf.gradients(tf.reduce_sum(fake_images_out * pl_noise), [fake_dlatents_out])[0] + pl_lengths = tf.sqrt(tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2), axis=1)) + pl_lengths = autosummary('Loss/pl_lengths', pl_lengths) + + # Track exponential moving average of |J*y|. + with tf.control_dependencies(None): + pl_mean_var = tf.Variable(name='pl_mean', trainable=False, initial_value=0.0, dtype=tf.float32) + pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) - pl_mean_var) + pl_update = tf.assign(pl_mean_var, pl_mean) + + # Calculate (|J*y|-a)^2. + with tf.control_dependencies([pl_update]): + pl_penalty = tf.square(pl_lengths - pl_mean) + pl_penalty = autosummary('Loss/pl_penalty', pl_penalty) + + # Apply weight. + # + # Note: The division in pl_noise decreases the weight by num_pixels, and the reduce_mean + # in pl_lengths decreases it by num_affine_layers. The effective weight then becomes: + # + # gamma_pl = pl_weight / num_pixels / num_affine_layers + # = 2 / (r^2) / (log2(r) * 2 - 2) + # = 1 / (r^2 * (log2(r) - 1)) + # = ln(2) / (r^2 * (ln(r) - ln(2)) + # + reg = pl_penalty * pl_weight + + return loss, reg + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/training/misc.py b/ContraCLIP/models/genforce/converters/stylegan2_official/training/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..9b3444e85c70d9fe742bd2e8055a42210d857f8b --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/training/misc.py @@ -0,0 +1,145 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Miscellaneous utility functions.""" + +import os +import pickle +import numpy as np +import PIL.Image +import PIL.ImageFont +import dnnlib + +#---------------------------------------------------------------------------- +# Convenience wrappers for pickle that are able to load data produced by +# older versions of the code, and from external URLs. 
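In `G_logistic_ns_pathreg` above, the per-sample path lengths |J^T y| are pulled towards a running average `a`, itself an exponential moving average of the observed lengths, and the penalty is (|J^T y| - a)^2 scaled by `pl_weight`. A NumPy sketch of just that bookkeeping, applied to precomputed toy lengths (names and values are illustrative):

```python
import numpy as np

def path_length_penalty(pl_lengths, pl_mean, pl_decay=0.01, pl_weight=2.0):
    """One update of the path-length regularizer bookkeeping (NumPy sketch)."""
    pl_mean = pl_mean + pl_decay * (pl_lengths.mean() - pl_mean)  # EMA of |J^T y|
    penalty = (pl_lengths - pl_mean) ** 2                         # (|J^T y| - a)^2
    return pl_weight * penalty, pl_mean

rng = np.random.default_rng(0)
pl_mean = 0.0
for step in range(3):
    pl_lengths = np.abs(rng.normal(10.0, 1.0, size=8))            # toy per-sample lengths
    reg, pl_mean = path_length_penalty(pl_lengths, pl_mean)
    print(step, round(float(pl_mean), 3), round(float(reg.mean()), 3))
```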
+ +def open_file_or_url(file_or_url): + if dnnlib.util.is_url(file_or_url): + return dnnlib.util.open_url(file_or_url, cache_dir='.stylegan2-cache') + return open(file_or_url, 'rb') + +def load_pkl(file_or_url): + with open_file_or_url(file_or_url) as file: + return pickle.load(file, encoding='latin1') + +def save_pkl(obj, filename): + with open(filename, 'wb') as file: + pickle.dump(obj, file, protocol=pickle.HIGHEST_PROTOCOL) + +#---------------------------------------------------------------------------- +# Image utils. + +def adjust_dynamic_range(data, drange_in, drange_out): + if drange_in != drange_out: + scale = (np.float32(drange_out[1]) - np.float32(drange_out[0])) / (np.float32(drange_in[1]) - np.float32(drange_in[0])) + bias = (np.float32(drange_out[0]) - np.float32(drange_in[0]) * scale) + data = data * scale + bias + return data + +def create_image_grid(images, grid_size=None): + assert images.ndim == 3 or images.ndim == 4 + num, img_w, img_h = images.shape[0], images.shape[-1], images.shape[-2] + + if grid_size is not None: + grid_w, grid_h = tuple(grid_size) + else: + grid_w = max(int(np.ceil(np.sqrt(num))), 1) + grid_h = max((num - 1) // grid_w + 1, 1) + + grid = np.zeros(list(images.shape[1:-2]) + [grid_h * img_h, grid_w * img_w], dtype=images.dtype) + for idx in range(num): + x = (idx % grid_w) * img_w + y = (idx // grid_w) * img_h + grid[..., y : y + img_h, x : x + img_w] = images[idx] + return grid + +def convert_to_pil_image(image, drange=[0,1]): + assert image.ndim == 2 or image.ndim == 3 + if image.ndim == 3: + if image.shape[0] == 1: + image = image[0] # grayscale CHW => HW + else: + image = image.transpose(1, 2, 0) # CHW -> HWC + + image = adjust_dynamic_range(image, drange, [0,255]) + image = np.rint(image).clip(0, 255).astype(np.uint8) + fmt = 'RGB' if image.ndim == 3 else 'L' + return PIL.Image.fromarray(image, fmt) + +def save_image_grid(images, filename, drange=[0,1], grid_size=None): + convert_to_pil_image(create_image_grid(images, grid_size), drange).save(filename) + +def apply_mirror_augment(minibatch): + mask = np.random.rand(minibatch.shape[0]) < 0.5 + minibatch = np.array(minibatch) + minibatch[mask] = minibatch[mask, :, :, ::-1] + return minibatch + +#---------------------------------------------------------------------------- +# Loading data from previous training runs. + +def parse_config_for_previous_run(run_dir): + with open(os.path.join(run_dir, 'submit_config.pkl'), 'rb') as f: + data = pickle.load(f) + data = data.get('run_func_kwargs', {}) + return dict(train=data, dataset=data.get('dataset_args', {})) + +#---------------------------------------------------------------------------- +# Size and contents of the image snapshot grids that are exported +# periodically during training. + +def setup_snapshot_image_grid(training_set, + size = '1080p', # '1080p' = to be viewed on 1080p display, '4k' = to be viewed on 4k display. + layout = 'random'): # 'random' = grid contents are selected randomly, 'row_per_class' = each row corresponds to one class label. + + # Select size. + gw = 1; gh = 1 + if size == '1080p': + gw = np.clip(1920 // training_set.shape[2], 3, 32) + gh = np.clip(1080 // training_set.shape[1], 2, 32) + if size == '4k': + gw = np.clip(3840 // training_set.shape[2], 7, 32) + gh = np.clip(2160 // training_set.shape[1], 4, 32) + if size == '8k': + gw = np.clip(7680 // training_set.shape[2], 7, 32) + gh = np.clip(4320 // training_set.shape[1], 4, 32) + + # Initialize data arrays. 
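`adjust_dynamic_range` is a plain affine remap between value ranges; it is what `run_projector.project_real_images` uses to bring uint8 [0, 255] tfrecord images into the [-1, 1] range the networks operate in. A NumPy restatement and a quick check of the scale/bias arithmetic:

```python
import numpy as np

def adjust_dynamic_range(data, drange_in, drange_out):
    # Same affine remap as misc.adjust_dynamic_range above.
    scale = (drange_out[1] - drange_out[0]) / (drange_in[1] - drange_in[0])
    bias = drange_out[0] - drange_in[0] * scale
    return data * scale + bias

x = np.array([0.0, 127.5, 255.0])
print(adjust_dynamic_range(x, [0, 255], [-1, 1]))                        # [-1.  0.  1.]
print(adjust_dynamic_range(np.array([-1.0, 1.0]), [-1, 1], [0, 255]))    # [  0. 255.]
```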
+ reals = np.zeros([gw * gh] + training_set.shape, dtype=training_set.dtype) + labels = np.zeros([gw * gh, training_set.label_size], dtype=training_set.label_dtype) + + # Random layout. + if layout == 'random': + reals[:], labels[:] = training_set.get_minibatch_np(gw * gh) + + # Class-conditional layouts. + class_layouts = dict(row_per_class=[gw,1], col_per_class=[1,gh], class4x4=[4,4]) + if layout in class_layouts: + bw, bh = class_layouts[layout] + nw = (gw - 1) // bw + 1 + nh = (gh - 1) // bh + 1 + blocks = [[] for _i in range(nw * nh)] + for _iter in range(1000000): + real, label = training_set.get_minibatch_np(1) + idx = np.argmax(label[0]) + while idx < len(blocks) and len(blocks[idx]) >= bw * bh: + idx += training_set.label_size + if idx < len(blocks): + blocks[idx].append((real, label)) + if all(len(block) >= bw * bh for block in blocks): + break + for i, block in enumerate(blocks): + for j, (real, label) in enumerate(block): + x = (i % nw) * bw + j % bw + y = (i // nw) * bh + j // bw + if x < gw and y < gh: + reals[x + y * gw] = real[0] + labels[x + y * gw] = label[0] + + return (gw, gh), reals, labels + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/training/networks_stylegan.py b/ContraCLIP/models/genforce/converters/stylegan2_official/training/networks_stylegan.py new file mode 100644 index 0000000000000000000000000000000000000000..76ce31caa0890becebdfc481d92ec81d0023f999 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/training/networks_stylegan.py @@ -0,0 +1,660 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Network architectures used in the StyleGAN paper.""" + +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +# NOTE: Do not import any application-specific modules here! +# Specify all network parameters as kwargs. + +#---------------------------------------------------------------------------- +# Primitive ops for manipulating 4D activation tensors. +# The gradients of these are not necessary efficient or even meaningful. + +def _blur2d(x, f=[1,2,1], normalize=True, flip=False, stride=1): + assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:]) + assert isinstance(stride, int) and stride >= 1 + + # Finalize filter kernel. + f = np.array(f, dtype=np.float32) + if f.ndim == 1: + f = f[:, np.newaxis] * f[np.newaxis, :] + assert f.ndim == 2 + if normalize: + f /= np.sum(f) + if flip: + f = f[::-1, ::-1] + f = f[:, :, np.newaxis, np.newaxis] + f = np.tile(f, [1, 1, int(x.shape[1]), 1]) + + # No-op => early exit. + if f.shape == (1, 1) and f[0,0] == 1: + return x + + # Convolve using depthwise_conv2d. + orig_dtype = x.dtype + x = tf.cast(x, tf.float32) # tf.nn.depthwise_conv2d() doesn't support fp16 + f = tf.constant(f, dtype=x.dtype, name='filter') + strides = [1, 1, stride, stride] + x = tf.nn.depthwise_conv2d(x, f, strides=strides, padding='SAME', data_format='NCHW') + x = tf.cast(x, orig_dtype) + return x + +def _upscale2d(x, factor=2, gain=1): + assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:]) + assert isinstance(factor, int) and factor >= 1 + + # Apply gain. + if gain != 1: + x *= gain + + # No-op => early exit. 
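`_blur2d` above builds its low-pass filter from the separable kernel `f=[1,2,1]`: outer product, normalize to unit sum, then apply depthwise per channel. The resulting 3x3 binomial kernel, computed the same way in NumPy:

```python
import numpy as np

f = np.array([1, 2, 1], dtype=np.float32)
f = f[:, np.newaxis] * f[np.newaxis, :]   # separable -> 3x3
f /= f.sum()                              # normalize=True
print(f)
# [[0.0625 0.125  0.0625]
#  [0.125  0.25   0.125 ]
#  [0.0625 0.125  0.0625]]
```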
+ if factor == 1: + return x + + # Upscale using tf.tile(). + s = x.shape + x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1]) + x = tf.tile(x, [1, 1, 1, factor, 1, factor]) + x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor]) + return x + +def _downscale2d(x, factor=2, gain=1): + assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:]) + assert isinstance(factor, int) and factor >= 1 + + # 2x2, float32 => downscale using _blur2d(). + if factor == 2 and x.dtype == tf.float32: + f = [np.sqrt(gain) / factor] * factor + return _blur2d(x, f=f, normalize=False, stride=factor) + + # Apply gain. + if gain != 1: + x *= gain + + # No-op => early exit. + if factor == 1: + return x + + # Large factor => downscale using tf.nn.avg_pool(). + # NOTE: Requires tf_config['graph_options.place_pruned_graph']=True to work. + ksize = [1, 1, factor, factor] + return tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding='VALID', data_format='NCHW') + +#---------------------------------------------------------------------------- +# High-level ops for manipulating 4D activation tensors. +# The gradients of these are meant to be as efficient as possible. + +def blur2d(x, f=[1,2,1], normalize=True): + with tf.variable_scope('Blur2D'): + @tf.custom_gradient + def func(x): + y = _blur2d(x, f, normalize) + @tf.custom_gradient + def grad(dy): + dx = _blur2d(dy, f, normalize, flip=True) + return dx, lambda ddx: _blur2d(ddx, f, normalize) + return y, grad + return func(x) + +def upscale2d(x, factor=2): + with tf.variable_scope('Upscale2D'): + @tf.custom_gradient + def func(x): + y = _upscale2d(x, factor) + @tf.custom_gradient + def grad(dy): + dx = _downscale2d(dy, factor, gain=factor**2) + return dx, lambda ddx: _upscale2d(ddx, factor) + return y, grad + return func(x) + +def downscale2d(x, factor=2): + with tf.variable_scope('Downscale2D'): + @tf.custom_gradient + def func(x): + y = _downscale2d(x, factor) + @tf.custom_gradient + def grad(dy): + dx = _upscale2d(dy, factor, gain=1/factor**2) + return dx, lambda ddx: _downscale2d(ddx, factor) + return y, grad + return func(x) + +#---------------------------------------------------------------------------- +# Get/create weight tensor for a convolutional or fully-connected layer. + +def get_weight(shape, gain=np.sqrt(2), use_wscale=False, lrmul=1): + fan_in = np.prod(shape[:-1]) # [kernel, kernel, fmaps_in, fmaps_out] or [in, out] + he_std = gain / np.sqrt(fan_in) # He init + + # Equalized learning rate and custom learning rate multiplier. + if use_wscale: + init_std = 1.0 / lrmul + runtime_coef = he_std * lrmul + else: + init_std = he_std / lrmul + runtime_coef = lrmul + + # Create variable. + init = tf.initializers.random_normal(0, init_std) + return tf.get_variable('weight', shape=shape, initializer=init) * runtime_coef + +#---------------------------------------------------------------------------- +# Fully-connected layer. + +def dense(x, fmaps, **kwargs): + if len(x.shape) > 2: + x = tf.reshape(x, [-1, np.prod([d.value for d in x.shape[1:]])]) + w = get_weight([x.shape[1].value, fmaps], **kwargs) + w = tf.cast(w, x.dtype) + return tf.matmul(x, w) + +#---------------------------------------------------------------------------- +# Convolutional layer. 
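`get_weight` above implements the equalized learning rate trick: with `use_wscale=True` the variable is initialized with unit variance and the He-init standard deviation `gain / sqrt(fan_in)` is applied as a runtime multiplier instead, so every layer's weights see comparably scaled gradient updates. A NumPy sketch showing that both branches yield the same effective weight scale (function name and shapes are illustrative):

```python
import numpy as np

def effective_weight(shape, gain=np.sqrt(2), use_wscale=True, lrmul=1.0, seed=0):
    """Draw a weight tensor with the same init/runtime scaling as get_weight above."""
    fan_in = np.prod(shape[:-1])              # [k, k, fmaps_in, fmaps_out] or [in, out]
    he_std = gain / np.sqrt(fan_in)
    if use_wscale:
        init_std, runtime_coef = 1.0 / lrmul, he_std * lrmul
    else:
        init_std, runtime_coef = he_std / lrmul, lrmul
    rng = np.random.default_rng(seed)
    return rng.normal(0.0, init_std, size=shape) * runtime_coef

for use_wscale in (True, False):
    w = effective_weight([3, 3, 512, 512], use_wscale=use_wscale)
    print(use_wscale, round(float(w.std()), 5))   # ~0.0208 = sqrt(2)/sqrt(3*3*512) either way
```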
+ +def conv2d(x, fmaps, kernel, **kwargs): + assert kernel >= 1 and kernel % 2 == 1 + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs) + w = tf.cast(w, x.dtype) + return tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='SAME', data_format='NCHW') + +#---------------------------------------------------------------------------- +# Fused convolution + scaling. +# Faster and uses less memory than performing the operations separately. + +def upscale2d_conv2d(x, fmaps, kernel, fused_scale='auto', **kwargs): + assert kernel >= 1 and kernel % 2 == 1 + assert fused_scale in [True, False, 'auto'] + if fused_scale == 'auto': + fused_scale = min(x.shape[2:]) * 2 >= 128 + + # Not fused => call the individual ops directly. + if not fused_scale: + return conv2d(upscale2d(x), fmaps, kernel, **kwargs) + + # Fused => perform both ops simultaneously using tf.nn.conv2d_transpose(). + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs) + w = tf.transpose(w, [0, 1, 3, 2]) # [kernel, kernel, fmaps_out, fmaps_in] + w = tf.pad(w, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT') + w = tf.add_n([w[1:, 1:], w[:-1, 1:], w[1:, :-1], w[:-1, :-1]]) + w = tf.cast(w, x.dtype) + os = [tf.shape(x)[0], fmaps, x.shape[2] * 2, x.shape[3] * 2] + return tf.nn.conv2d_transpose(x, w, os, strides=[1,1,2,2], padding='SAME', data_format='NCHW') + +def conv2d_downscale2d(x, fmaps, kernel, fused_scale='auto', **kwargs): + assert kernel >= 1 and kernel % 2 == 1 + assert fused_scale in [True, False, 'auto'] + if fused_scale == 'auto': + fused_scale = min(x.shape[2:]) >= 128 + + # Not fused => call the individual ops directly. + if not fused_scale: + return downscale2d(conv2d(x, fmaps, kernel, **kwargs)) + + # Fused => perform both ops simultaneously using tf.nn.conv2d(). + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs) + w = tf.pad(w, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT') + w = tf.add_n([w[1:, 1:], w[:-1, 1:], w[1:, :-1], w[:-1, :-1]]) * 0.25 + w = tf.cast(w, x.dtype) + return tf.nn.conv2d(x, w, strides=[1,1,2,2], padding='SAME', data_format='NCHW') + +#---------------------------------------------------------------------------- +# Apply bias to the given activation tensor. + +def apply_bias(x, lrmul=1): + b = tf.get_variable('bias', shape=[x.shape[1]], initializer=tf.initializers.zeros()) * lrmul + b = tf.cast(b, x.dtype) + if len(x.shape) == 2: + return x + b + return x + tf.reshape(b, [1, -1, 1, 1]) + +#---------------------------------------------------------------------------- +# Leaky ReLU activation. More efficient than tf.nn.leaky_relu() and supports FP16. + +def leaky_relu(x, alpha=0.2): + with tf.variable_scope('LeakyReLU'): + alpha = tf.constant(alpha, dtype=x.dtype, name='alpha') + @tf.custom_gradient + def func(x): + y = tf.maximum(x, x * alpha) + @tf.custom_gradient + def grad(dy): + dx = tf.where(y >= 0, dy, dy * alpha) + return dx, lambda ddx: tf.where(y >= 0, ddx, ddx * alpha) + return y, grad + return func(x) + +#---------------------------------------------------------------------------- +# Pixelwise feature vector normalization. + +def pixel_norm(x, epsilon=1e-8): + with tf.variable_scope('PixelNorm'): + epsilon = tf.constant(epsilon, dtype=x.dtype, name='epsilon') + return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=1, keepdims=True) + epsilon) + +#---------------------------------------------------------------------------- +# Instance normalization. 
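+# Normalizes each feature map of each sample to zero mean and unit variance over the
+# spatial dimensions; the statistics are computed in fp32 for numerical stability.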
+ +def instance_norm(x, epsilon=1e-8): + assert len(x.shape) == 4 # NCHW + with tf.variable_scope('InstanceNorm'): + orig_dtype = x.dtype + x = tf.cast(x, tf.float32) + x -= tf.reduce_mean(x, axis=[2,3], keepdims=True) + epsilon = tf.constant(epsilon, dtype=x.dtype, name='epsilon') + x *= tf.rsqrt(tf.reduce_mean(tf.square(x), axis=[2,3], keepdims=True) + epsilon) + x = tf.cast(x, orig_dtype) + return x + +#---------------------------------------------------------------------------- +# Style modulation. + +def style_mod(x, dlatent, **kwargs): + with tf.variable_scope('StyleMod'): + style = apply_bias(dense(dlatent, fmaps=x.shape[1]*2, gain=1, **kwargs)) + style = tf.reshape(style, [-1, 2, x.shape[1]] + [1] * (len(x.shape) - 2)) + return x * (style[:,0] + 1) + style[:,1] + +#---------------------------------------------------------------------------- +# Noise input. + +def apply_noise(x, noise_var=None, randomize_noise=True): + assert len(x.shape) == 4 # NCHW + with tf.variable_scope('Noise'): + if noise_var is None or randomize_noise: + noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype) + else: + noise = tf.cast(noise_var, x.dtype) + weight = tf.get_variable('weight', shape=[x.shape[1].value], initializer=tf.initializers.zeros()) + return x + noise * tf.reshape(tf.cast(weight, x.dtype), [1, -1, 1, 1]) + +#---------------------------------------------------------------------------- +# Minibatch standard deviation. + +def minibatch_stddev_layer(x, group_size=4, num_new_features=1): + with tf.variable_scope('MinibatchStddev'): + group_size = tf.minimum(group_size, tf.shape(x)[0]) # Minibatch must be divisible by (or smaller than) group_size. + s = x.shape # [NCHW] Input shape. + y = tf.reshape(x, [group_size, -1, num_new_features, s[1]//num_new_features, s[2], s[3]]) # [GMncHW] Split minibatch into M groups of size G. Split channels into n channel groups c. + y = tf.cast(y, tf.float32) # [GMncHW] Cast to FP32. + y -= tf.reduce_mean(y, axis=0, keepdims=True) # [GMncHW] Subtract mean over group. + y = tf.reduce_mean(tf.square(y), axis=0) # [MncHW] Calc variance over group. + y = tf.sqrt(y + 1e-8) # [MncHW] Calc stddev over group. + y = tf.reduce_mean(y, axis=[2,3,4], keepdims=True) # [Mn111] Take average over fmaps and pixels. + y = tf.reduce_mean(y, axis=[2]) # [Mn11] Split channels into c channel groups + y = tf.cast(y, x.dtype) # [Mn11] Cast back to original data type. + y = tf.tile(y, [group_size, 1, s[2], s[3]]) # [NnHW] Replicate over group and pixels. + return tf.concat([x, y], axis=1) # [NCHW] Append as new fmap. + +#---------------------------------------------------------------------------- +# Style-based generator used in the StyleGAN paper. +# Composed of two sub-networks (G_mapping and G_synthesis) that are defined below. + +def G_style( + latents_in, # First input: Latent vectors (Z) [minibatch, latent_size]. + labels_in, # Second input: Conditioning labels [minibatch, label_size]. + truncation_psi = 0.7, # Style strength multiplier for the truncation trick. None = disable. + truncation_cutoff = 8, # Number of layers for which to apply the truncation trick. None = disable. + truncation_psi_val = None, # Value for truncation_psi to use during validation. + truncation_cutoff_val = None, # Value for truncation_cutoff to use during validation. + dlatent_avg_beta = 0.995, # Decay for tracking the moving average of W during training. None = disable. + style_mixing_prob = 0.9, # Probability of mixing styles during training. None = disable. 
+ is_training = False, # Network is under training? Enables and disables specific features. + is_validation = False, # Network is under validation? Chooses which value to use for truncation_psi. + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + components = dnnlib.EasyDict(), # Container for sub-networks. Retained between calls. + **kwargs): # Arguments for sub-networks (G_mapping and G_synthesis). + + # Validate arguments. + assert not is_training or not is_validation + assert isinstance(components, dnnlib.EasyDict) + if is_validation: + truncation_psi = truncation_psi_val + truncation_cutoff = truncation_cutoff_val + if is_training or (truncation_psi is not None and not tflib.is_tf_expression(truncation_psi) and truncation_psi == 1): + truncation_psi = None + if is_training or (truncation_cutoff is not None and not tflib.is_tf_expression(truncation_cutoff) and truncation_cutoff <= 0): + truncation_cutoff = None + if not is_training or (dlatent_avg_beta is not None and not tflib.is_tf_expression(dlatent_avg_beta) and dlatent_avg_beta == 1): + dlatent_avg_beta = None + if not is_training or (style_mixing_prob is not None and not tflib.is_tf_expression(style_mixing_prob) and style_mixing_prob <= 0): + style_mixing_prob = None + + # Setup components. + if 'synthesis' not in components: + components.synthesis = tflib.Network('G_synthesis', func_name=G_synthesis, **kwargs) + num_layers = components.synthesis.input_shape[1] + dlatent_size = components.synthesis.input_shape[2] + if 'mapping' not in components: + components.mapping = tflib.Network('G_mapping', func_name=G_mapping, dlatent_broadcast=num_layers, **kwargs) + + # Setup variables. + lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False) + dlatent_avg = tf.get_variable('dlatent_avg', shape=[dlatent_size], initializer=tf.initializers.zeros(), trainable=False) + + # Evaluate mapping network. + dlatents = components.mapping.get_output_for(latents_in, labels_in, **kwargs) + + # Update moving average of W. + if dlatent_avg_beta is not None: + with tf.variable_scope('DlatentAvg'): + batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0) + update_op = tf.assign(dlatent_avg, tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta)) + with tf.control_dependencies([update_op]): + dlatents = tf.identity(dlatents) + + # Perform style mixing regularization. + if style_mixing_prob is not None: + with tf.name_scope('StyleMix'): + latents2 = tf.random_normal(tf.shape(latents_in)) + dlatents2 = components.mapping.get_output_for(latents2, labels_in, **kwargs) + layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis] + cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2 + mixing_cutoff = tf.cond( + tf.random_uniform([], 0.0, 1.0) < style_mixing_prob, + lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32), + lambda: cur_layers) + dlatents = tf.where(tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)), dlatents, dlatents2) + + # Apply truncation trick. + if truncation_psi is not None and truncation_cutoff is not None: + with tf.variable_scope('Truncation'): + layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis] + ones = np.ones(layer_idx.shape, dtype=np.float32) + coefs = tf.where(layer_idx < truncation_cutoff, truncation_psi * ones, ones) + dlatents = tflib.lerp(dlatent_avg, dlatents, coefs) + + # Evaluate synthesis network. 
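+    # Copy the current level-of-detail into the synthesis sub-network before evaluating it.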
+ with tf.control_dependencies([tf.assign(components.synthesis.find_var('lod'), lod_in)]): + images_out = components.synthesis.get_output_for(dlatents, force_clean_graph=is_template_graph, **kwargs) + return tf.identity(images_out, name='images_out') + +#---------------------------------------------------------------------------- +# Mapping network used in the StyleGAN paper. + +def G_mapping( + latents_in, # First input: Latent vectors (Z) [minibatch, latent_size]. + labels_in, # Second input: Conditioning labels [minibatch, label_size]. + latent_size = 512, # Latent vector (Z) dimensionality. + label_size = 0, # Label dimensionality, 0 if no labels. + dlatent_size = 512, # Disentangled latent (W) dimensionality. + dlatent_broadcast = None, # Output disentangled latent (W) as [minibatch, dlatent_size] or [minibatch, dlatent_broadcast, dlatent_size]. + mapping_layers = 8, # Number of mapping layers. + mapping_fmaps = 512, # Number of activations in the mapping layers. + mapping_lrmul = 0.01, # Learning rate multiplier for the mapping layers. + mapping_nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu'. + use_wscale = True, # Enable equalized learning rate? + normalize_latents = True, # Normalize latent vectors (Z) before feeding them to the mapping layers? + dtype = 'float32', # Data type to use for activations and outputs. + **_kwargs): # Ignore unrecognized keyword args. + + act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[mapping_nonlinearity] + + # Inputs. + latents_in.set_shape([None, latent_size]) + labels_in.set_shape([None, label_size]) + latents_in = tf.cast(latents_in, dtype) + labels_in = tf.cast(labels_in, dtype) + x = latents_in + + # Embed labels and concatenate them with latents. + if label_size: + with tf.variable_scope('LabelConcat'): + w = tf.get_variable('weight', shape=[label_size, latent_size], initializer=tf.initializers.random_normal()) + y = tf.matmul(labels_in, tf.cast(w, dtype)) + x = tf.concat([x, y], axis=1) + + # Normalize latents. + if normalize_latents: + x = pixel_norm(x) + + # Mapping layers. + for layer_idx in range(mapping_layers): + with tf.variable_scope('Dense%d' % layer_idx): + fmaps = dlatent_size if layer_idx == mapping_layers - 1 else mapping_fmaps + x = dense(x, fmaps=fmaps, gain=gain, use_wscale=use_wscale, lrmul=mapping_lrmul) + x = apply_bias(x, lrmul=mapping_lrmul) + x = act(x) + + # Broadcast. + if dlatent_broadcast is not None: + with tf.variable_scope('Broadcast'): + x = tf.tile(x[:, np.newaxis], [1, dlatent_broadcast, 1]) + + # Output. + assert x.dtype == tf.as_dtype(dtype) + return tf.identity(x, name='dlatents_out') + +#---------------------------------------------------------------------------- +# Synthesis network used in the StyleGAN paper. + +def G_synthesis( + dlatents_in, # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size]. + dlatent_size = 512, # Disentangled latent (W) dimensionality. + num_channels = 3, # Number of output color channels. + resolution = 1024, # Output resolution. + fmap_base = 8192, # Overall multiplier for the number of feature maps. + fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution. + fmap_max = 512, # Maximum number of feature maps in any layer. + use_styles = True, # Enable style inputs? + const_input_layer = True, # First layer is a learned constant? + use_noise = True, # Enable noise inputs? 
+ randomize_noise = True, # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables. + nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu' + use_wscale = True, # Enable equalized learning rate? + use_pixel_norm = False, # Enable pixelwise feature vector normalization? + use_instance_norm = True, # Enable instance normalization? + dtype = 'float32', # Data type to use for activations and outputs. + fused_scale = 'auto', # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically. + blur_filter = [1,2,1], # Low-pass filter to apply when resampling activations. None = no filtering. + structure = 'auto', # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically. + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + force_clean_graph = False, # True = construct a clean graph that looks nice in TensorBoard, False = default behavior. + **_kwargs): # Ignore unrecognized keyword args. + + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max) + def blur(x): return blur2d(x, blur_filter) if blur_filter else x + if is_template_graph: force_clean_graph = True + if force_clean_graph: randomize_noise = False + if structure == 'auto': structure = 'linear' if force_clean_graph else 'recursive' + act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[nonlinearity] + num_layers = resolution_log2 * 2 - 2 + num_styles = num_layers if use_styles else 1 + images_out = None + + # Primary inputs. + dlatents_in.set_shape([None, num_styles, dlatent_size]) + dlatents_in = tf.cast(dlatents_in, dtype) + lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype) + + # Noise inputs. + noise_inputs = [] + if use_noise: + for layer_idx in range(num_layers): + res = layer_idx // 2 + 2 + shape = [1, use_noise, 2**res, 2**res] + noise_inputs.append(tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False)) + + # Things to do at the end of each layer. + def layer_epilogue(x, layer_idx): + if use_noise: + x = apply_noise(x, noise_inputs[layer_idx], randomize_noise=randomize_noise) + x = apply_bias(x) + x = act(x) + if use_pixel_norm: + x = pixel_norm(x) + if use_instance_norm: + x = instance_norm(x) + if use_styles: + x = style_mod(x, dlatents_in[:, layer_idx], use_wscale=use_wscale) + return x + + # Early layers. + with tf.variable_scope('4x4'): + if const_input_layer: + with tf.variable_scope('Const'): + x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.ones()) + x = layer_epilogue(tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1]), 0) + else: + with tf.variable_scope('Dense'): + x = dense(dlatents_in[:, 0], fmaps=nf(1)*16, gain=gain/4, use_wscale=use_wscale) # tweak gain to match the official implementation of Progressing GAN + x = layer_epilogue(tf.reshape(x, [-1, nf(1), 4, 4]), 0) + with tf.variable_scope('Conv'): + x = layer_epilogue(conv2d(x, fmaps=nf(1), kernel=3, gain=gain, use_wscale=use_wscale), 1) + + # Building blocks for remaining layers. 
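+    # Each block doubles the resolution: an upscaling 3x3 convolution followed by a plain 3x3
+    # convolution, each wrapped by layer_epilogue() (noise, bias, activation, normalization, style).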
+ def block(res, x): # res = 3..resolution_log2 + with tf.variable_scope('%dx%d' % (2**res, 2**res)): + with tf.variable_scope('Conv0_up'): + x = layer_epilogue(blur(upscale2d_conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale, fused_scale=fused_scale)), res*2-4) + with tf.variable_scope('Conv1'): + x = layer_epilogue(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale), res*2-3) + return x + def torgb(res, x): # res = 2..resolution_log2 + lod = resolution_log2 - res + with tf.variable_scope('ToRGB_lod%d' % lod): + return apply_bias(conv2d(x, fmaps=num_channels, kernel=1, gain=1, use_wscale=use_wscale)) + + # Fixed structure: simple and efficient, but does not support progressive growing. + if structure == 'fixed': + for res in range(3, resolution_log2 + 1): + x = block(res, x) + images_out = torgb(resolution_log2, x) + + # Linear structure: simple but inefficient. + if structure == 'linear': + images_out = torgb(2, x) + for res in range(3, resolution_log2 + 1): + lod = resolution_log2 - res + x = block(res, x) + img = torgb(res, x) + images_out = upscale2d(images_out) + with tf.variable_scope('Grow_lod%d' % lod): + images_out = tflib.lerp_clip(img, images_out, lod_in - lod) + + # Recursive structure: complex but efficient. + if structure == 'recursive': + def cset(cur_lambda, new_cond, new_lambda): + return lambda: tf.cond(new_cond, new_lambda, cur_lambda) + def grow(x, res, lod): + y = block(res, x) + img = lambda: upscale2d(torgb(res, y), 2**lod) + img = cset(img, (lod_in > lod), lambda: upscale2d(tflib.lerp(torgb(res, y), upscale2d(torgb(res - 1, x)), lod_in - lod), 2**lod)) + if lod > 0: img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1)) + return img() + images_out = grow(x, 3, resolution_log2 - 3) + + assert images_out.dtype == tf.as_dtype(dtype) + return tf.identity(images_out, name='images_out') + +#---------------------------------------------------------------------------- +# Discriminator used in the StyleGAN paper. + +def D_basic( + images_in, # First input: Images [minibatch, channel, height, width]. + labels_in, # Second input: Labels [minibatch, label_size]. + num_channels = 1, # Number of input color channels. Overridden based on dataset. + resolution = 32, # Input resolution. Overridden based on dataset. + label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset. + fmap_base = 8192, # Overall multiplier for the number of feature maps. + fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution. + fmap_max = 512, # Maximum number of feature maps in any layer. + nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', + use_wscale = True, # Enable equalized learning rate? + mbstd_group_size = 4, # Group size for the minibatch standard deviation layer, 0 = disable. + mbstd_num_features = 1, # Number of features for the minibatch standard deviation layer. + dtype = 'float32', # Data type to use for activations and outputs. + fused_scale = 'auto', # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically. + blur_filter = [1,2,1], # Low-pass filter to apply when resampling activations. None = no filtering. + structure = 'auto', # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically. + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + **_kwargs): # Ignore unrecognized keyword args. 
+ + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max) + def blur(x): return blur2d(x, blur_filter) if blur_filter else x + if structure == 'auto': structure = 'linear' if is_template_graph else 'recursive' + act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[nonlinearity] + + images_in.set_shape([None, num_channels, resolution, resolution]) + labels_in.set_shape([None, label_size]) + images_in = tf.cast(images_in, dtype) + labels_in = tf.cast(labels_in, dtype) + lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype) + scores_out = None + + # Building blocks. + def fromrgb(x, res): # res = 2..resolution_log2 + with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)): + return act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=1, gain=gain, use_wscale=use_wscale))) + def block(x, res): # res = 2..resolution_log2 + with tf.variable_scope('%dx%d' % (2**res, 2**res)): + if res >= 3: # 8x8 and up + with tf.variable_scope('Conv0'): + x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale))) + with tf.variable_scope('Conv1_down'): + x = act(apply_bias(conv2d_downscale2d(blur(x), fmaps=nf(res-2), kernel=3, gain=gain, use_wscale=use_wscale, fused_scale=fused_scale))) + else: # 4x4 + if mbstd_group_size > 1: + x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features) + with tf.variable_scope('Conv'): + x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale))) + with tf.variable_scope('Dense0'): + x = act(apply_bias(dense(x, fmaps=nf(res-2), gain=gain, use_wscale=use_wscale))) + with tf.variable_scope('Dense1'): + x = apply_bias(dense(x, fmaps=max(label_size, 1), gain=1, use_wscale=use_wscale)) + return x + + # Fixed structure: simple and efficient, but does not support progressive growing. + if structure == 'fixed': + x = fromrgb(images_in, resolution_log2) + for res in range(resolution_log2, 2, -1): + x = block(x, res) + scores_out = block(x, 2) + + # Linear structure: simple but inefficient. + if structure == 'linear': + img = images_in + x = fromrgb(img, resolution_log2) + for res in range(resolution_log2, 2, -1): + lod = resolution_log2 - res + x = block(x, res) + img = downscale2d(img) + y = fromrgb(img, res - 1) + with tf.variable_scope('Grow_lod%d' % lod): + x = tflib.lerp_clip(x, y, lod_in - lod) + scores_out = block(x, 2) + + # Recursive structure: complex but efficient. + if structure == 'recursive': + def cset(cur_lambda, new_cond, new_lambda): + return lambda: tf.cond(new_cond, new_lambda, cur_lambda) + def grow(res, lod): + x = lambda: fromrgb(downscale2d(images_in, 2**lod), res) + if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1)) + x = block(x(), res); y = lambda: x + if res > 2: y = cset(y, (lod_in > lod), lambda: tflib.lerp(x, fromrgb(downscale2d(images_in, 2**(lod+1)), res - 1), lod_in - lod)) + return y() + scores_out = grow(2, resolution_log2 - 2) + + # Label conditioning from "Which Training Methods for GANs do actually Converge?" 
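+    # Project the per-class scores onto the one-hot label, yielding a single conditional score.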
+ if label_size: + with tf.variable_scope('LabelSwitch'): + scores_out = tf.reduce_sum(scores_out * labels_in, axis=1, keepdims=True) + + assert scores_out.dtype == tf.as_dtype(dtype) + scores_out = tf.identity(scores_out, name='scores_out') + return scores_out + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/training/networks_stylegan2.py b/ContraCLIP/models/genforce/converters/stylegan2_official/training/networks_stylegan2.py new file mode 100644 index 0000000000000000000000000000000000000000..aa435cfda1c92730b9a5c4c517b12f2c6a6d9a5d --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/training/networks_stylegan2.py @@ -0,0 +1,697 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Network architectures used in the StyleGAN2 paper.""" + +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib +from dnnlib.tflib.ops.upfirdn_2d import upsample_2d, downsample_2d, upsample_conv_2d, conv_downsample_2d +from dnnlib.tflib.ops.fused_bias_act import fused_bias_act + +# NOTE: Do not import any application-specific modules here! +# Specify all network parameters as kwargs. + +#---------------------------------------------------------------------------- +# Get/create weight tensor for a convolution or fully-connected layer. + +def get_weight(shape, gain=1, use_wscale=True, lrmul=1, weight_var='weight'): + fan_in = np.prod(shape[:-1]) # [kernel, kernel, fmaps_in, fmaps_out] or [in, out] + he_std = gain / np.sqrt(fan_in) # He init + + # Equalized learning rate and custom learning rate multiplier. + if use_wscale: + init_std = 1.0 / lrmul + runtime_coef = he_std * lrmul + else: + init_std = he_std / lrmul + runtime_coef = lrmul + + # Create variable. + init = tf.initializers.random_normal(0, init_std) + return tf.get_variable(weight_var, shape=shape, initializer=init) * runtime_coef + +#---------------------------------------------------------------------------- +# Fully-connected layer. + +def dense_layer(x, fmaps, gain=1, use_wscale=True, lrmul=1, weight_var='weight'): + if len(x.shape) > 2: + x = tf.reshape(x, [-1, np.prod([d.value for d in x.shape[1:]])]) + w = get_weight([x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale, lrmul=lrmul, weight_var=weight_var) + w = tf.cast(w, x.dtype) + return tf.matmul(x, w) + +#---------------------------------------------------------------------------- +# Convolution layer with optional upsampling or downsampling. + +def conv2d_layer(x, fmaps, kernel, up=False, down=False, resample_kernel=None, gain=1, use_wscale=True, lrmul=1, weight_var='weight'): + assert not (up and down) + assert kernel >= 1 and kernel % 2 == 1 + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale, lrmul=lrmul, weight_var=weight_var) + if up: + x = upsample_conv_2d(x, tf.cast(w, x.dtype), data_format='NCHW', k=resample_kernel) + elif down: + x = conv_downsample_2d(x, tf.cast(w, x.dtype), data_format='NCHW', k=resample_kernel) + else: + x = tf.nn.conv2d(x, tf.cast(w, x.dtype), data_format='NCHW', strides=[1,1,1,1], padding='SAME') + return x + +#---------------------------------------------------------------------------- +# Apply bias and activation func. 
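+# The bias addition and the activation are performed by a single fused_bias_act() op.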
+ +def apply_bias_act(x, act='linear', alpha=None, gain=None, lrmul=1, bias_var='bias'): + b = tf.get_variable(bias_var, shape=[x.shape[1]], initializer=tf.initializers.zeros()) * lrmul + return fused_bias_act(x, b=tf.cast(b, x.dtype), act=act, alpha=alpha, gain=gain) + +#---------------------------------------------------------------------------- +# Naive upsampling (nearest neighbor) and downsampling (average pooling). + +def naive_upsample_2d(x, factor=2): + with tf.variable_scope('NaiveUpsample'): + _N, C, H, W = x.shape.as_list() + x = tf.reshape(x, [-1, C, H, 1, W, 1]) + x = tf.tile(x, [1, 1, 1, factor, 1, factor]) + return tf.reshape(x, [-1, C, H * factor, W * factor]) + +def naive_downsample_2d(x, factor=2): + with tf.variable_scope('NaiveDownsample'): + _N, C, H, W = x.shape.as_list() + x = tf.reshape(x, [-1, C, H // factor, factor, W // factor, factor]) + return tf.reduce_mean(x, axis=[3,5]) + +#---------------------------------------------------------------------------- +# Modulated convolution layer. + +def modulated_conv2d_layer(x, y, fmaps, kernel, up=False, down=False, demodulate=True, resample_kernel=None, gain=1, use_wscale=True, lrmul=1, fused_modconv=True, weight_var='weight', mod_weight_var='mod_weight', mod_bias_var='mod_bias'): + assert not (up and down) + assert kernel >= 1 and kernel % 2 == 1 + + # Get weight. + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale, lrmul=lrmul, weight_var=weight_var) + ww = w[np.newaxis] # [BkkIO] Introduce minibatch dimension. + + # Modulate. + s = dense_layer(y, fmaps=x.shape[1].value, weight_var=mod_weight_var) # [BI] Transform incoming W to style. + s = apply_bias_act(s, bias_var=mod_bias_var) + 1 # [BI] Add bias (initially 1). + ww *= tf.cast(s[:, np.newaxis, np.newaxis, :, np.newaxis], w.dtype) # [BkkIO] Scale input feature maps. + + # Demodulate. + if demodulate: + d = tf.rsqrt(tf.reduce_sum(tf.square(ww), axis=[1,2,3]) + 1e-8) # [BO] Scaling factor. + ww *= d[:, np.newaxis, np.newaxis, np.newaxis, :] # [BkkIO] Scale output feature maps. + + # Reshape/scale input. + if fused_modconv: + x = tf.reshape(x, [1, -1, x.shape[2], x.shape[3]]) # Fused => reshape minibatch to convolution groups. + w = tf.reshape(tf.transpose(ww, [1, 2, 3, 0, 4]), [ww.shape[1], ww.shape[2], ww.shape[3], -1]) + else: + x *= tf.cast(s[:, :, np.newaxis, np.newaxis], x.dtype) # [BIhw] Not fused => scale input activations. + + # Convolution with optional up/downsampling. + if up: + x = upsample_conv_2d(x, tf.cast(w, x.dtype), data_format='NCHW', k=resample_kernel) + elif down: + x = conv_downsample_2d(x, tf.cast(w, x.dtype), data_format='NCHW', k=resample_kernel) + else: + x = tf.nn.conv2d(x, tf.cast(w, x.dtype), data_format='NCHW', strides=[1,1,1,1], padding='SAME') + + # Reshape/scale output. + if fused_modconv: + x = tf.reshape(x, [-1, fmaps, x.shape[2], x.shape[3]]) # Fused => reshape convolution groups back to minibatch. + elif demodulate: + x *= tf.cast(d[:, :, np.newaxis, np.newaxis], x.dtype) # [BOhw] Not fused => scale output activations. + return x + +#---------------------------------------------------------------------------- +# Minibatch standard deviation layer. + +def minibatch_stddev_layer(x, group_size=4, num_new_features=1): + group_size = tf.minimum(group_size, tf.shape(x)[0]) # Minibatch must be divisible by (or smaller than) group_size. + s = x.shape # [NCHW] Input shape. 
+ y = tf.reshape(x, [group_size, -1, num_new_features, s[1]//num_new_features, s[2], s[3]]) # [GMncHW] Split minibatch into M groups of size G. Split channels into n channel groups c. + y = tf.cast(y, tf.float32) # [GMncHW] Cast to FP32. + y -= tf.reduce_mean(y, axis=0, keepdims=True) # [GMncHW] Subtract mean over group. + y = tf.reduce_mean(tf.square(y), axis=0) # [MncHW] Calc variance over group. + y = tf.sqrt(y + 1e-8) # [MncHW] Calc stddev over group. + y = tf.reduce_mean(y, axis=[2,3,4], keepdims=True) # [Mn111] Take average over fmaps and pixels. + y = tf.reduce_mean(y, axis=[2]) # [Mn11] Split channels into c channel groups + y = tf.cast(y, x.dtype) # [Mn11] Cast back to original data type. + y = tf.tile(y, [group_size, 1, s[2], s[3]]) # [NnHW] Replicate over group and pixels. + return tf.concat([x, y], axis=1) # [NCHW] Append as new fmap. + +#---------------------------------------------------------------------------- +# Main generator network. +# Composed of two sub-networks (mapping and synthesis) that are defined below. +# Used in configs B-F (Table 1). + +def G_main( + latents_in, # First input: Latent vectors (Z) [minibatch, latent_size]. + labels_in, # Second input: Conditioning labels [minibatch, label_size]. + truncation_psi = 0.5, # Style strength multiplier for the truncation trick. None = disable. + truncation_cutoff = None, # Number of layers for which to apply the truncation trick. None = disable. + truncation_psi_val = None, # Value for truncation_psi to use during validation. + truncation_cutoff_val = None, # Value for truncation_cutoff to use during validation. + dlatent_avg_beta = 0.995, # Decay for tracking the moving average of W during training. None = disable. + style_mixing_prob = 0.9, # Probability of mixing styles during training. None = disable. + is_training = False, # Network is under training? Enables and disables specific features. + is_validation = False, # Network is under validation? Chooses which value to use for truncation_psi. + return_dlatents = False, # Return dlatents in addition to the images? + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + components = dnnlib.EasyDict(), # Container for sub-networks. Retained between calls. + mapping_func = 'G_mapping', # Build func name for the mapping network. + synthesis_func = 'G_synthesis_stylegan2', # Build func name for the synthesis network. + **kwargs): # Arguments for sub-networks (mapping and synthesis). + + # Validate arguments. + assert not is_training or not is_validation + assert isinstance(components, dnnlib.EasyDict) + if is_validation: + truncation_psi = truncation_psi_val + truncation_cutoff = truncation_cutoff_val + if is_training or (truncation_psi is not None and not tflib.is_tf_expression(truncation_psi) and truncation_psi == 1): + truncation_psi = None + if is_training: + truncation_cutoff = None + if not is_training or (dlatent_avg_beta is not None and not tflib.is_tf_expression(dlatent_avg_beta) and dlatent_avg_beta == 1): + dlatent_avg_beta = None + if not is_training or (style_mixing_prob is not None and not tflib.is_tf_expression(style_mixing_prob) and style_mixing_prob <= 0): + style_mixing_prob = None + + # Setup components. 
+ if 'synthesis' not in components: + components.synthesis = tflib.Network('G_synthesis', func_name=globals()[synthesis_func], **kwargs) + num_layers = components.synthesis.input_shape[1] + dlatent_size = components.synthesis.input_shape[2] + if 'mapping' not in components: + components.mapping = tflib.Network('G_mapping', func_name=globals()[mapping_func], dlatent_broadcast=num_layers, **kwargs) + + # Setup variables. + lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False) + dlatent_avg = tf.get_variable('dlatent_avg', shape=[dlatent_size], initializer=tf.initializers.zeros(), trainable=False) + + # Evaluate mapping network. + dlatents = components.mapping.get_output_for(latents_in, labels_in, is_training=is_training, **kwargs) + dlatents = tf.cast(dlatents, tf.float32) + + # Update moving average of W. + if dlatent_avg_beta is not None: + with tf.variable_scope('DlatentAvg'): + batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0) + update_op = tf.assign(dlatent_avg, tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta)) + with tf.control_dependencies([update_op]): + dlatents = tf.identity(dlatents) + + # Perform style mixing regularization. + if style_mixing_prob is not None: + with tf.variable_scope('StyleMix'): + latents2 = tf.random_normal(tf.shape(latents_in)) + dlatents2 = components.mapping.get_output_for(latents2, labels_in, is_training=is_training, **kwargs) + dlatents2 = tf.cast(dlatents2, tf.float32) + layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis] + cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2 + mixing_cutoff = tf.cond( + tf.random_uniform([], 0.0, 1.0) < style_mixing_prob, + lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32), + lambda: cur_layers) + dlatents = tf.where(tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)), dlatents, dlatents2) + + # Apply truncation trick. + if truncation_psi is not None: + with tf.variable_scope('Truncation'): + layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis] + layer_psi = np.ones(layer_idx.shape, dtype=np.float32) + if truncation_cutoff is None: + layer_psi *= truncation_psi + else: + layer_psi = tf.where(layer_idx < truncation_cutoff, layer_psi * truncation_psi, layer_psi) + dlatents = tflib.lerp(dlatent_avg, dlatents, layer_psi) + + # Evaluate synthesis network. + deps = [] + if 'lod' in components.synthesis.vars: + deps.append(tf.assign(components.synthesis.vars['lod'], lod_in)) + with tf.control_dependencies(deps): + images_out = components.synthesis.get_output_for(dlatents, is_training=is_training, force_clean_graph=is_template_graph, **kwargs) + + # Return requested outputs. + images_out = tf.identity(images_out, name='images_out') + if return_dlatents: + return images_out, dlatents + return images_out + +#---------------------------------------------------------------------------- +# Mapping network. +# Transforms the input latent code (z) to the disentangled latent code (w). +# Used in configs B-F (Table 1). + +def G_mapping( + latents_in, # First input: Latent vectors (Z) [minibatch, latent_size]. + labels_in, # Second input: Conditioning labels [minibatch, label_size]. + latent_size = 512, # Latent vector (Z) dimensionality. + label_size = 0, # Label dimensionality, 0 if no labels. + dlatent_size = 512, # Disentangled latent (W) dimensionality. + dlatent_broadcast = None, # Output disentangled latent (W) as [minibatch, dlatent_size] or [minibatch, dlatent_broadcast, dlatent_size]. + mapping_layers = 8, # Number of mapping layers. 
+ mapping_fmaps = 512, # Number of activations in the mapping layers. + mapping_lrmul = 0.01, # Learning rate multiplier for the mapping layers. + mapping_nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + normalize_latents = True, # Normalize latent vectors (Z) before feeding them to the mapping layers? + dtype = 'float32', # Data type to use for activations and outputs. + **_kwargs): # Ignore unrecognized keyword args. + + act = mapping_nonlinearity + + # Inputs. + latents_in.set_shape([None, latent_size]) + labels_in.set_shape([None, label_size]) + latents_in = tf.cast(latents_in, dtype) + labels_in = tf.cast(labels_in, dtype) + x = latents_in + + # Embed labels and concatenate them with latents. + if label_size: + with tf.variable_scope('LabelConcat'): + w = tf.get_variable('weight', shape=[label_size, latent_size], initializer=tf.initializers.random_normal()) + y = tf.matmul(labels_in, tf.cast(w, dtype)) + x = tf.concat([x, y], axis=1) + + # Normalize latents. + if normalize_latents: + with tf.variable_scope('Normalize'): + x *= tf.rsqrt(tf.reduce_mean(tf.square(x), axis=1, keepdims=True) + 1e-8) + + # Mapping layers. + for layer_idx in range(mapping_layers): + with tf.variable_scope('Dense%d' % layer_idx): + fmaps = dlatent_size if layer_idx == mapping_layers - 1 else mapping_fmaps + x = apply_bias_act(dense_layer(x, fmaps=fmaps, lrmul=mapping_lrmul), act=act, lrmul=mapping_lrmul) + + # Broadcast. + if dlatent_broadcast is not None: + with tf.variable_scope('Broadcast'): + x = tf.tile(x[:, np.newaxis], [1, dlatent_broadcast, 1]) + + # Output. + assert x.dtype == tf.as_dtype(dtype) + return tf.identity(x, name='dlatents_out') + +#---------------------------------------------------------------------------- +# StyleGAN synthesis network with revised architecture (Figure 2d). +# Implements progressive growing, but no skip connections or residual nets (Figure 7). +# Used in configs B-D (Table 1). + +def G_synthesis_stylegan_revised( + dlatents_in, # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size]. + dlatent_size = 512, # Disentangled latent (W) dimensionality. + num_channels = 3, # Number of output color channels. + resolution = 1024, # Output resolution. + fmap_base = 16 << 10, # Overall multiplier for the number of feature maps. + fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution. + fmap_min = 1, # Minimum number of feature maps in any layer. + fmap_max = 512, # Maximum number of feature maps in any layer. + randomize_noise = True, # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables. + nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + dtype = 'float32', # Data type to use for activations and outputs. + resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations. None = no filtering. + fused_modconv = True, # Implement modulated_conv2d_layer() as a single fused op? + structure = 'auto', # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically. + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + force_clean_graph = False, # True = construct a clean graph that looks nice in TensorBoard, False = default behavior. + **_kwargs): # Ignore unrecognized keyword args. 
+ + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max) + if is_template_graph: force_clean_graph = True + if force_clean_graph: randomize_noise = False + if structure == 'auto': structure = 'linear' if force_clean_graph else 'recursive' + act = nonlinearity + num_layers = resolution_log2 * 2 - 2 + images_out = None + + # Primary inputs. + dlatents_in.set_shape([None, num_layers, dlatent_size]) + dlatents_in = tf.cast(dlatents_in, dtype) + lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype) + + # Noise inputs. + noise_inputs = [] + for layer_idx in range(num_layers - 1): + res = (layer_idx + 5) // 2 + shape = [1, 1, 2**res, 2**res] + noise_inputs.append(tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False)) + + # Single convolution layer with all the bells and whistles. + def layer(x, layer_idx, fmaps, kernel, up=False): + x = modulated_conv2d_layer(x, dlatents_in[:, layer_idx], fmaps=fmaps, kernel=kernel, up=up, resample_kernel=resample_kernel, fused_modconv=fused_modconv) + if randomize_noise: + noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype) + else: + noise = tf.cast(noise_inputs[layer_idx], x.dtype) + noise_strength = tf.get_variable('noise_strength', shape=[], initializer=tf.initializers.zeros()) + x += noise * tf.cast(noise_strength, x.dtype) + return apply_bias_act(x, act=act) + + # Early layers. + with tf.variable_scope('4x4'): + with tf.variable_scope('Const'): + x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.random_normal()) + x = tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1]) + with tf.variable_scope('Conv'): + x = layer(x, layer_idx=0, fmaps=nf(1), kernel=3) + + # Building blocks for remaining layers. + def block(res, x): # res = 3..resolution_log2 + with tf.variable_scope('%dx%d' % (2**res, 2**res)): + with tf.variable_scope('Conv0_up'): + x = layer(x, layer_idx=res*2-5, fmaps=nf(res-1), kernel=3, up=True) + with tf.variable_scope('Conv1'): + x = layer(x, layer_idx=res*2-4, fmaps=nf(res-1), kernel=3) + return x + def torgb(res, x): # res = 2..resolution_log2 + with tf.variable_scope('ToRGB_lod%d' % (resolution_log2 - res)): + return apply_bias_act(modulated_conv2d_layer(x, dlatents_in[:, res*2-3], fmaps=num_channels, kernel=1, demodulate=False, fused_modconv=fused_modconv)) + + # Fixed structure: simple and efficient, but does not support progressive growing. + if structure == 'fixed': + for res in range(3, resolution_log2 + 1): + x = block(res, x) + images_out = torgb(resolution_log2, x) + + # Linear structure: simple but inefficient. + if structure == 'linear': + images_out = torgb(2, x) + for res in range(3, resolution_log2 + 1): + lod = resolution_log2 - res + x = block(res, x) + img = torgb(res, x) + with tf.variable_scope('Upsample_lod%d' % lod): + images_out = upsample_2d(images_out) + with tf.variable_scope('Grow_lod%d' % lod): + images_out = tflib.lerp_clip(img, images_out, lod_in - lod) + + # Recursive structure: complex but efficient. 
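+    # Builds a tree of tf.cond() ops so that only the layers needed for the current lod_in are evaluated.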
+ if structure == 'recursive': + def cset(cur_lambda, new_cond, new_lambda): + return lambda: tf.cond(new_cond, new_lambda, cur_lambda) + def grow(x, res, lod): + y = block(res, x) + img = lambda: naive_upsample_2d(torgb(res, y), factor=2**lod) + img = cset(img, (lod_in > lod), lambda: naive_upsample_2d(tflib.lerp(torgb(res, y), upsample_2d(torgb(res - 1, x)), lod_in - lod), factor=2**lod)) + if lod > 0: img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1)) + return img() + images_out = grow(x, 3, resolution_log2 - 3) + + assert images_out.dtype == tf.as_dtype(dtype) + return tf.identity(images_out, name='images_out') + +#---------------------------------------------------------------------------- +# StyleGAN2 synthesis network (Figure 7). +# Implements skip connections and residual nets (Figure 7), but no progressive growing. +# Used in configs E-F (Table 1). + +def G_synthesis_stylegan2( + dlatents_in, # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size]. + dlatent_size = 512, # Disentangled latent (W) dimensionality. + num_channels = 3, # Number of output color channels. + resolution = 1024, # Output resolution. + fmap_base = 16 << 10, # Overall multiplier for the number of feature maps. + fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution. + fmap_min = 1, # Minimum number of feature maps in any layer. + fmap_max = 512, # Maximum number of feature maps in any layer. + randomize_noise = True, # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables. + architecture = 'skip', # Architecture: 'orig', 'skip', 'resnet'. + nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + dtype = 'float32', # Data type to use for activations and outputs. + resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations. None = no filtering. + fused_modconv = True, # Implement modulated_conv2d_layer() as a single fused op? + **_kwargs): # Ignore unrecognized keyword args. + + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max) + assert architecture in ['orig', 'skip', 'resnet'] + act = nonlinearity + num_layers = resolution_log2 * 2 - 2 + images_out = None + + # Primary inputs. + dlatents_in.set_shape([None, num_layers, dlatent_size]) + dlatents_in = tf.cast(dlatents_in, dtype) + + # Noise inputs. + noise_inputs = [] + for layer_idx in range(num_layers - 1): + res = (layer_idx + 5) // 2 + shape = [1, 1, 2**res, 2**res] + noise_inputs.append(tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False)) + + # Single convolution layer with all the bells and whistles. + def layer(x, layer_idx, fmaps, kernel, up=False): + x = modulated_conv2d_layer(x, dlatents_in[:, layer_idx], fmaps=fmaps, kernel=kernel, up=up, resample_kernel=resample_kernel, fused_modconv=fused_modconv) + if randomize_noise: + noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype) + else: + noise = tf.cast(noise_inputs[layer_idx], x.dtype) + noise_strength = tf.get_variable('noise_strength', shape=[], initializer=tf.initializers.zeros()) + x += noise * tf.cast(noise_strength, x.dtype) + return apply_bias_act(x, act=act) + + # Building blocks for main layers. 
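+    # Two modulated convolutions per resolution (the first upsamples 2x); with the 'resnet'
+    # architecture a 1x1 skip convolution is added and the sum is scaled by 1/sqrt(2).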
+ def block(x, res): # res = 3..resolution_log2 + t = x + with tf.variable_scope('Conv0_up'): + x = layer(x, layer_idx=res*2-5, fmaps=nf(res-1), kernel=3, up=True) + with tf.variable_scope('Conv1'): + x = layer(x, layer_idx=res*2-4, fmaps=nf(res-1), kernel=3) + if architecture == 'resnet': + with tf.variable_scope('Skip'): + t = conv2d_layer(t, fmaps=nf(res-1), kernel=1, up=True, resample_kernel=resample_kernel) + x = (x + t) * (1 / np.sqrt(2)) + return x + def upsample(y): + with tf.variable_scope('Upsample'): + return upsample_2d(y, k=resample_kernel) + def torgb(x, y, res): # res = 2..resolution_log2 + with tf.variable_scope('ToRGB'): + t = apply_bias_act(modulated_conv2d_layer(x, dlatents_in[:, res*2-3], fmaps=num_channels, kernel=1, demodulate=False, fused_modconv=fused_modconv)) + return t if y is None else y + t + + # Early layers. + y = None + with tf.variable_scope('4x4'): + with tf.variable_scope('Const'): + x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.random_normal()) + x = tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1]) + with tf.variable_scope('Conv'): + x = layer(x, layer_idx=0, fmaps=nf(1), kernel=3) + if architecture == 'skip': + y = torgb(x, y, 2) + + # Main layers. + for res in range(3, resolution_log2 + 1): + with tf.variable_scope('%dx%d' % (2**res, 2**res)): + x = block(x, res) + if architecture == 'skip': + y = upsample(y) + if architecture == 'skip' or res == resolution_log2: + y = torgb(x, y, res) + images_out = y + + assert images_out.dtype == tf.as_dtype(dtype) + return tf.identity(images_out, name='images_out') + +#---------------------------------------------------------------------------- +# Original StyleGAN discriminator. +# Used in configs B-D (Table 1). + +def D_stylegan( + images_in, # First input: Images [minibatch, channel, height, width]. + labels_in, # Second input: Labels [minibatch, label_size]. + num_channels = 3, # Number of input color channels. Overridden based on dataset. + resolution = 1024, # Input resolution. Overridden based on dataset. + label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset. + fmap_base = 16 << 10, # Overall multiplier for the number of feature maps. + fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution. + fmap_min = 1, # Minimum number of feature maps in any layer. + fmap_max = 512, # Maximum number of feature maps in any layer. + nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + mbstd_group_size = 4, # Group size for the minibatch standard deviation layer, 0 = disable. + mbstd_num_features = 1, # Number of features for the minibatch standard deviation layer. + dtype = 'float32', # Data type to use for activations and outputs. + resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations. None = no filtering. + structure = 'auto', # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically. + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + **_kwargs): # Ignore unrecognized keyword args. 
+ + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max) + if structure == 'auto': structure = 'linear' if is_template_graph else 'recursive' + act = nonlinearity + + images_in.set_shape([None, num_channels, resolution, resolution]) + labels_in.set_shape([None, label_size]) + images_in = tf.cast(images_in, dtype) + labels_in = tf.cast(labels_in, dtype) + lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype) + + # Building blocks for spatial layers. + def fromrgb(x, res): # res = 2..resolution_log2 + with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)): + return apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=1), act=act) + def block(x, res): # res = 2..resolution_log2 + with tf.variable_scope('%dx%d' % (2**res, 2**res)): + with tf.variable_scope('Conv0'): + x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=3), act=act) + with tf.variable_scope('Conv1_down'): + x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-2), kernel=3, down=True, resample_kernel=resample_kernel), act=act) + return x + + # Fixed structure: simple and efficient, but does not support progressive growing. + if structure == 'fixed': + x = fromrgb(images_in, resolution_log2) + for res in range(resolution_log2, 2, -1): + x = block(x, res) + + # Linear structure: simple but inefficient. + if structure == 'linear': + img = images_in + x = fromrgb(img, resolution_log2) + for res in range(resolution_log2, 2, -1): + lod = resolution_log2 - res + x = block(x, res) + with tf.variable_scope('Downsample_lod%d' % lod): + img = downsample_2d(img) + y = fromrgb(img, res - 1) + with tf.variable_scope('Grow_lod%d' % lod): + x = tflib.lerp_clip(x, y, lod_in - lod) + + # Recursive structure: complex but efficient. + if structure == 'recursive': + def cset(cur_lambda, new_cond, new_lambda): + return lambda: tf.cond(new_cond, new_lambda, cur_lambda) + def grow(res, lod): + x = lambda: fromrgb(naive_downsample_2d(images_in, factor=2**lod), res) + if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1)) + x = block(x(), res); y = lambda: x + y = cset(y, (lod_in > lod), lambda: tflib.lerp(x, fromrgb(naive_downsample_2d(images_in, factor=2**(lod+1)), res - 1), lod_in - lod)) + return y() + x = grow(3, resolution_log2 - 3) + + # Final layers at 4x4 resolution. + with tf.variable_scope('4x4'): + if mbstd_group_size > 1: + with tf.variable_scope('MinibatchStddev'): + x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features) + with tf.variable_scope('Conv'): + x = apply_bias_act(conv2d_layer(x, fmaps=nf(1), kernel=3), act=act) + with tf.variable_scope('Dense0'): + x = apply_bias_act(dense_layer(x, fmaps=nf(0)), act=act) + + # Output layer with label conditioning from "Which Training Methods for GANs do actually Converge?" + with tf.variable_scope('Output'): + x = apply_bias_act(dense_layer(x, fmaps=max(labels_in.shape[1], 1))) + if labels_in.shape[1] > 0: + x = tf.reduce_sum(x * labels_in, axis=1, keepdims=True) + scores_out = x + + # Output. + assert scores_out.dtype == tf.as_dtype(dtype) + scores_out = tf.identity(scores_out, name='scores_out') + return scores_out + +#---------------------------------------------------------------------------- +# StyleGAN2 discriminator (Figure 7). +# Implements skip connections and residual nets (Figure 7), but no progressive growing. +# Used in configs E-F (Table 1). 
+ +def D_stylegan2( + images_in, # First input: Images [minibatch, channel, height, width]. + labels_in, # Second input: Labels [minibatch, label_size]. + num_channels = 3, # Number of input color channels. Overridden based on dataset. + resolution = 1024, # Input resolution. Overridden based on dataset. + label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset. + fmap_base = 16 << 10, # Overall multiplier for the number of feature maps. + fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution. + fmap_min = 1, # Minimum number of feature maps in any layer. + fmap_max = 512, # Maximum number of feature maps in any layer. + architecture = 'resnet', # Architecture: 'orig', 'skip', 'resnet'. + nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + mbstd_group_size = 4, # Group size for the minibatch standard deviation layer, 0 = disable. + mbstd_num_features = 1, # Number of features for the minibatch standard deviation layer. + dtype = 'float32', # Data type to use for activations and outputs. + resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations. None = no filtering. + **_kwargs): # Ignore unrecognized keyword args. + + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max) + assert architecture in ['orig', 'skip', 'resnet'] + act = nonlinearity + + images_in.set_shape([None, num_channels, resolution, resolution]) + labels_in.set_shape([None, label_size]) + images_in = tf.cast(images_in, dtype) + labels_in = tf.cast(labels_in, dtype) + + # Building blocks for main layers. + def fromrgb(x, y, res): # res = 2..resolution_log2 + with tf.variable_scope('FromRGB'): + t = apply_bias_act(conv2d_layer(y, fmaps=nf(res-1), kernel=1), act=act) + return t if x is None else x + t + def block(x, res): # res = 2..resolution_log2 + t = x + with tf.variable_scope('Conv0'): + x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=3), act=act) + with tf.variable_scope('Conv1_down'): + x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-2), kernel=3, down=True, resample_kernel=resample_kernel), act=act) + if architecture == 'resnet': + with tf.variable_scope('Skip'): + t = conv2d_layer(t, fmaps=nf(res-2), kernel=1, down=True, resample_kernel=resample_kernel) + x = (x + t) * (1 / np.sqrt(2)) + return x + def downsample(y): + with tf.variable_scope('Downsample'): + return downsample_2d(y, k=resample_kernel) + + # Main layers. + x = None + y = images_in + for res in range(resolution_log2, 2, -1): + with tf.variable_scope('%dx%d' % (2**res, 2**res)): + if architecture == 'skip' or res == resolution_log2: + x = fromrgb(x, y, res) + x = block(x, res) + if architecture == 'skip': + y = downsample(y) + + # Final layers. + with tf.variable_scope('4x4'): + if architecture == 'skip': + x = fromrgb(x, y, 2) + if mbstd_group_size > 1: + with tf.variable_scope('MinibatchStddev'): + x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features) + with tf.variable_scope('Conv'): + x = apply_bias_act(conv2d_layer(x, fmaps=nf(1), kernel=3), act=act) + with tf.variable_scope('Dense0'): + x = apply_bias_act(dense_layer(x, fmaps=nf(0)), act=act) + + # Output layer with label conditioning from "Which Training Methods for GANs do actually Converge?" 
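+    # When labels are present, the score is the inner product of the per-class outputs and the one-hot label.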
+ with tf.variable_scope('Output'): + x = apply_bias_act(dense_layer(x, fmaps=max(labels_in.shape[1], 1))) + if labels_in.shape[1] > 0: + x = tf.reduce_sum(x * labels_in, axis=1, keepdims=True) + scores_out = x + + # Output. + assert scores_out.dtype == tf.as_dtype(dtype) + scores_out = tf.identity(scores_out, name='scores_out') + return scores_out + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2_official/training/training_loop.py b/ContraCLIP/models/genforce/converters/stylegan2_official/training/training_loop.py new file mode 100644 index 0000000000000000000000000000000000000000..c2d88cf03b8448ff57caa98515918920c6cfab35 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2_official/training/training_loop.py @@ -0,0 +1,356 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Main training script.""" + +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib +from dnnlib.tflib.autosummary import autosummary + +from training import dataset +from training import misc +from metrics import metric_base + +#---------------------------------------------------------------------------- +# Just-in-time processing of training images before feeding them to the networks. + +def process_reals(x, labels, lod, mirror_augment, drange_data, drange_net): + with tf.name_scope('DynamicRange'): + x = tf.cast(x, tf.float32) + x = misc.adjust_dynamic_range(x, drange_data, drange_net) + if mirror_augment: + with tf.name_scope('MirrorAugment'): + x = tf.where(tf.random_uniform([tf.shape(x)[0]]) < 0.5, x, tf.reverse(x, [3])) + with tf.name_scope('FadeLOD'): # Smooth crossfade between consecutive levels-of-detail. + s = tf.shape(x) + y = tf.reshape(x, [-1, s[1], s[2]//2, 2, s[3]//2, 2]) + y = tf.reduce_mean(y, axis=[3, 5], keepdims=True) + y = tf.tile(y, [1, 1, 1, 2, 1, 2]) + y = tf.reshape(y, [-1, s[1], s[2], s[3]]) + x = tflib.lerp(x, y, lod - tf.floor(lod)) + with tf.name_scope('UpscaleLOD'): # Upscale to match the expected input/output size of the networks. + s = tf.shape(x) + factor = tf.cast(2 ** tf.floor(lod), tf.int32) + x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1]) + x = tf.tile(x, [1, 1, 1, factor, 1, factor]) + x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor]) + return x, labels + +#---------------------------------------------------------------------------- +# Evaluate time-varying training parameters. + +def training_schedule( + cur_nimg, + training_set, + lod_initial_resolution = None, # Image resolution used at the beginning. + lod_training_kimg = 600, # Thousands of real images to show before doubling the resolution. + lod_transition_kimg = 600, # Thousands of real images to show when fading in new layers. + minibatch_size_base = 32, # Global minibatch size. + minibatch_size_dict = {}, # Resolution-specific overrides. + minibatch_gpu_base = 4, # Number of samples processed at a time by one GPU. + minibatch_gpu_dict = {}, # Resolution-specific overrides. + G_lrate_base = 0.002, # Learning rate for the generator. + G_lrate_dict = {}, # Resolution-specific overrides. + D_lrate_base = 0.002, # Learning rate for the discriminator. + D_lrate_dict = {}, # Resolution-specific overrides. + lrate_rampup_kimg = 0, # Duration of learning rate ramp-up. 
+ tick_kimg_base = 4, # Default interval of progress snapshots. + tick_kimg_dict = {8:28, 16:24, 32:20, 64:16, 128:12, 256:8, 512:6, 1024:4}): # Resolution-specific overrides. + + # Initialize result dict. + s = dnnlib.EasyDict() + s.kimg = cur_nimg / 1000.0 + + # Training phase. + phase_dur = lod_training_kimg + lod_transition_kimg + phase_idx = int(np.floor(s.kimg / phase_dur)) if phase_dur > 0 else 0 + phase_kimg = s.kimg - phase_idx * phase_dur + + # Level-of-detail and resolution. + if lod_initial_resolution is None: + s.lod = 0.0 + else: + s.lod = training_set.resolution_log2 + s.lod -= np.floor(np.log2(lod_initial_resolution)) + s.lod -= phase_idx + if lod_transition_kimg > 0: + s.lod -= max(phase_kimg - lod_training_kimg, 0.0) / lod_transition_kimg + s.lod = max(s.lod, 0.0) + s.resolution = 2 ** (training_set.resolution_log2 - int(np.floor(s.lod))) + + # Minibatch size. + s.minibatch_size = minibatch_size_dict.get(s.resolution, minibatch_size_base) + s.minibatch_gpu = minibatch_gpu_dict.get(s.resolution, minibatch_gpu_base) + + # Learning rate. + s.G_lrate = G_lrate_dict.get(s.resolution, G_lrate_base) + s.D_lrate = D_lrate_dict.get(s.resolution, D_lrate_base) + if lrate_rampup_kimg > 0: + rampup = min(s.kimg / lrate_rampup_kimg, 1.0) + s.G_lrate *= rampup + s.D_lrate *= rampup + + # Other parameters. + s.tick_kimg = tick_kimg_dict.get(s.resolution, tick_kimg_base) + return s + +#---------------------------------------------------------------------------- +# Main training script. + +def training_loop( + G_args = {}, # Options for generator network. + D_args = {}, # Options for discriminator network. + G_opt_args = {}, # Options for generator optimizer. + D_opt_args = {}, # Options for discriminator optimizer. + G_loss_args = {}, # Options for generator loss. + D_loss_args = {}, # Options for discriminator loss. + dataset_args = {}, # Options for dataset.load_dataset(). + sched_args = {}, # Options for train.TrainingSchedule. + grid_args = {}, # Options for train.setup_snapshot_image_grid(). + metric_arg_list = [], # Options for MetricGroup. + tf_config = {}, # Options for tflib.init_tf(). + data_dir = None, # Directory to load datasets from. + G_smoothing_kimg = 10.0, # Half-life of the running average of generator weights. + minibatch_repeats = 4, # Number of minibatches to run before adjusting training parameters. + lazy_regularization = True, # Perform regularization as a separate training step? + G_reg_interval = 4, # How often the perform regularization for G? Ignored if lazy_regularization=False. + D_reg_interval = 16, # How often the perform regularization for D? Ignored if lazy_regularization=False. + reset_opt_for_new_lod = True, # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced? + total_kimg = 25000, # Total length of the training, measured in thousands of real images. + mirror_augment = False, # Enable mirror augment? + drange_net = [-1,1], # Dynamic range used when feeding image data to the networks. + image_snapshot_ticks = 50, # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'. + network_snapshot_ticks = 50, # How often to save network snapshots? None = only save 'networks-final.pkl'. + save_tf_graph = False, # Include full TensorFlow computation graph in the tfevents file? + save_weight_histograms = False, # Include weight histograms in the tfevents file? + resume_pkl = None, # Network pickle to resume training from, None = train from scratch. 
+ resume_kimg = 0.0, # Assumed training progress at the beginning. Affects reporting and training schedule. + resume_time = 0.0, # Assumed wallclock time at the beginning. Affects reporting. + resume_with_new_nets = False): # Construct new networks according to G_args and D_args before resuming training? + + # Initialize dnnlib and TensorFlow. + tflib.init_tf(tf_config) + num_gpus = dnnlib.submit_config.num_gpus + + # Load training set. + training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir), verbose=True, **dataset_args) + grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(training_set, **grid_args) + misc.save_image_grid(grid_reals, dnnlib.make_run_dir_path('reals.png'), drange=training_set.dynamic_range, grid_size=grid_size) + + # Construct or load networks. + with tf.device('/gpu:0'): + if resume_pkl is None or resume_with_new_nets: + print('Constructing networks...') + G = tflib.Network('G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **G_args) + D = tflib.Network('D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **D_args) + Gs = G.clone('Gs') + if resume_pkl is not None: + print('Loading networks from "%s"...' % resume_pkl) + rG, rD, rGs = misc.load_pkl(resume_pkl) + if resume_with_new_nets: G.copy_vars_from(rG); D.copy_vars_from(rD); Gs.copy_vars_from(rGs) + else: G = rG; D = rD; Gs = rGs + + # Print layers and generate initial image snapshot. + G.print_layers(); D.print_layers() + sched = training_schedule(cur_nimg=total_kimg*1000, training_set=training_set, **sched_args) + grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:]) + grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu) + misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes_init.png'), drange=drange_net, grid_size=grid_size) + + # Setup training inputs. + print('Building TensorFlow graph...') + with tf.name_scope('Inputs'), tf.device('/cpu:0'): + lod_in = tf.placeholder(tf.float32, name='lod_in', shape=[]) + lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[]) + minibatch_size_in = tf.placeholder(tf.int32, name='minibatch_size_in', shape=[]) + minibatch_gpu_in = tf.placeholder(tf.int32, name='minibatch_gpu_in', shape=[]) + minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in * num_gpus) + Gs_beta = 0.5 ** tf.div(tf.cast(minibatch_size_in, tf.float32), G_smoothing_kimg * 1000.0) if G_smoothing_kimg > 0.0 else 0.0 + + # Setup optimizers. + G_opt_args = dict(G_opt_args) + D_opt_args = dict(D_opt_args) + for args, reg_interval in [(G_opt_args, G_reg_interval), (D_opt_args, D_reg_interval)]: + args['minibatch_multiplier'] = minibatch_multiplier + args['learning_rate'] = lrate_in + if lazy_regularization: + mb_ratio = reg_interval / (reg_interval + 1) + args['learning_rate'] *= mb_ratio + if 'beta1' in args: args['beta1'] **= mb_ratio + if 'beta2' in args: args['beta2'] **= mb_ratio + G_opt = tflib.Optimizer(name='TrainG', **G_opt_args) + D_opt = tflib.Optimizer(name='TrainD', **D_opt_args) + G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args) + D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args) + + # Build training graph for each GPU. + data_fetch_ops = [] + for gpu in range(num_gpus): + with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu): + + # Create GPU-specific shadow copies of G and D. 
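+            # GPU 0 operates on the primary networks directly; the remaining GPUs use
+            # '_shadow' clones, and the shared optimizers combine the per-GPU gradients.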
+ G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow') + D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow') + + # Fetch training data via temporary variables. + with tf.name_scope('DataFetch'): + sched = training_schedule(cur_nimg=int(resume_kimg*1000), training_set=training_set, **sched_args) + reals_var = tf.Variable(name='reals', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu] + training_set.shape)) + labels_var = tf.Variable(name='labels', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu, training_set.label_size])) + reals_write, labels_write = training_set.get_minibatch_tf() + reals_write, labels_write = process_reals(reals_write, labels_write, lod_in, mirror_augment, training_set.dynamic_range, drange_net) + reals_write = tf.concat([reals_write, reals_var[minibatch_gpu_in:]], axis=0) + labels_write = tf.concat([labels_write, labels_var[minibatch_gpu_in:]], axis=0) + data_fetch_ops += [tf.assign(reals_var, reals_write)] + data_fetch_ops += [tf.assign(labels_var, labels_write)] + reals_read = reals_var[:minibatch_gpu_in] + labels_read = labels_var[:minibatch_gpu_in] + + # Evaluate loss functions. + lod_assign_ops = [] + if 'lod' in G_gpu.vars: lod_assign_ops += [tf.assign(G_gpu.vars['lod'], lod_in)] + if 'lod' in D_gpu.vars: lod_assign_ops += [tf.assign(D_gpu.vars['lod'], lod_in)] + with tf.control_dependencies(lod_assign_ops): + with tf.name_scope('G_loss'): + G_loss, G_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=G_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, **G_loss_args) + with tf.name_scope('D_loss'): + D_loss, D_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=D_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, reals=reals_read, labels=labels_read, **D_loss_args) + + # Register gradients. + if not lazy_regularization: + if G_reg is not None: G_loss += G_reg + if D_reg is not None: D_loss += D_reg + else: + if G_reg is not None: G_reg_opt.register_gradients(tf.reduce_mean(G_reg * G_reg_interval), G_gpu.trainables) + if D_reg is not None: D_reg_opt.register_gradients(tf.reduce_mean(D_reg * D_reg_interval), D_gpu.trainables) + G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables) + D_opt.register_gradients(tf.reduce_mean(D_loss), D_gpu.trainables) + + # Setup training ops. + data_fetch_op = tf.group(*data_fetch_ops) + G_train_op = G_opt.apply_updates() + D_train_op = D_opt.apply_updates() + G_reg_op = G_reg_opt.apply_updates(allow_no_op=True) + D_reg_op = D_reg_opt.apply_updates(allow_no_op=True) + Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta) + + # Finalize graph. 
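+    # Query the peak-GPU-memory counter (when the op is available in this TF build) and
+    # initialize any variables that are still uninitialized.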
+ with tf.device('/gpu:0'): + try: + peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse() + except tf.errors.NotFoundError: + peak_gpu_mem_op = tf.constant(0) + tflib.init_uninitialized_vars() + + print('Initializing logs...') + summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path()) + if save_tf_graph: + summary_log.add_graph(tf.get_default_graph()) + if save_weight_histograms: + G.setup_weight_histograms(); D.setup_weight_histograms() + metrics = metric_base.MetricGroup(metric_arg_list) + + print('Training for %d kimg...\n' % total_kimg) + dnnlib.RunContext.get().update('', cur_epoch=resume_kimg, max_epoch=total_kimg) + maintenance_time = dnnlib.RunContext.get().get_last_update_interval() + cur_nimg = int(resume_kimg * 1000) + cur_tick = -1 + tick_start_nimg = cur_nimg + prev_lod = -1.0 + running_mb_counter = 0 + while cur_nimg < total_kimg * 1000: + if dnnlib.RunContext.get().should_stop(): break + + # Choose training parameters and configure training ops. + sched = training_schedule(cur_nimg=cur_nimg, training_set=training_set, **sched_args) + assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0 + training_set.configure(sched.minibatch_gpu, sched.lod) + if reset_opt_for_new_lod: + if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(sched.lod) != np.ceil(prev_lod): + G_opt.reset_optimizer_state(); D_opt.reset_optimizer_state() + prev_lod = sched.lod + + # Run training ops. + feed_dict = {lod_in: sched.lod, lrate_in: sched.G_lrate, minibatch_size_in: sched.minibatch_size, minibatch_gpu_in: sched.minibatch_gpu} + for _repeat in range(minibatch_repeats): + rounds = range(0, sched.minibatch_size, sched.minibatch_gpu * num_gpus) + run_G_reg = (lazy_regularization and running_mb_counter % G_reg_interval == 0) + run_D_reg = (lazy_regularization and running_mb_counter % D_reg_interval == 0) + cur_nimg += sched.minibatch_size + running_mb_counter += 1 + + # Fast path without gradient accumulation. + if len(rounds) == 1: + tflib.run([G_train_op, data_fetch_op], feed_dict) + if run_G_reg: + tflib.run(G_reg_op, feed_dict) + tflib.run([D_train_op, Gs_update_op], feed_dict) + if run_D_reg: + tflib.run(D_reg_op, feed_dict) + + # Slow path with gradient accumulation. + else: + for _round in rounds: + tflib.run(G_train_op, feed_dict) + if run_G_reg: + for _round in rounds: + tflib.run(G_reg_op, feed_dict) + tflib.run(Gs_update_op, feed_dict) + for _round in rounds: + tflib.run(data_fetch_op, feed_dict) + tflib.run(D_train_op, feed_dict) + if run_D_reg: + for _round in rounds: + tflib.run(D_reg_op, feed_dict) + + # Perform maintenance tasks once per tick. + done = (cur_nimg >= total_kimg * 1000) + if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done: + cur_tick += 1 + tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0 + tick_start_nimg = cur_nimg + tick_time = dnnlib.RunContext.get().get_time_since_last_update() + total_time = dnnlib.RunContext.get().get_time_since_start() + resume_time + + # Report progress. 
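+            # Each autosummary() call logs the value for the tfevents file and returns it
+            # so it can also be printed here.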
+ print('tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f' % ( + autosummary('Progress/tick', cur_tick), + autosummary('Progress/kimg', cur_nimg / 1000.0), + autosummary('Progress/lod', sched.lod), + autosummary('Progress/minibatch', sched.minibatch_size), + dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)), + autosummary('Timing/sec_per_tick', tick_time), + autosummary('Timing/sec_per_kimg', tick_time / tick_kimg), + autosummary('Timing/maintenance_sec', maintenance_time), + autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2**30))) + autosummary('Timing/total_hours', total_time / (60.0 * 60.0)) + autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0)) + + # Save snapshots. + if image_snapshot_ticks is not None and (cur_tick % image_snapshot_ticks == 0 or done): + grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu) + misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes%06d.png' % (cur_nimg // 1000)), drange=drange_net, grid_size=grid_size) + if network_snapshot_ticks is not None and (cur_tick % network_snapshot_ticks == 0 or done): + pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' % (cur_nimg // 1000)) + misc.save_pkl((G, D, Gs), pkl) + metrics.run(pkl, run_dir=dnnlib.make_run_dir_path(), data_dir=dnnlib.convert_path(data_dir), num_gpus=num_gpus, tf_config=tf_config) + + # Update summaries and RunContext. + metrics.update_autosummaries() + tflib.autosummary.save_summaries(summary_log, cur_nimg) + dnnlib.RunContext.get().update('%.2f' % sched.lod, cur_epoch=cur_nimg // 1000, max_epoch=total_kimg) + maintenance_time = dnnlib.RunContext.get().get_last_update_interval() - tick_time + + # Save final snapshot. + misc.save_pkl((G, D, Gs), dnnlib.make_run_dir_path('network-final.pkl')) + + # All done. + summary_log.close() + training_set.close() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_converter.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..7ea74621af938c332db943f56e0d4941e8cd148f --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_converter.py @@ -0,0 +1,386 @@ +# python3.7 +"""Converts StyleGAN2-ADA-PyTorch model to match this repository. 
+ +The models can be trained through OR released by the repository: + +https://github.com/NVlabs/stylegan2-ada-pytorch +""" + +import os +import sys +import re +import pickle +import warnings +from tqdm import tqdm +import numpy as np + +import torch + +from models import build_model +from utils.visualizer import HtmlPageVisualizer +from utils.visualizer import postprocess_image + +__all__ = ['convert_stylegan2ada_pth_weight'] + +GAN_TPYE = 'stylegan2' +OFFICIAL_CODE_DIR = 'stylegan2ada_pth_official' +BASE_DIR = os.path.dirname(os.path.relpath(__file__)) +CODE_PATH = os.path.join(BASE_DIR, OFFICIAL_CODE_DIR) + +TRUNC_PSI = 0.5 +TRUNC_LAYERS = 18 +RANDOMIZE_NOISE = False +NOISE_MODE = 'random' if RANDOMIZE_NOISE else 'const' + +# The following two dictionary of mapping patterns are modified from +# https://github.com/NVlabs/stylegan2-ada-pytorch/blob/main/legacy.py +G_PTH_TO_TF_VAR_MAPPING_PATTERN = { + r'mapping\.w_avg': + lambda: f'dlatent_avg', + r'mapping\.embed\.weight': + lambda: f'LabelEmbed/weight', + r'mapping\.embed\.bias': + lambda: f'LabelEmbed/bias', + r'mapping\.fc(\d+)\.weight': + lambda i: f'Dense{i}/weight', + r'mapping\.fc(\d+)\.bias': + lambda i: f'Dense{i}/bias', + r'synthesis\.b4\.const': + lambda: f'4x4/Const/const', + r'synthesis\.b4\.conv1\.weight': + lambda: f'4x4/Conv/weight', + r'synthesis\.b4\.conv1\.bias': + lambda: f'4x4/Conv/bias', + r'synthesis\.b4\.conv1\.noise_const': + lambda: f'noise0', + r'synthesis\.b4\.conv1\.noise_strength': + lambda: f'4x4/Conv/noise_strength', + r'synthesis\.b4\.conv1\.affine\.weight': + lambda: f'4x4/Conv/mod_weight', + r'synthesis\.b4\.conv1\.affine\.bias': + lambda: f'4x4/Conv/mod_bias', + r'synthesis\.b(\d+)\.conv0\.weight': + lambda r: f'{r}x{r}/Conv0_up/weight', + r'synthesis\.b(\d+)\.conv0\.bias': + lambda r: f'{r}x{r}/Conv0_up/bias', + r'synthesis\.b(\d+)\.conv0\.noise_const': + lambda r: f'noise{int(np.log2(int(r)))*2-5}', + r'synthesis\.b(\d+)\.conv0\.noise_strength': + lambda r: f'{r}x{r}/Conv0_up/noise_strength', + r'synthesis\.b(\d+)\.conv0\.affine\.weight': + lambda r: f'{r}x{r}/Conv0_up/mod_weight', + r'synthesis\.b(\d+)\.conv0\.affine\.bias': + lambda r: f'{r}x{r}/Conv0_up/mod_bias', + r'synthesis\.b(\d+)\.conv1\.weight': + lambda r: f'{r}x{r}/Conv1/weight', + r'synthesis\.b(\d+)\.conv1\.bias': + lambda r: f'{r}x{r}/Conv1/bias', + r'synthesis\.b(\d+)\.conv1\.noise_const': + lambda r: f'noise{int(np.log2(int(r)))*2-4}', + r'synthesis\.b(\d+)\.conv1\.noise_strength': + lambda r: f'{r}x{r}/Conv1/noise_strength', + r'synthesis\.b(\d+)\.conv1\.affine\.weight': + lambda r: f'{r}x{r}/Conv1/mod_weight', + r'synthesis\.b(\d+)\.conv1\.affine\.bias': + lambda r: f'{r}x{r}/Conv1/mod_bias', + r'synthesis\.b(\d+)\.torgb\.weight': + lambda r: f'{r}x{r}/ToRGB/weight', + r'synthesis\.b(\d+)\.torgb\.bias': + lambda r: f'{r}x{r}/ToRGB/bias', + r'synthesis\.b(\d+)\.torgb\.affine\.weight': + lambda r: f'{r}x{r}/ToRGB/mod_weight', + r'synthesis\.b(\d+)\.torgb\.affine\.bias': + lambda r: f'{r}x{r}/ToRGB/mod_bias', + r'synthesis\.b(\d+)\.skip\.weight': + lambda r: f'{r}x{r}/Skip/weight', + r'.*\.resample_filter': + None, +} +D_PTH_TO_TF_VAR_MAPPING_PATTERN = { + r'b(\d+)\.fromrgb\.weight': + lambda r: f'{r}x{r}/FromRGB/weight', + r'b(\d+)\.fromrgb\.bias': + lambda r: f'{r}x{r}/FromRGB/bias', + r'b(\d+)\.conv(\d+)\.weight': + lambda r, i: f'{r}x{r}/Conv{i}{["","_down"][int(i)]}/weight', + r'b(\d+)\.conv(\d+)\.bias': + lambda r, i: f'{r}x{r}/Conv{i}{["","_down"][int(i)]}/bias', + r'b(\d+)\.skip\.weight': + lambda r: f'{r}x{r}/Skip/weight', + 
r'mapping\.embed\.weight': + lambda: f'LabelEmbed/weight', + r'mapping\.embed\.bias': + lambda: f'LabelEmbed/bias', + r'mapping\.fc(\d+)\.weight': + lambda i: f'Mapping{i}/weight', + r'mapping\.fc(\d+)\.bias': + lambda i: f'Mapping{i}/bias', + r'b4\.conv\.weight': + lambda: f'4x4/Conv/weight', + r'b4\.conv\.bias': + lambda: f'4x4/Conv/bias', + r'b4\.fc\.weight': + lambda: f'4x4/Dense0/weight', + r'b4\.fc\.bias': + lambda: f'4x4/Dense0/bias', + r'b4\.out\.weight': + lambda: f'Output/weight', + r'b4\.out\.bias': + lambda: f'Output/bias', + r'.*\.resample_filter': + None, +} + + +def convert_stylegan2ada_pth_weight(src_weight_path, + dst_weight_path, + test_num=10, + save_test_image=False, + verbose=False): + """Converts the pre-trained StyleGAN2-ADA-PyTorch weights. + + Args: + src_weight_path: Path to the source model to load weights from. + dst_weight_path: Path to the target model to save converted weights. + test_num: Number of samples used to test the conversion. (default: 10) + save_test_image: Whether to save the test images. (default: False) + verbose: Whether to print verbose log message. (default: False) + """ + + print(f'========================================') + print(f'Loading source weights from `{src_weight_path}` ...') + sys.path.insert(0, CODE_PATH) + with open(src_weight_path, 'rb') as f: + model = pickle.load(f) + sys.path.pop(0) + print(f'Successfully loaded!') + print(f'--------------------') + + z_space_dim = model['G'].z_dim + label_size = model['G'].c_dim + w_space_dim = model['G'].w_dim + image_channels = model['G'].img_channels + resolution = model['G'].img_resolution + repeat_w = True + + print(f'Converting source weights (G) to target ...') + G_vars = dict(model['G'].named_parameters()) + G_vars.update(dict(model['G'].named_buffers())) + G = build_model(gan_type=GAN_TPYE, + module='generator', + resolution=resolution, + z_space_dim=z_space_dim, + w_space_dim=w_space_dim, + label_size=label_size, + repeat_w=repeat_w, + image_channels=image_channels) + G_state_dict = G.state_dict() + official_tf_to_pth_var_mapping = {} + for name in G_vars.keys(): + for pattern, fn in G_PTH_TO_TF_VAR_MAPPING_PATTERN.items(): + match = re.fullmatch(pattern, name) + if match: + if fn is not None: + official_tf_to_pth_var_mapping[fn(*match.groups())] = name + break + for dst_var_name, tf_var_name in G.pth_to_tf_var_mapping.items(): + assert tf_var_name in official_tf_to_pth_var_mapping + assert dst_var_name in G_state_dict + src_var_name = official_tf_to_pth_var_mapping[tf_var_name] + assert src_var_name in G_vars + if verbose: + print(f' Converting `{src_var_name}` to `{dst_var_name}`.') + var = G_vars[src_var_name].data + if 'weight' in tf_var_name: + if 'Conv0_up/weight' in tf_var_name: + var = var.flip(2, 3) + elif 'Skip' in tf_var_name: + var = var.flip(2, 3) + if 'bias' in tf_var_name: + if 'mod_bias' in tf_var_name: + var = var - 1 + if 'Const' in tf_var_name: + var = var.unsqueeze(0) + if 'noise' in tf_var_name and 'noise_' not in tf_var_name: + var = var.unsqueeze(0).unsqueeze(0) + G_state_dict[dst_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Converting source weights (Gs) to target ...') + Gs_vars = dict(model['G_ema'].named_parameters()) + Gs_vars.update(dict(model['G_ema'].named_buffers())) + Gs = build_model(gan_type=GAN_TPYE, + module='generator', + resolution=resolution, + z_space_dim=z_space_dim, + w_space_dim=w_space_dim, + label_size=label_size, + repeat_w=repeat_w, + image_channels=image_channels) + Gs_state_dict 
= Gs.state_dict() + official_tf_to_pth_var_mapping = {} + for name in Gs_vars.keys(): + for pattern, fn in G_PTH_TO_TF_VAR_MAPPING_PATTERN.items(): + match = re.fullmatch(pattern, name) + if match: + if fn is not None: + official_tf_to_pth_var_mapping[fn(*match.groups())] = name + break + for dst_var_name, tf_var_name in Gs.pth_to_tf_var_mapping.items(): + assert tf_var_name in official_tf_to_pth_var_mapping + assert dst_var_name in Gs_state_dict + src_var_name = official_tf_to_pth_var_mapping[tf_var_name] + assert src_var_name in Gs_vars + if verbose: + print(f' Converting `{src_var_name}` to `{dst_var_name}`.') + var = Gs_vars[src_var_name].data + if 'weight' in tf_var_name: + if 'Conv0_up/weight' in tf_var_name: + var = var.flip(2, 3) + elif 'Skip' in tf_var_name: + var = var.flip(2, 3) + if 'bias' in tf_var_name: + if 'mod_bias' in tf_var_name: + var = var - 1 + if 'Const' in tf_var_name: + var = var.unsqueeze(0) + if 'noise' in tf_var_name and 'noise_' not in tf_var_name: + var = var.unsqueeze(0).unsqueeze(0) + Gs_state_dict[dst_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Converting source weights (D) to target ...') + D_vars = dict(model['D'].named_parameters()) + D_vars.update(dict(model['D'].named_buffers())) + D = build_model(gan_type=GAN_TPYE, + module='discriminator', + resolution=resolution, + label_size=label_size, + image_channels=image_channels) + D_state_dict = D.state_dict() + official_tf_to_pth_var_mapping = {} + for name in D_vars.keys(): + for pattern, fn in D_PTH_TO_TF_VAR_MAPPING_PATTERN.items(): + match = re.fullmatch(pattern, name) + if match: + if fn is not None: + official_tf_to_pth_var_mapping[fn(*match.groups())] = name + break + for dst_var_name, tf_var_name in D.pth_to_tf_var_mapping.items(): + assert tf_var_name in official_tf_to_pth_var_mapping + assert dst_var_name in D_state_dict + src_var_name = official_tf_to_pth_var_mapping[tf_var_name] + assert src_var_name in D_vars + if verbose: + print(f' Converting `{src_var_name}` to `{dst_var_name}`.') + var = D_vars[src_var_name].data + D_state_dict[dst_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Saving target weights to `{dst_weight_path}` ...') + state_dict = { + 'generator': G_state_dict, + 'discriminator': D_state_dict, + 'generator_smooth': Gs_state_dict, + } + torch.save(state_dict, dst_weight_path) + print(f'Successfully saved!') + print(f'--------------------') + + # Start testing if needed. + if test_num <= 0: + warnings.warn(f'Skip testing the converted weights!') + return + + if save_test_image: + html = HtmlPageVisualizer(num_rows=test_num, num_cols=3) + html.set_headers(['Index', 'Before Conversion', 'After Conversion']) + for i in range(test_num): + html.set_cell(i, 0, text=f'{i}') + + print(f'Testing conversion results ...') + G.load_state_dict(G_state_dict) + D.load_state_dict(D_state_dict) + Gs.load_state_dict(Gs_state_dict) + G.eval().cuda() + D.eval().cuda() + Gs.eval().cuda() + model['G'].eval().cuda() + model['D'].eval().cuda() + model['G_ema'].eval().cuda() + + gs_distance = 0.0 + dg_distance = 0.0 + for i in tqdm(range(test_num)): + # Test Gs(z). 
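+        # Draw a random latent (and a random one-hot label if the model is conditional),
+        # run both the official and the converted generator, and accumulate the mean
+        # absolute difference between their outputs.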
+ code = np.random.randn(1, z_space_dim) + code = torch.from_numpy(code).type(torch.FloatTensor).cuda() + if label_size: + label_id = np.random.randint(label_size) + label = np.zeros((1, label_size), np.float32) + label[0, label_id] = 1.0 + label = torch.from_numpy(label).type(torch.FloatTensor).cuda() + else: + label_id = 0 + label = None + + src_output = model['G_ema'](code, + label, + truncation_psi=TRUNC_PSI, + truncation_cutoff=TRUNC_LAYERS, + noise_mode=NOISE_MODE) + src_output = src_output.detach().cpu().numpy() + dst_output = Gs(code, + label=label, + trunc_psi=TRUNC_PSI, + trunc_layers=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE)['image'] + dst_output = dst_output.detach().cpu().numpy() + distance = np.average(np.abs(src_output - dst_output)) + if verbose: + print(f' Test {i:03d}: Gs distance {distance:.6e}.') + gs_distance += distance + + if save_test_image: + html.set_cell(i, 1, image=postprocess_image(src_output)[0]) + html.set_cell(i, 2, image=postprocess_image(dst_output)[0]) + + # Test D(G(z)). + code = np.random.randn(1, z_space_dim) + code = torch.from_numpy(code).type(torch.FloatTensor).cuda() + if label_size: + label_id = np.random.randint(label_size) + label = np.zeros((1, label_size), np.float32) + label[0, label_id] = 1.0 + label = torch.from_numpy(label).type(torch.FloatTensor).cuda() + else: + label_id = 0 + label = None + src_image = model['G'](code, + label, + truncation_psi=TRUNC_PSI, + truncation_cutoff=TRUNC_LAYERS, + noise_mode=NOISE_MODE) + src_output = model['D'](src_image, label) + src_output = src_output.detach().cpu().numpy() + dst_image = G(code, + label=label, + trunc_psi=TRUNC_PSI, + trunc_layers=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE)['image'] + dst_output = D(dst_image, label) + dst_output = dst_output.detach().cpu().numpy() + distance = np.average(np.abs(src_output - dst_output)) + if verbose: + print(f' Test {i:03d}: D(G) distance {distance:.6e}.') + dg_distance += distance + + print(f'Average Gs distance is {gs_distance / test_num:.6e}.') + print(f'Average D(G) distance is {dg_distance / test_num:.6e}.') + print(f'========================================') + + if save_test_image: + html.save(f'{dst_weight_path}.conversion_test.html') diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/Dockerfile b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6e391f4d8c4b619e1381594248565f63492daf19 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/Dockerfile @@ -0,0 +1,22 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +FROM nvcr.io/nvidia/pytorch:20.12-py3 + +ENV PYTHONDONTWRITEBYTECODE 1 +ENV PYTHONUNBUFFERED 1 + +RUN pip install imageio-ffmpeg==0.4.3 pyspng==0.1.0 + +WORKDIR /workspace + +# Unset TORCH_CUDA_ARCH_LIST and exec. This makes pytorch run-time +# extension builds significantly faster as we only compile for the +# currently active GPU configuration. 
+RUN (printf '#!/bin/bash\nunset TORCH_CUDA_ARCH_LIST\nexec \"$@\"\n' >> /entry.sh) && chmod a+x /entry.sh +ENTRYPOINT ["/entry.sh"] diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/README.md b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/README.md new file mode 100644 index 0000000000000000000000000000000000000000..092b52bb4cdf71264119ffd8511ecbb74719e85a --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/README.md @@ -0,0 +1,378 @@ +## StyleGAN2-ADA — Official PyTorch implementation + +![Teaser image](./docs/stylegan2-ada-teaser-1024x252.png) + +**Training Generative Adversarial Networks with Limited Data**
+Tero Karras, Miika Aittala, Janne Hellsten, Samuli Laine, Jaakko Lehtinen, Timo Aila
+https://arxiv.org/abs/2006.06676
+ +Abstract: *Training generative adversarial networks (GAN) using too little data typically leads to discriminator overfitting, causing training to diverge. We propose an adaptive discriminator augmentation mechanism that significantly stabilizes training in limited data regimes. The approach does not require changes to loss functions or network architectures, and is applicable both when training from scratch and when fine-tuning an existing GAN on another dataset. We demonstrate, on several datasets, that good results are now possible using only a few thousand training images, often matching StyleGAN2 results with an order of magnitude fewer images. We expect this to open up new application domains for GANs. We also find that the widely used CIFAR-10 is, in fact, a limited data benchmark, and improve the record FID from 5.59 to 2.42.* + +For business inquiries, please contact [researchinquiries@nvidia.com](mailto:researchinquiries@nvidia.com)
+For press and other inquiries, please contact Hector Marinez at [hmarinez@nvidia.com](mailto:hmarinez@nvidia.com)
+ +## Release notes + +This repository is a faithful reimplementation of [StyleGAN2-ADA](https://github.com/NVlabs/stylegan2-ada/) in PyTorch, focusing on correctness, performance, and compatibility. + +**Correctness** +* Full support for all primary training configurations. +* Extensive verification of image quality, training curves, and quality metrics against the TensorFlow version. +* Results are expected to match in all cases, excluding the effects of pseudo-random numbers and floating-point arithmetic. + +**Performance** +* Training is typically 5%–30% faster compared to the TensorFlow version on NVIDIA Tesla V100 GPUs. +* Inference is up to 35% faster in high resolutions, but it may be slightly slower in low resolutions. +* GPU memory usage is comparable to the TensorFlow version. +* Faster startup time when training new networks (<50s), and also when using pre-trained networks (<4s). +* New command line options for tweaking the training performance. + +**Compatibility** +* Compatible with old network pickles created using the TensorFlow version. +* New ZIP/PNG based dataset format for maximal interoperability with existing 3rd party tools. +* TFRecords datasets are no longer supported — they need to be converted to the new format. +* New JSON-based format for logs, metrics, and training curves. +* Training curves are also exported in the old TFEvents format if TensorBoard is installed. +* Command line syntax is mostly unchanged, with a few exceptions (e.g., `dataset_tool.py`). +* Comparison methods are not supported (`--cmethod`, `--dcap`, `--cfg=cifarbaseline`, `--aug=adarv`) +* **Truncation is now disabled by default.** + +## Data repository + +| Path | Description +| :--- | :---------- +| [stylegan2-ada-pytorch](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/) | Main directory hosted on Amazon S3 +|   ├  [ada-paper.pdf](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/ada-paper.pdf) | Paper PDF +|   ├  [images](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/images/) | Curated example images produced using the pre-trained models +|   ├  [videos](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/videos/) | Curated example interpolation videos +|   └  [pretrained](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/) | Pre-trained models +|     ├  ffhq.pkl | FFHQ at 1024x1024, trained using original StyleGAN2 +|     ├  metfaces.pkl | MetFaces at 1024x1024, transfer learning from FFHQ using ADA +|     ├  afhqcat.pkl | AFHQ Cat at 512x512, trained from scratch using ADA +|     ├  afhqdog.pkl | AFHQ Dog at 512x512, trained from scratch using ADA +|     ├  afhqwild.pkl | AFHQ Wild at 512x512, trained from scratch using ADA +|     ├  cifar10.pkl | Class-conditional CIFAR-10 at 32x32 +|     ├  brecahad.pkl | BreCaHAD at 512x512, trained from scratch using ADA +|     ├  [paper-fig7c-training-set-sweeps](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/paper-fig7c-training-set-sweeps/) | Models used in Fig.7c (sweep over training set size) +|     ├  [paper-fig11a-small-datasets](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/paper-fig11a-small-datasets/) | Models used in Fig.11a (small datasets & transfer learning) +|     ├  [paper-fig11b-cifar10](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/paper-fig11b-cifar10/) | Models used in Fig.11b (CIFAR-10) +|     ├  [transfer-learning-source-nets](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/) | 
Models used as starting point for transfer learning +|     └  [metrics](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/) | Feature detectors used by the quality metrics + +## Requirements + +* Linux and Windows are supported, but we recommend Linux for performance and compatibility reasons. +* 1–8 high-end NVIDIA GPUs with at least 12 GB of memory. We have done all testing and development using NVIDIA DGX-1 with 8 Tesla V100 GPUs. +* 64-bit Python 3.7 and PyTorch 1.7.1. See [https://pytorch.org/](https://pytorch.org/) for PyTorch install instructions. +* CUDA toolkit 11.0 or later. Use at least version 11.1 if running on RTX 3090. (Why is a separate CUDA toolkit installation required? See comments in [#2](https://github.com/NVlabs/stylegan2-ada-pytorch/issues/2#issuecomment-779457121).) +* Python libraries: `pip install click requests tqdm pyspng ninja imageio-ffmpeg==0.4.3`. We use the Anaconda3 2020.11 distribution which installs most of these by default. +* Docker users: use the [provided Dockerfile](./Dockerfile) to build an image with the required library dependencies. + +The code relies heavily on custom PyTorch extensions that are compiled on the fly using NVCC. On Windows, the compilation requires Microsoft Visual Studio. We recommend installing [Visual Studio Community Edition](https://visualstudio.microsoft.com/vs/) and adding it into `PATH` using `"C:\Program Files (x86)\Microsoft Visual Studio\\Community\VC\Auxiliary\Build\vcvars64.bat"`. + +## Getting started + +Pre-trained networks are stored as `*.pkl` files that can be referenced using local filenames or URLs: + +```.bash +# Generate curated MetFaces images without truncation (Fig.10 left) +python generate.py --outdir=out --trunc=1 --seeds=85,265,297,849 \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl + +# Generate uncurated MetFaces images with truncation (Fig.12 upper left) +python generate.py --outdir=out --trunc=0.7 --seeds=600-605 \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl + +# Generate class conditional CIFAR-10 images (Fig.17 left, Car) +python generate.py --outdir=out --seeds=0-35 --class=1 \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/cifar10.pkl + +# Style mixing example +python style_mixing.py --outdir=out --rows=85,100,75,458,1500 --cols=55,821,1789,293 \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl +``` + +Outputs from the above commands are placed under `out/*.png`, controlled by `--outdir`. Downloaded network pickles are cached under `$HOME/.cache/dnnlib`, which can be overridden by setting the `DNNLIB_CACHE_DIR` environment variable. The default PyTorch extension build directory is `$HOME/.cache/torch_extensions`, which can be overridden by setting `TORCH_EXTENSIONS_DIR`. + +**Docker**: You can run the above curated image example using Docker as follows: + +```.bash +docker build --tag sg2ada:latest . +./docker_run.sh python3 generate.py --outdir=out --trunc=1 --seeds=85,265,297,849 \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl +``` + +Note: The Docker image requires NVIDIA driver release `r455.23` or later. + +**Legacy networks**: The above commands can load most of the network pickles created using the previous TensorFlow versions of StyleGAN2 and StyleGAN2-ADA. 
However, for future compatibility, we recommend converting such legacy pickles into the new format used by the PyTorch version: + +```.bash +python legacy.py \ + --source=https://nvlabs-fi-cdn.nvidia.com/stylegan2/networks/stylegan2-cat-config-f.pkl \ + --dest=stylegan2-cat-config-f.pkl +``` + +## Projecting images to latent space + +To find the matching latent vector for a given image file, run: + +```.bash +python projector.py --outdir=out --target=~/mytargetimg.png \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/ffhq.pkl +``` + +For optimal results, the target image should be cropped and aligned similar to the [FFHQ dataset](https://github.com/NVlabs/ffhq-dataset). The above command saves the projection target `out/target.png`, result `out/proj.png`, latent vector `out/projected_w.npz`, and progression video `out/proj.mp4`. You can render the resulting latent vector by specifying `--projected_w` for `generate.py`: + +```.bash +python generate.py --outdir=out --projected_w=out/projected_w.npz \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/ffhq.pkl +``` + +## Using networks from Python + +You can use pre-trained networks in your own Python code as follows: + +```.python +with open('ffhq.pkl', 'rb') as f: + G = pickle.load(f)['G_ema'].cuda() # torch.nn.Module +z = torch.randn([1, G.z_dim]).cuda() # latent codes +c = None # class labels (not used in this example) +img = G(z, c) # NCHW, float32, dynamic range [-1, +1] +``` + +The above code requires `torch_utils` and `dnnlib` to be accessible via `PYTHONPATH`. It does not need source code for the networks themselves — their class definitions are loaded from the pickle via `torch_utils.persistence`. + +The pickle contains three networks. `'G'` and `'D'` are instantaneous snapshots taken during training, and `'G_ema'` represents a moving average of the generator weights over several training steps. The networks are regular instances of `torch.nn.Module`, with all of their parameters and buffers placed on the CPU at import and gradient computation disabled by default. + +The generator consists of two submodules, `G.mapping` and `G.synthesis`, that can be executed separately. They also support various additional options: + +```.python +w = G.mapping(z, c, truncation_psi=0.5, truncation_cutoff=8) +img = G.synthesis(w, noise_mode='const', force_fp32=True) +``` + +Please refer to [`generate.py`](./generate.py), [`style_mixing.py`](./style_mixing.py), and [`projector.py`](./projector.py) for further examples. + +## Preparing datasets + +Datasets are stored as uncompressed ZIP archives containing uncompressed PNG files and a metadata file `dataset.json` for labels. + +Custom datasets can be created from a folder containing images; see [`python dataset_tool.py --help`](./docs/dataset-tool-help.txt) for more information. Alternatively, the folder can also be used directly as a dataset, without running it through `dataset_tool.py` first, but doing so may lead to suboptimal performance. + +Legacy TFRecords datasets are not supported — see below for instructions on how to convert them. + +**FFHQ**: + +Step 1: Download the [Flickr-Faces-HQ dataset](https://github.com/NVlabs/ffhq-dataset) as TFRecords. 
+ +Step 2: Extract images from TFRecords using `dataset_tool.py` from the [TensorFlow version of StyleGAN2-ADA](https://github.com/NVlabs/stylegan2-ada/): + +```.bash +# Using dataset_tool.py from TensorFlow version at +# https://github.com/NVlabs/stylegan2-ada/ +python ../stylegan2-ada/dataset_tool.py unpack \ + --tfrecord_dir=~/ffhq-dataset/tfrecords/ffhq --output_dir=/tmp/ffhq-unpacked +``` + +Step 3: Create ZIP archive using `dataset_tool.py` from this repository: + +```.bash +# Original 1024x1024 resolution. +python dataset_tool.py --source=/tmp/ffhq-unpacked --dest=~/datasets/ffhq.zip + +# Scaled down 256x256 resolution. +python dataset_tool.py --source=/tmp/ffhq-unpacked --dest=~/datasets/ffhq256x256.zip \ + --width=256 --height=256 +``` + +**MetFaces**: Download the [MetFaces dataset](https://github.com/NVlabs/metfaces-dataset) and create ZIP archive: + +```.bash +python dataset_tool.py --source=~/downloads/metfaces/images --dest=~/datasets/metfaces.zip +``` + +**AFHQ**: Download the [AFHQ dataset](https://github.com/clovaai/stargan-v2/blob/master/README.md#animal-faces-hq-dataset-afhq) and create ZIP archive: + +```.bash +python dataset_tool.py --source=~/downloads/afhq/train/cat --dest=~/datasets/afhqcat.zip +python dataset_tool.py --source=~/downloads/afhq/train/dog --dest=~/datasets/afhqdog.zip +python dataset_tool.py --source=~/downloads/afhq/train/wild --dest=~/datasets/afhqwild.zip +``` + +**CIFAR-10**: Download the [CIFAR-10 python version](https://www.cs.toronto.edu/~kriz/cifar.html) and convert to ZIP archive: + +```.bash +python dataset_tool.py --source=~/downloads/cifar-10-python.tar.gz --dest=~/datasets/cifar10.zip +``` + +**LSUN**: Download the desired categories from the [LSUN project page](https://www.yf.io/p/lsun/) and convert to ZIP archive: + +```.bash +python dataset_tool.py --source=~/downloads/lsun/raw/cat_lmdb --dest=~/datasets/lsuncat200k.zip \ + --transform=center-crop --width=256 --height=256 --max_images=200000 + +python dataset_tool.py --source=~/downloads/lsun/raw/car_lmdb --dest=~/datasets/lsuncar200k.zip \ + --transform=center-crop-wide --width=512 --height=384 --max_images=200000 +``` + +**BreCaHAD**: + +Step 1: Download the [BreCaHAD dataset](https://figshare.com/articles/BreCaHAD_A_Dataset_for_Breast_Cancer_Histopathological_Annotation_and_Diagnosis/7379186). + +Step 2: Extract 512x512 resolution crops using `dataset_tool.py` from the [TensorFlow version of StyleGAN2-ADA](https://github.com/NVlabs/stylegan2-ada/): + +```.bash +# Using dataset_tool.py from TensorFlow version at +# https://github.com/NVlabs/stylegan2-ada/ +python dataset_tool.py extract_brecahad_crops --cropsize=512 \ + --output_dir=/tmp/brecahad-crops --brecahad_dir=~/downloads/brecahad/images +``` + +Step 3: Create ZIP archive using `dataset_tool.py` from this repository: + +```.bash +python dataset_tool.py --source=/tmp/brecahad-crops --dest=~/datasets/brecahad.zip +``` + +## Training new networks + +In its most basic form, training new networks boils down to: + +```.bash +python train.py --outdir=~/training-runs --data=~/mydataset.zip --gpus=1 --dry-run +python train.py --outdir=~/training-runs --data=~/mydataset.zip --gpus=1 +``` + +The first command is optional; it validates the arguments, prints out the training configuration, and exits. The second command kicks off the actual training. + +In this example, the results are saved to a newly created directory `~/training-runs/-mydataset-auto1`, controlled by `--outdir`. 
The training exports network pickles (`network-snapshot-<INT>.pkl`) and example images (`fakes<INT>.png`) at regular intervals (controlled by `--snap`). For each pickle, it also evaluates FID (controlled by `--metrics`) and logs the resulting scores in `metric-fid50k_full.jsonl` (as well as TFEvents if TensorBoard is installed).
+
+The name of the output directory reflects the training configuration. For example, `00000-mydataset-auto1` indicates that the *base configuration* was `auto1`, meaning that the hyperparameters were selected automatically for training on one GPU. The base configuration is controlled by `--cfg`:
+
+| Base config | Description
+| :-------------------- | :----------
+| `auto` (default) | Automatically select reasonable defaults based on resolution and GPU count. Serves as a good starting point for new datasets but does not necessarily lead to optimal results.
+| `stylegan2` | Reproduce results for StyleGAN2 config F at 1024x1024 using 1, 2, 4, or 8 GPUs.
+| `paper256` | Reproduce results for FFHQ and LSUN Cat at 256x256 using 1, 2, 4, or 8 GPUs.
+| `paper512` | Reproduce results for BreCaHAD and AFHQ at 512x512 using 1, 2, 4, or 8 GPUs.
+| `paper1024` | Reproduce results for MetFaces at 1024x1024 using 1, 2, 4, or 8 GPUs.
+| `cifar` | Reproduce results for CIFAR-10 (tuned configuration) using 1 or 2 GPUs.
+
+The training configuration can be further customized with additional command line options:
+
+* `--aug=noaug` disables ADA.
+* `--cond=1` enables class-conditional training (requires a dataset with labels).
+* `--mirror=1` amplifies the dataset with x-flips. Often beneficial, even with ADA.
+* `--resume=ffhq1024 --snap=10` performs transfer learning from FFHQ trained at 1024x1024.
+* `--resume=~/training-runs/<NAME>/network-snapshot-<INT>.pkl` resumes a previous training run.
+* `--gamma=10` overrides R1 gamma. We recommend trying a couple of different values for each new dataset.
+* `--aug=ada --target=0.7` adjusts ADA target value (default: 0.6).
+* `--augpipe=blit` enables pixel blitting but disables all other augmentations.
+* `--augpipe=bgcfnc` enables all available augmentations (blit, geom, color, filter, noise, cutout).
+
+Please refer to [`python train.py --help`](./docs/train-help.txt) for the full list.
+
+## Expected training time
+
+The total training time depends heavily on resolution, number of GPUs, dataset, desired quality, and hyperparameters.
The following table lists expected wallclock times to reach different points in the training, measured in thousands of real images shown to the discriminator ("kimg"): + +| Resolution | GPUs | 1000 kimg | 25000 kimg | sec/kimg | GPU mem | CPU mem +| :--------: | :--: | :-------: | :--------: | :---------------: | :-----: | :-----: +| 128x128 | 1 | 4h 05m | 4d 06h | 12.8–13.7 | 7.2 GB | 3.9 GB +| 128x128 | 2 | 2h 06m | 2d 04h | 6.5–6.8 | 7.4 GB | 7.9 GB +| 128x128 | 4 | 1h 20m | 1d 09h | 4.1–4.6 | 4.2 GB | 16.3 GB +| 128x128 | 8 | 1h 13m | 1d 06h | 3.9–4.9 | 2.6 GB | 31.9 GB +| 256x256 | 1 | 6h 36m | 6d 21h | 21.6–24.2 | 5.0 GB | 4.5 GB +| 256x256 | 2 | 3h 27m | 3d 14h | 11.2–11.8 | 5.2 GB | 9.0 GB +| 256x256 | 4 | 1h 45m | 1d 20h | 5.6–5.9 | 5.2 GB | 17.8 GB +| 256x256 | 8 | 1h 24m | 1d 11h | 4.4–5.5 | 3.2 GB | 34.7 GB +| 512x512 | 1 | 21h 03m | 21d 22h | 72.5–74.9 | 7.6 GB | 5.0 GB +| 512x512 | 2 | 10h 59m | 11d 10h | 37.7–40.0 | 7.8 GB | 9.8 GB +| 512x512 | 4 | 5h 29m | 5d 17h | 18.7–19.1 | 7.9 GB | 17.7 GB +| 512x512 | 8 | 2h 48m | 2d 22h | 9.5–9.7 | 7.8 GB | 38.2 GB +| 1024x1024 | 1 | 1d 20h | 46d 03h | 154.3–161.6 | 8.1 GB | 5.3 GB +| 1024x1024 | 2 | 23h 09m | 24d 02h | 80.6–86.2 | 8.6 GB | 11.9 GB +| 1024x1024 | 4 | 11h 36m | 12d 02h | 40.1–40.8 | 8.4 GB | 21.9 GB +| 1024x1024 | 8 | 5h 54m | 6d 03h | 20.2–20.6 | 8.3 GB | 44.7 GB + +The above measurements were done using NVIDIA Tesla V100 GPUs with default settings (`--cfg=auto --aug=ada --metrics=fid50k_full`). "sec/kimg" shows the expected range of variation in raw training performance, as reported in `log.txt`. "GPU mem" and "CPU mem" show the highest observed memory consumption, excluding the peak at the beginning caused by `torch.backends.cudnn.benchmark`. + +In typical cases, 25000 kimg or more is needed to reach convergence, but the results are already quite reasonable around 5000 kimg. 1000 kimg is often enough for transfer learning, which tends to converge significantly faster. The following figure shows example convergence curves for different datasets as a function of wallclock time, using the same settings as above: + +![Training curves](./docs/stylegan2-ada-training-curves.png) + +Note: `--cfg=auto` serves as a reasonable first guess for the hyperparameters but it does not necessarily lead to optimal results for a given dataset. For example, `--cfg=stylegan2` yields considerably better FID for FFHQ-140k at 1024x1024 than illustrated above. We recommend trying out at least a few different values of `--gamma` for each new dataset. + +## Quality metrics + +By default, `train.py` automatically computes FID for each network pickle exported during training. We recommend inspecting `metric-fid50k_full.jsonl` (or TensorBoard) at regular intervals to monitor the training progress. When desired, the automatic computation can be disabled with `--metrics=none` to speed up the training slightly (3%–9%). + +Additional quality metrics can also be computed after the training: + +```.bash +# Previous training run: look up options automatically, save result to JSONL file. +python calc_metrics.py --metrics=pr50k3_full \ + --network=~/training-runs/00000-ffhq10k-res64-auto1/network-snapshot-000000.pkl + +# Pre-trained network pickle: specify dataset explicitly, print result to stdout. 
+python calc_metrics.py --metrics=fid50k_full --data=~/datasets/ffhq.zip --mirror=1 \
+    --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/ffhq.pkl
+```
+
+The first example looks up the training configuration and performs the same operation as if `--metrics=pr50k3_full` had been specified during training. The second example downloads a pre-trained network pickle, in which case the values of `--mirror` and `--data` must be specified explicitly.
+
+Note that many of the metrics have a significant one-off cost when calculating them for the first time for a new dataset (up to 30min). Also note that the evaluation is done using a different random seed each time, so the results will vary if the same metric is computed multiple times.
+
+We employ the following metrics in the ADA paper. Execution time and GPU memory usage are reported for one NVIDIA Tesla V100 GPU at 1024x1024 resolution:
+
+| Metric | Time | GPU mem | Description |
+| :----- | :----: | :-----: | :---------- |
+| `fid50k_full` | 13 min | 1.8 GB | Fréchet inception distance[1] against the full dataset
+| `kid50k_full` | 13 min | 1.8 GB | Kernel inception distance[2] against the full dataset
+| `pr50k3_full` | 13 min | 4.1 GB | Precision and recall[3] against the full dataset
+| `is50k` | 13 min | 1.8 GB | Inception score[4] for CIFAR-10
+
+In addition, the following metrics from the [StyleGAN](https://github.com/NVlabs/stylegan) and [StyleGAN2](https://github.com/NVlabs/stylegan2) papers are also supported:
+
+| Metric | Time | GPU mem | Description |
+| :------------ | :----: | :-----: | :---------- |
+| `fid50k` | 13 min | 1.8 GB | Fréchet inception distance against 50k real images
+| `kid50k` | 13 min | 1.8 GB | Kernel inception distance against 50k real images
+| `pr50k3` | 13 min | 4.1 GB | Precision and recall against 50k real images
+| `ppl2_wend` | 36 min | 2.4 GB | Perceptual path length[5] in W, endpoints, full image
+| `ppl_zfull` | 36 min | 2.4 GB | Perceptual path length in Z, full paths, cropped image
+| `ppl_wfull` | 36 min | 2.4 GB | Perceptual path length in W, full paths, cropped image
+| `ppl_zend` | 36 min | 2.4 GB | Perceptual path length in Z, endpoints, cropped image
+| `ppl_wend` | 36 min | 2.4 GB | Perceptual path length in W, endpoints, cropped image
+
+References:
+1. [GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium](https://arxiv.org/abs/1706.08500), Heusel et al. 2017
+2. [Demystifying MMD GANs](https://arxiv.org/abs/1801.01401), Bińkowski et al. 2018
+3. [Improved Precision and Recall Metric for Assessing Generative Models](https://arxiv.org/abs/1904.06991), Kynkäänniemi et al. 2019
+4. [Improved Techniques for Training GANs](https://arxiv.org/abs/1606.03498), Salimans et al. 2016
+5. [A Style-Based Generator Architecture for Generative Adversarial Networks](https://arxiv.org/abs/1812.04948), Karras et al. 2018
+
+## License
+
+Copyright © 2021, NVIDIA Corporation. All rights reserved.
+
+This work is made available under the [Nvidia Source Code License](https://nvlabs.github.io/stylegan2-ada-pytorch/license.html).
+
+## Citation
+
+```
+@inproceedings{Karras2020ada,
+  title = {Training Generative Adversarial Networks with Limited Data},
+  author = {Tero Karras and Miika Aittala and Janne Hellsten and Samuli Laine and Jaakko Lehtinen and Timo Aila},
+  booktitle = {Proc. NeurIPS},
+  year = {2020}
+}
+```
+
+## Development
+
+This is a research reference implementation and is treated as a one-time code drop.
As such, we do not accept outside code contributions in the form of pull requests. + +## Acknowledgements + +We thank David Luebke for helpful comments; Tero Kuosmanen and Sabu Nadarajan for their support with compute infrastructure; and Edgar Schönfeld for guidance on setting up unconditional BigGAN. diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/calc_metrics.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/calc_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..03e828195a096f6f78da241b700c16f56327bdb8 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/calc_metrics.py @@ -0,0 +1,190 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Calculate quality metrics for previous training run or pretrained network pickle.""" + +import os +import click +import json +import tempfile +import copy +import torch +import dnnlib + +import legacy +from metrics import metric_main +from metrics import metric_utils +from torch_utils import training_stats +from torch_utils import custom_ops +from torch_utils import misc + +#---------------------------------------------------------------------------- + +def subprocess_fn(rank, args, temp_dir): + dnnlib.util.Logger(should_flush=True) + + # Init torch.distributed. + if args.num_gpus > 1: + init_file = os.path.abspath(os.path.join(temp_dir, '.torch_distributed_init')) + if os.name == 'nt': + init_method = 'file:///' + init_file.replace('\\', '/') + torch.distributed.init_process_group(backend='gloo', init_method=init_method, rank=rank, world_size=args.num_gpus) + else: + init_method = f'file://{init_file}' + torch.distributed.init_process_group(backend='nccl', init_method=init_method, rank=rank, world_size=args.num_gpus) + + # Init torch_utils. + sync_device = torch.device('cuda', rank) if args.num_gpus > 1 else None + training_stats.init_multiprocessing(rank=rank, sync_device=sync_device) + if rank != 0 or not args.verbose: + custom_ops.verbosity = 'none' + + # Print network summary. + device = torch.device('cuda', rank) + torch.backends.cudnn.benchmark = True + torch.backends.cuda.matmul.allow_tf32 = False + torch.backends.cudnn.allow_tf32 = False + G = copy.deepcopy(args.G).eval().requires_grad_(False).to(device) + if rank == 0 and args.verbose: + z = torch.empty([1, G.z_dim], device=device) + c = torch.empty([1, G.c_dim], device=device) + misc.print_module_summary(G, [z, c]) + + # Calculate each metric. + for metric in args.metrics: + if rank == 0 and args.verbose: + print(f'Calculating {metric}...') + progress = metric_utils.ProgressMonitor(verbose=args.verbose) + result_dict = metric_main.calc_metric(metric=metric, G=G, dataset_kwargs=args.dataset_kwargs, + num_gpus=args.num_gpus, rank=rank, device=device, progress=progress) + if rank == 0: + metric_main.report_metric(result_dict, run_dir=args.run_dir, snapshot_pkl=args.network_pkl) + if rank == 0 and args.verbose: + print() + + # Done. 
+ if rank == 0 and args.verbose: + print('Exiting...') + +#---------------------------------------------------------------------------- + +class CommaSeparatedList(click.ParamType): + name = 'list' + + def convert(self, value, param, ctx): + _ = param, ctx + if value is None or value.lower() == 'none' or value == '': + return [] + return value.split(',') + +#---------------------------------------------------------------------------- + +@click.command() +@click.pass_context +@click.option('network_pkl', '--network', help='Network pickle filename or URL', metavar='PATH', required=True) +@click.option('--metrics', help='Comma-separated list or "none"', type=CommaSeparatedList(), default='fid50k_full', show_default=True) +@click.option('--data', help='Dataset to evaluate metrics against (directory or zip) [default: same as training data]', metavar='PATH') +@click.option('--mirror', help='Whether the dataset was augmented with x-flips during training [default: look up]', type=bool, metavar='BOOL') +@click.option('--gpus', help='Number of GPUs to use', type=int, default=1, metavar='INT', show_default=True) +@click.option('--verbose', help='Print optional information', type=bool, default=True, metavar='BOOL', show_default=True) + +def calc_metrics(ctx, network_pkl, metrics, data, mirror, gpus, verbose): + """Calculate quality metrics for previous training run or pretrained network pickle. + + Examples: + + \b + # Previous training run: look up options automatically, save result to JSONL file. + python calc_metrics.py --metrics=pr50k3_full \\ + --network=~/training-runs/00000-ffhq10k-res64-auto1/network-snapshot-000000.pkl + + \b + # Pre-trained network pickle: specify dataset explicitly, print result to stdout. + python calc_metrics.py --metrics=fid50k_full --data=~/datasets/ffhq.zip --mirror=1 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/ffhq.pkl + + Available metrics: + + \b + ADA paper: + fid50k_full Frechet inception distance against the full dataset. + kid50k_full Kernel inception distance against the full dataset. + pr50k3_full Precision and recall againt the full dataset. + is50k Inception score for CIFAR-10. + + \b + StyleGAN and StyleGAN2 papers: + fid50k Frechet inception distance against 50k real images. + kid50k Kernel inception distance against 50k real images. + pr50k3 Precision and recall against 50k real images. + ppl2_wend Perceptual path length in W at path endpoints against full image. + ppl_zfull Perceptual path length in Z for full paths against cropped image. + ppl_wfull Perceptual path length in W for full paths against cropped image. + ppl_zend Perceptual path length in Z at path endpoints against cropped image. + ppl_wend Perceptual path length in W at path endpoints against cropped image. + """ + dnnlib.util.Logger(should_flush=True) + + # Validate arguments. + args = dnnlib.EasyDict(metrics=metrics, num_gpus=gpus, network_pkl=network_pkl, verbose=verbose) + if not all(metric_main.is_valid_metric(metric) for metric in args.metrics): + ctx.fail('\n'.join(['--metrics can only contain the following values:'] + metric_main.list_valid_metrics())) + if not args.num_gpus >= 1: + ctx.fail('--gpus must be at least 1') + + # Load network. 
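+    # The pickle may be a local file or a URL; legacy.load_network_pkl() also converts
+    # legacy TensorFlow pickles on the fly. Metrics are evaluated on the EMA generator ('G_ema').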
+ if not dnnlib.util.is_url(network_pkl, allow_file_urls=True) and not os.path.isfile(network_pkl): + ctx.fail('--network must point to a file or URL') + if args.verbose: + print(f'Loading network from "{network_pkl}"...') + with dnnlib.util.open_url(network_pkl, verbose=args.verbose) as f: + network_dict = legacy.load_network_pkl(f) + args.G = network_dict['G_ema'] # subclass of torch.nn.Module + + # Initialize dataset options. + if data is not None: + args.dataset_kwargs = dnnlib.EasyDict(class_name='training.dataset.ImageFolderDataset', path=data) + elif network_dict['training_set_kwargs'] is not None: + args.dataset_kwargs = dnnlib.EasyDict(network_dict['training_set_kwargs']) + else: + ctx.fail('Could not look up dataset options; please specify --data') + + # Finalize dataset options. + args.dataset_kwargs.resolution = args.G.img_resolution + args.dataset_kwargs.use_labels = (args.G.c_dim != 0) + if mirror is not None: + args.dataset_kwargs.xflip = mirror + + # Print dataset options. + if args.verbose: + print('Dataset options:') + print(json.dumps(args.dataset_kwargs, indent=2)) + + # Locate run dir. + args.run_dir = None + if os.path.isfile(network_pkl): + pkl_dir = os.path.dirname(network_pkl) + if os.path.isfile(os.path.join(pkl_dir, 'training_options.json')): + args.run_dir = pkl_dir + + # Launch processes. + if args.verbose: + print('Launching processes...') + torch.multiprocessing.set_start_method('spawn') + with tempfile.TemporaryDirectory() as temp_dir: + if args.num_gpus == 1: + subprocess_fn(rank=0, args=args, temp_dir=temp_dir) + else: + torch.multiprocessing.spawn(fn=subprocess_fn, args=(args, temp_dir), nprocs=args.num_gpus) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + calc_metrics() # pylint: disable=no-value-for-parameter + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/dataset_tool.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/dataset_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..c59e6292891c3896722965020af7c60056729f2d --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/dataset_tool.py @@ -0,0 +1,444 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
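+
+# Dataset conversion tool: reads images from a folder, zip archive, LMDB database,
+# CIFAR-10 tarball, or MNIST archive, and writes a uniform PNG dataset (folder or
+# uncompressed zip) together with optional class labels in dataset.json.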
+ +import functools +import io +import json +import os +import pickle +import sys +import tarfile +import gzip +import zipfile +from pathlib import Path +from typing import Callable, Optional, Tuple, Union + +import click +import numpy as np +import PIL.Image +from tqdm import tqdm + +#---------------------------------------------------------------------------- + +def error(msg): + print('Error: ' + msg) + sys.exit(1) + +#---------------------------------------------------------------------------- + +def maybe_min(a: int, b: Optional[int]) -> int: + if b is not None: + return min(a, b) + return a + +#---------------------------------------------------------------------------- + +def file_ext(name: Union[str, Path]) -> str: + return str(name).split('.')[-1] + +#---------------------------------------------------------------------------- + +def is_image_ext(fname: Union[str, Path]) -> bool: + ext = file_ext(fname).lower() + return f'.{ext}' in PIL.Image.EXTENSION # type: ignore + +#---------------------------------------------------------------------------- + +def open_image_folder(source_dir, *, max_images: Optional[int]): + input_images = [str(f) for f in sorted(Path(source_dir).rglob('*')) if is_image_ext(f) and os.path.isfile(f)] + + # Load labels. + labels = {} + meta_fname = os.path.join(source_dir, 'dataset.json') + if os.path.isfile(meta_fname): + with open(meta_fname, 'r') as file: + labels = json.load(file)['labels'] + if labels is not None: + labels = { x[0]: x[1] for x in labels } + else: + labels = {} + + max_idx = maybe_min(len(input_images), max_images) + + def iterate_images(): + for idx, fname in enumerate(input_images): + arch_fname = os.path.relpath(fname, source_dir) + arch_fname = arch_fname.replace('\\', '/') + img = np.array(PIL.Image.open(fname)) + yield dict(img=img, label=labels.get(arch_fname)) + if idx >= max_idx-1: + break + return max_idx, iterate_images() + +#---------------------------------------------------------------------------- + +def open_image_zip(source, *, max_images: Optional[int]): + with zipfile.ZipFile(source, mode='r') as z: + input_images = [str(f) for f in sorted(z.namelist()) if is_image_ext(f)] + + # Load labels. 
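+        # dataset.json, if present inside the archive, maps each image's archive path to its class label.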
+ labels = {} + if 'dataset.json' in z.namelist(): + with z.open('dataset.json', 'r') as file: + labels = json.load(file)['labels'] + if labels is not None: + labels = { x[0]: x[1] for x in labels } + else: + labels = {} + + max_idx = maybe_min(len(input_images), max_images) + + def iterate_images(): + with zipfile.ZipFile(source, mode='r') as z: + for idx, fname in enumerate(input_images): + with z.open(fname, 'r') as file: + img = PIL.Image.open(file) # type: ignore + img = np.array(img) + yield dict(img=img, label=labels.get(fname)) + if idx >= max_idx-1: + break + return max_idx, iterate_images() + +#---------------------------------------------------------------------------- + +def open_lmdb(lmdb_dir: str, *, max_images: Optional[int]): + import cv2 # pip install opencv-python + import lmdb # pip install lmdb # pylint: disable=import-error + + with lmdb.open(lmdb_dir, readonly=True, lock=False).begin(write=False) as txn: + max_idx = maybe_min(txn.stat()['entries'], max_images) + + def iterate_images(): + with lmdb.open(lmdb_dir, readonly=True, lock=False).begin(write=False) as txn: + for idx, (_key, value) in enumerate(txn.cursor()): + try: + try: + img = cv2.imdecode(np.frombuffer(value, dtype=np.uint8), 1) + if img is None: + raise IOError('cv2.imdecode failed') + img = img[:, :, ::-1] # BGR => RGB + except IOError: + img = np.array(PIL.Image.open(io.BytesIO(value))) + yield dict(img=img, label=None) + if idx >= max_idx-1: + break + except: + print(sys.exc_info()[1]) + + return max_idx, iterate_images() + +#---------------------------------------------------------------------------- + +def open_cifar10(tarball: str, *, max_images: Optional[int]): + images = [] + labels = [] + + with tarfile.open(tarball, 'r:gz') as tar: + for batch in range(1, 6): + member = tar.getmember(f'cifar-10-batches-py/data_batch_{batch}') + with tar.extractfile(member) as file: + data = pickle.load(file, encoding='latin1') + images.append(data['data'].reshape(-1, 3, 32, 32)) + labels.append(data['labels']) + + images = np.concatenate(images) + labels = np.concatenate(labels) + images = images.transpose([0, 2, 3, 1]) # NCHW -> NHWC + assert images.shape == (50000, 32, 32, 3) and images.dtype == np.uint8 + assert labels.shape == (50000,) and labels.dtype in [np.int32, np.int64] + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + + max_idx = maybe_min(len(images), max_images) + + def iterate_images(): + for idx, img in enumerate(images): + yield dict(img=img, label=int(labels[idx])) + if idx >= max_idx-1: + break + + return max_idx, iterate_images() + +#---------------------------------------------------------------------------- + +def open_mnist(images_gz: str, *, max_images: Optional[int]): + labels_gz = images_gz.replace('-images-idx3-ubyte.gz', '-labels-idx1-ubyte.gz') + assert labels_gz != images_gz + images = [] + labels = [] + + with gzip.open(images_gz, 'rb') as f: + images = np.frombuffer(f.read(), np.uint8, offset=16) + with gzip.open(labels_gz, 'rb') as f: + labels = np.frombuffer(f.read(), np.uint8, offset=8) + + images = images.reshape(-1, 28, 28) + images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0) + assert images.shape == (60000, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (60000,) and labels.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + + max_idx = maybe_min(len(images), max_images) + + def iterate_images(): + 
for idx, img in enumerate(images): + yield dict(img=img, label=int(labels[idx])) + if idx >= max_idx-1: + break + + return max_idx, iterate_images() + +#---------------------------------------------------------------------------- + +def make_transform( + transform: Optional[str], + output_width: Optional[int], + output_height: Optional[int], + resize_filter: str +) -> Callable[[np.ndarray], Optional[np.ndarray]]: + resample = { 'box': PIL.Image.BOX, 'lanczos': PIL.Image.LANCZOS }[resize_filter] + def scale(width, height, img): + w = img.shape[1] + h = img.shape[0] + if width == w and height == h: + return img + img = PIL.Image.fromarray(img) + ww = width if width is not None else w + hh = height if height is not None else h + img = img.resize((ww, hh), resample) + return np.array(img) + + def center_crop(width, height, img): + crop = np.min(img.shape[:2]) + img = img[(img.shape[0] - crop) // 2 : (img.shape[0] + crop) // 2, (img.shape[1] - crop) // 2 : (img.shape[1] + crop) // 2] + img = PIL.Image.fromarray(img, 'RGB') + img = img.resize((width, height), resample) + return np.array(img) + + def center_crop_wide(width, height, img): + ch = int(np.round(width * img.shape[0] / img.shape[1])) + if img.shape[1] < width or ch < height: + return None + + img = img[(img.shape[0] - ch) // 2 : (img.shape[0] + ch) // 2] + img = PIL.Image.fromarray(img, 'RGB') + img = img.resize((width, height), resample) + img = np.array(img) + + canvas = np.zeros([width, width, 3], dtype=np.uint8) + canvas[(width - height) // 2 : (width + height) // 2, :] = img + return canvas + + if transform is None: + return functools.partial(scale, output_width, output_height) + if transform == 'center-crop': + if (output_width is None) or (output_height is None): + error ('must specify --width and --height when using ' + transform + 'transform') + return functools.partial(center_crop, output_width, output_height) + if transform == 'center-crop-wide': + if (output_width is None) or (output_height is None): + error ('must specify --width and --height when using ' + transform + ' transform') + return functools.partial(center_crop_wide, output_width, output_height) + assert False, 'unknown transform' + +#---------------------------------------------------------------------------- + +def open_dataset(source, *, max_images: Optional[int]): + if os.path.isdir(source): + if source.rstrip('/').endswith('_lmdb'): + return open_lmdb(source, max_images=max_images) + else: + return open_image_folder(source, max_images=max_images) + elif os.path.isfile(source): + if os.path.basename(source) == 'cifar-10-python.tar.gz': + return open_cifar10(source, max_images=max_images) + elif os.path.basename(source) == 'train-images-idx3-ubyte.gz': + return open_mnist(source, max_images=max_images) + elif file_ext(source) == 'zip': + return open_image_zip(source, max_images=max_images) + else: + assert False, 'unknown archive type' + else: + error(f'Missing input file or directory: {source}') + +#---------------------------------------------------------------------------- + +def open_dest(dest: str) -> Tuple[str, Callable[[str, Union[bytes, str]], None], Callable[[], None]]: + dest_ext = file_ext(dest) + + if dest_ext == 'zip': + if os.path.dirname(dest) != '': + os.makedirs(os.path.dirname(dest), exist_ok=True) + zf = zipfile.ZipFile(file=dest, mode='w', compression=zipfile.ZIP_STORED) + def zip_write_bytes(fname: str, data: Union[bytes, str]): + zf.writestr(fname, data) + return '', zip_write_bytes, zf.close + else: + # If the output folder already 
exists, check that is is + # empty. + # + # Note: creating the output directory is not strictly + # necessary as folder_write_bytes() also mkdirs, but it's better + # to give an error message earlier in case the dest folder + # somehow cannot be created. + if os.path.isdir(dest) and len(os.listdir(dest)) != 0: + error('--dest folder must be empty') + os.makedirs(dest, exist_ok=True) + + def folder_write_bytes(fname: str, data: Union[bytes, str]): + os.makedirs(os.path.dirname(fname), exist_ok=True) + with open(fname, 'wb') as fout: + if isinstance(data, str): + data = data.encode('utf8') + fout.write(data) + return dest, folder_write_bytes, lambda: None + +#---------------------------------------------------------------------------- + +@click.command() +@click.pass_context +@click.option('--source', help='Directory or archive name for input dataset', required=True, metavar='PATH') +@click.option('--dest', help='Output directory or archive name for output dataset', required=True, metavar='PATH') +@click.option('--max-images', help='Output only up to `max-images` images', type=int, default=None) +@click.option('--resize-filter', help='Filter to use when resizing images for output resolution', type=click.Choice(['box', 'lanczos']), default='lanczos', show_default=True) +@click.option('--transform', help='Input crop/resize mode', type=click.Choice(['center-crop', 'center-crop-wide'])) +@click.option('--width', help='Output width', type=int) +@click.option('--height', help='Output height', type=int) +def convert_dataset( + ctx: click.Context, + source: str, + dest: str, + max_images: Optional[int], + transform: Optional[str], + resize_filter: str, + width: Optional[int], + height: Optional[int] +): + """Convert an image dataset into a dataset archive usable with StyleGAN2 ADA PyTorch. + + The input dataset format is guessed from the --source argument: + + \b + --source *_lmdb/ Load LSUN dataset + --source cifar-10-python.tar.gz Load CIFAR-10 dataset + --source train-images-idx3-ubyte.gz Load MNIST dataset + --source path/ Recursively load all images from path/ + --source dataset.zip Recursively load all images from dataset.zip + + Specifying the output format and path: + + \b + --dest /path/to/dir Save output files under /path/to/dir + --dest /path/to/dataset.zip Save output files into /path/to/dataset.zip + + The output dataset format can be either an image folder or an uncompressed zip archive. + Zip archives makes it easier to move datasets around file servers and clusters, and may + offer better training performance on network file systems. + + Images within the dataset archive will be stored as uncompressed PNG. + Uncompresed PNGs can be efficiently decoded in the training loop. + + Class labels are stored in a file called 'dataset.json' that is stored at the + dataset root folder. This file has the following structure: + + \b + { + "labels": [ + ["00000/img00000000.png",6], + ["00000/img00000001.png",9], + ... repeated for every image in the datase + ["00049/img00049999.png",1] + ] + } + + If the 'dataset.json' file cannot be found, the dataset is interpreted as + not containing class labels. + + Image scale/crop and resolution requirements: + + Output images must be square-shaped and they must all have the same power-of-two + dimensions. + + To scale arbitrary input image size to a specific width and height, use the + --width and --height options. 
Output resolution will be either the original + input resolution (if --width/--height was not specified) or the one specified with + --width/height. + + Use the --transform=center-crop or --transform=center-crop-wide options to apply a + center crop transform on the input image. These options should be used with the + --width and --height options. For example: + + \b + python dataset_tool.py --source LSUN/raw/cat_lmdb --dest /tmp/lsun_cat \\ + --transform=center-crop-wide --width 512 --height=384 + """ + + PIL.Image.init() # type: ignore + + if dest == '': + ctx.fail('--dest output filename or directory must not be an empty string') + + num_files, input_iter = open_dataset(source, max_images=max_images) + archive_root_dir, save_bytes, close_dest = open_dest(dest) + + transform_image = make_transform(transform, width, height, resize_filter) + + dataset_attrs = None + + labels = [] + for idx, image in tqdm(enumerate(input_iter), total=num_files): + idx_str = f'{idx:08d}' + archive_fname = f'{idx_str[:5]}/img{idx_str}.png' + + # Apply crop and resize. + img = transform_image(image['img']) + + # Transform may drop images. + if img is None: + continue + + # Error check to require uniform image attributes across + # the whole dataset. + channels = img.shape[2] if img.ndim == 3 else 1 + cur_image_attrs = { + 'width': img.shape[1], + 'height': img.shape[0], + 'channels': channels + } + if dataset_attrs is None: + dataset_attrs = cur_image_attrs + width = dataset_attrs['width'] + height = dataset_attrs['height'] + if width != height: + error(f'Image dimensions after scale and crop are required to be square. Got {width}x{height}') + if dataset_attrs['channels'] not in [1, 3]: + error('Input images must be stored as RGB or grayscale') + if width != 2 ** int(np.floor(np.log2(width))): + error('Image width/height after scale and crop are required to be power-of-two') + elif dataset_attrs != cur_image_attrs: + err = [f' dataset {k}/cur image {k}: {dataset_attrs[k]}/{cur_image_attrs[k]}' for k in dataset_attrs.keys()] + error(f'Image {archive_fname} attributes must be equal across all images of the dataset. Got:\n' + '\n'.join(err)) + + # Save the image as an uncompressed PNG. + img = PIL.Image.fromarray(img, { 1: 'L', 3: 'RGB' }[channels]) + image_bits = io.BytesIO() + img.save(image_bits, format='png', compress_level=0, optimize=False) + save_bytes(os.path.join(archive_root_dir, archive_fname), image_bits.getbuffer()) + labels.append([archive_fname, image['label']] if image['label'] is not None else None) + + metadata = { + 'labels': labels if all(x is not None for x in labels) else None + } + save_bytes(os.path.join(archive_root_dir, 'dataset.json'), json.dumps(metadata)) + close_dest() + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + convert_dataset() # pylint: disable=no-value-for-parameter diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/dnnlib/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/dnnlib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0a2cd19cb09ef6cd5a9f74d3b97a91c6aa080558 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/dnnlib/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. 
Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +from .util import EasyDict, make_cache_dir_path diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/dnnlib/util.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/dnnlib/util.py new file mode 100644 index 0000000000000000000000000000000000000000..4771462e2a75387cfc2b9eec793f8238cb5c345c --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/dnnlib/util.py @@ -0,0 +1,477 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Miscellaneous utility classes and functions.""" + +import ctypes +import fnmatch +import importlib +import inspect +import numpy as np +import os +import shutil +import sys +import types +import io +import pickle +import re +import requests +import html +import hashlib +import glob +import tempfile +import urllib +import urllib.request +import uuid + +from distutils.util import strtobool +from typing import Any, List, Tuple, Union + + +# Util classes +# ------------------------------------------------------------------------------------------ + + +class EasyDict(dict): + """Convenience class that behaves like a dict but allows access with the attribute syntax.""" + + def __getattr__(self, name: str) -> Any: + try: + return self[name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name: str, value: Any) -> None: + self[name] = value + + def __delattr__(self, name: str) -> None: + del self[name] + + +class Logger(object): + """Redirect stderr to stdout, optionally print stdout to a file, and optionally force flushing on both stdout and the file.""" + + def __init__(self, file_name: str = None, file_mode: str = "w", should_flush: bool = True): + self.file = None + + if file_name is not None: + self.file = open(file_name, file_mode) + + self.should_flush = should_flush + self.stdout = sys.stdout + self.stderr = sys.stderr + + sys.stdout = self + sys.stderr = self + + def __enter__(self) -> "Logger": + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.close() + + def write(self, text: Union[str, bytes]) -> None: + """Write text to stdout (and a file) and optionally flush.""" + if isinstance(text, bytes): + text = text.decode() + if len(text) == 0: # workaround for a bug in VSCode debugger: sys.stdout.write(''); sys.stdout.flush() => crash + return + + if self.file is not None: + self.file.write(text) + + self.stdout.write(text) + + if self.should_flush: + self.flush() + + def flush(self) -> None: + """Flush written text to both stdout and a file, if open.""" + if self.file is not None: + self.file.flush() + + self.stdout.flush() + + def close(self) -> None: + """Flush, close possible files, and remove stdout/stderr mirroring.""" + self.flush() + + # if using multiple loggers, prevent closing in wrong order + if sys.stdout is self: + sys.stdout = self.stdout + if sys.stderr is self: + sys.stderr = self.stderr + + if self.file is not None: + self.file.close() + self.file 
= None + + +# Cache directories +# ------------------------------------------------------------------------------------------ + +_dnnlib_cache_dir = None + +def set_cache_dir(path: str) -> None: + global _dnnlib_cache_dir + _dnnlib_cache_dir = path + +def make_cache_dir_path(*paths: str) -> str: + if _dnnlib_cache_dir is not None: + return os.path.join(_dnnlib_cache_dir, *paths) + if 'DNNLIB_CACHE_DIR' in os.environ: + return os.path.join(os.environ['DNNLIB_CACHE_DIR'], *paths) + if 'HOME' in os.environ: + return os.path.join(os.environ['HOME'], '.cache', 'dnnlib', *paths) + if 'USERPROFILE' in os.environ: + return os.path.join(os.environ['USERPROFILE'], '.cache', 'dnnlib', *paths) + return os.path.join(tempfile.gettempdir(), '.cache', 'dnnlib', *paths) + +# Small util functions +# ------------------------------------------------------------------------------------------ + + +def format_time(seconds: Union[int, float]) -> str: + """Convert the seconds to human readable string with days, hours, minutes and seconds.""" + s = int(np.rint(seconds)) + + if s < 60: + return "{0}s".format(s) + elif s < 60 * 60: + return "{0}m {1:02}s".format(s // 60, s % 60) + elif s < 24 * 60 * 60: + return "{0}h {1:02}m {2:02}s".format(s // (60 * 60), (s // 60) % 60, s % 60) + else: + return "{0}d {1:02}h {2:02}m".format(s // (24 * 60 * 60), (s // (60 * 60)) % 24, (s // 60) % 60) + + +def ask_yes_no(question: str) -> bool: + """Ask the user the question until the user inputs a valid answer.""" + while True: + try: + print("{0} [y/n]".format(question)) + return strtobool(input().lower()) + except ValueError: + pass + + +def tuple_product(t: Tuple) -> Any: + """Calculate the product of the tuple elements.""" + result = 1 + + for v in t: + result *= v + + return result + + +_str_to_ctype = { + "uint8": ctypes.c_ubyte, + "uint16": ctypes.c_uint16, + "uint32": ctypes.c_uint32, + "uint64": ctypes.c_uint64, + "int8": ctypes.c_byte, + "int16": ctypes.c_int16, + "int32": ctypes.c_int32, + "int64": ctypes.c_int64, + "float32": ctypes.c_float, + "float64": ctypes.c_double +} + + +def get_dtype_and_ctype(type_obj: Any) -> Tuple[np.dtype, Any]: + """Given a type name string (or an object having a __name__ attribute), return matching Numpy and ctypes types that have the same size in bytes.""" + type_str = None + + if isinstance(type_obj, str): + type_str = type_obj + elif hasattr(type_obj, "__name__"): + type_str = type_obj.__name__ + elif hasattr(type_obj, "name"): + type_str = type_obj.name + else: + raise RuntimeError("Cannot infer type name from input") + + assert type_str in _str_to_ctype.keys() + + my_dtype = np.dtype(type_str) + my_ctype = _str_to_ctype[type_str] + + assert my_dtype.itemsize == ctypes.sizeof(my_ctype) + + return my_dtype, my_ctype + + +def is_pickleable(obj: Any) -> bool: + try: + with io.BytesIO() as stream: + pickle.dump(obj, stream) + return True + except: + return False + + +# Functionality to import modules/objects by name, and call functions by name +# ------------------------------------------------------------------------------------------ + +def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]: + """Searches for the underlying module behind the name to some python object. 
+ Returns the module and the object name (original name with module part removed).""" + + # allow convenience shorthands, substitute them by full names + obj_name = re.sub("^np.", "numpy.", obj_name) + obj_name = re.sub("^tf.", "tensorflow.", obj_name) + + # list alternatives for (module_name, local_obj_name) + parts = obj_name.split(".") + name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)] + + # try each alternative in turn + for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + return module, local_obj_name + except: + pass + + # maybe some of the modules themselves contain errors? + for module_name, _local_obj_name in name_pairs: + try: + importlib.import_module(module_name) # may raise ImportError + except ImportError: + if not str(sys.exc_info()[1]).startswith("No module named '" + module_name + "'"): + raise + + # maybe the requested attribute is missing? + for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + except ImportError: + pass + + # we are out of luck, but we have no idea why + raise ImportError(obj_name) + + +def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any: + """Traverses the object name and returns the last (rightmost) python object.""" + if obj_name == '': + return module + obj = module + for part in obj_name.split("."): + obj = getattr(obj, part) + return obj + + +def get_obj_by_name(name: str) -> Any: + """Finds the python object with the given name.""" + module, obj_name = get_module_from_obj_name(name) + return get_obj_from_module(module, obj_name) + + +def call_func_by_name(*args, func_name: str = None, **kwargs) -> Any: + """Finds the python object with the given name and calls it as a function.""" + assert func_name is not None + func_obj = get_obj_by_name(func_name) + assert callable(func_obj) + return func_obj(*args, **kwargs) + + +def construct_class_by_name(*args, class_name: str = None, **kwargs) -> Any: + """Finds the python class with the given name and constructs it with the given arguments.""" + return call_func_by_name(*args, func_name=class_name, **kwargs) + + +def get_module_dir_by_obj_name(obj_name: str) -> str: + """Get the directory path of the module containing the given object name.""" + module, _ = get_module_from_obj_name(obj_name) + return os.path.dirname(inspect.getfile(module)) + + +def is_top_level_function(obj: Any) -> bool: + """Determine whether the given object is a top-level function, i.e., defined at module scope using 'def'.""" + return callable(obj) and obj.__name__ in sys.modules[obj.__module__].__dict__ + + +def get_top_level_function_name(obj: Any) -> str: + """Return the fully-qualified name of a top-level function.""" + assert is_top_level_function(obj) + module = obj.__module__ + if module == '__main__': + module = os.path.splitext(os.path.basename(sys.modules[module].__file__))[0] + return module + "." + obj.__name__ + + +# File system helpers +# ------------------------------------------------------------------------------------------ + +def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]: + """List all files recursively in a given directory while ignoring given file and directory names. 
+ Returns list of tuples containing both absolute and relative paths.""" + assert os.path.isdir(dir_path) + base_name = os.path.basename(os.path.normpath(dir_path)) + + if ignores is None: + ignores = [] + + result = [] + + for root, dirs, files in os.walk(dir_path, topdown=True): + for ignore_ in ignores: + dirs_to_remove = [d for d in dirs if fnmatch.fnmatch(d, ignore_)] + + # dirs need to be edited in-place + for d in dirs_to_remove: + dirs.remove(d) + + files = [f for f in files if not fnmatch.fnmatch(f, ignore_)] + + absolute_paths = [os.path.join(root, f) for f in files] + relative_paths = [os.path.relpath(p, dir_path) for p in absolute_paths] + + if add_base_to_relative: + relative_paths = [os.path.join(base_name, p) for p in relative_paths] + + assert len(absolute_paths) == len(relative_paths) + result += zip(absolute_paths, relative_paths) + + return result + + +def copy_files_and_create_dirs(files: List[Tuple[str, str]]) -> None: + """Takes in a list of tuples of (src, dst) paths and copies files. + Will create all necessary directories.""" + for file in files: + target_dir_name = os.path.dirname(file[1]) + + # will create all intermediate-level directories + if not os.path.exists(target_dir_name): + os.makedirs(target_dir_name) + + shutil.copyfile(file[0], file[1]) + + +# URL helpers +# ------------------------------------------------------------------------------------------ + +def is_url(obj: Any, allow_file_urls: bool = False) -> bool: + """Determine whether the given object is a valid URL string.""" + if not isinstance(obj, str) or not "://" in obj: + return False + if allow_file_urls and obj.startswith('file://'): + return True + try: + res = requests.compat.urlparse(obj) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + res = requests.compat.urlparse(requests.compat.urljoin(obj, "/")) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + except: + return False + return True + + +def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True, return_filename: bool = False, cache: bool = True) -> Any: + """Download the given URL and return a binary-mode file object to access the data.""" + assert num_attempts >= 1 + assert not (return_filename and (not cache)) + + # Doesn't look like an URL scheme so interpret it as a local filename. + if not re.match('^[a-z]+://', url): + return url if return_filename else open(url, "rb") + + # Handle file URLs. This code handles unusual file:// patterns that + # arise on Windows: + # + # file:///c:/foo.txt + # + # which would translate to a local '/c:/foo.txt' filename that's + # invalid. Drop the forward slash for such pathnames. + # + # If you touch this code path, you should test it on both Linux and + # Windows. + # + # Some internet resources suggest using urllib.request.url2pathname() but + # but that converts forward slashes to backslashes and this causes + # its own set of problems. + if url.startswith('file://'): + filename = urllib.parse.urlparse(url).path + if re.match(r'^/[a-zA-Z]:', filename): + filename = filename[1:] + return filename if return_filename else open(filename, "rb") + + assert is_url(url) + + # Lookup from cache. 
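+    # Cached downloads are keyed by the MD5 hash of the URL, so a previously
+    # fetched file is reused instead of being downloaded again.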
+ if cache_dir is None: + cache_dir = make_cache_dir_path('downloads') + + url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest() + if cache: + cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*")) + if len(cache_files) == 1: + filename = cache_files[0] + return filename if return_filename else open(filename, "rb") + + # Download. + url_name = None + url_data = None + with requests.Session() as session: + if verbose: + print("Downloading %s ..." % url, end="", flush=True) + for attempts_left in reversed(range(num_attempts)): + try: + with session.get(url) as res: + res.raise_for_status() + if len(res.content) == 0: + raise IOError("No data received") + + if len(res.content) < 8192: + content_str = res.content.decode("utf-8") + if "download_warning" in res.headers.get("Set-Cookie", ""): + links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link] + if len(links) == 1: + url = requests.compat.urljoin(url, links[0]) + raise IOError("Google Drive virus checker nag") + if "Google Drive - Quota exceeded" in content_str: + raise IOError("Google Drive download quota exceeded -- please try again later") + + match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", "")) + url_name = match[1] if match else url + url_data = res.content + if verbose: + print(" done") + break + except KeyboardInterrupt: + raise + except: + if not attempts_left: + if verbose: + print(" failed") + raise + if verbose: + print(".", end="", flush=True) + + # Save to cache. + if cache: + safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name) + cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name) + temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name) + os.makedirs(cache_dir, exist_ok=True) + with open(temp_file, "wb") as f: + f.write(url_data) + os.replace(temp_file, cache_file) # atomic + if return_filename: + return cache_file + + # Return data as file object. + assert not return_filename + return io.BytesIO(url_data) diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/docker_run.sh b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/docker_run.sh new file mode 100644 index 0000000000000000000000000000000000000000..72bbe6a05e8ca02907a6afb129acacc5348a242e --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/docker_run.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +set -e + +# Wrapper script for setting up `docker run` to properly +# cache downloaded files, custom extension builds and +# mount the source directory into the container and make it +# run as non-root user. 
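+# The current directory is mounted at /scratch and used as HOME inside the
+# container, so downloads and custom extension builds are cached on the host.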
+# +# Use it like: +# +# ./docker_run.sh python generate.py --help +# +# To override the default `stylegan2ada:latest` image, run: +# +# IMAGE=my_image:v1.0 ./docker_run.sh python generate.py --help +# + +rest=$@ + +IMAGE="${IMAGE:-sg2ada:latest}" + +CONTAINER_ID=$(docker inspect --format="{{.Id}}" ${IMAGE} 2> /dev/null) +if [[ "${CONTAINER_ID}" ]]; then + docker run --shm-size=2g --gpus all -it --rm -v `pwd`:/scratch --user $(id -u):$(id -g) \ + --workdir=/scratch -e HOME=/scratch $IMAGE $@ +else + echo "Unknown container image: ${IMAGE}" + exit 1 +fi diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/generate.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/generate.py new file mode 100644 index 0000000000000000000000000000000000000000..f7f961931e4e2947a74e29289b0e354d789d7bdc --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/generate.py @@ -0,0 +1,129 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Generate images using pretrained network pickle.""" + +import os +import re +from typing import List, Optional + +import click +import dnnlib +import numpy as np +import PIL.Image +import torch + +import legacy + +#---------------------------------------------------------------------------- + +def num_range(s: str) -> List[int]: + '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' + + range_re = re.compile(r'^(\d+)-(\d+)$') + m = range_re.match(s) + if m: + return list(range(int(m.group(1)), int(m.group(2))+1)) + vals = s.split(',') + return [int(x) for x in vals] + +#---------------------------------------------------------------------------- + +@click.command() +@click.pass_context +@click.option('--network', 'network_pkl', help='Network pickle filename', required=True) +@click.option('--seeds', type=num_range, help='List of random seeds') +@click.option('--trunc', 'truncation_psi', type=float, help='Truncation psi', default=1, show_default=True) +@click.option('--class', 'class_idx', type=int, help='Class label (unconditional if not specified)') +@click.option('--noise-mode', help='Noise mode', type=click.Choice(['const', 'random', 'none']), default='const', show_default=True) +@click.option('--projected-w', help='Projection result file', type=str, metavar='FILE') +@click.option('--outdir', help='Where to save the output images', type=str, required=True, metavar='DIR') +def generate_images( + ctx: click.Context, + network_pkl: str, + seeds: Optional[List[int]], + truncation_psi: float, + noise_mode: str, + outdir: str, + class_idx: Optional[int], + projected_w: Optional[str] +): + """Generate images using pretrained network pickle. 
+ + Examples: + + \b + # Generate curated MetFaces images without truncation (Fig.10 left) + python generate.py --outdir=out --trunc=1 --seeds=85,265,297,849 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl + + \b + # Generate uncurated MetFaces images with truncation (Fig.12 upper left) + python generate.py --outdir=out --trunc=0.7 --seeds=600-605 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl + + \b + # Generate class conditional CIFAR-10 images (Fig.17 left, Car) + python generate.py --outdir=out --seeds=0-35 --class=1 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/cifar10.pkl + + \b + # Render an image from projected W + python generate.py --outdir=out --projected_w=projected_w.npz \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl + """ + + print('Loading networks from "%s"...' % network_pkl) + device = torch.device('cuda') + with dnnlib.util.open_url(network_pkl) as f: + G = legacy.load_network_pkl(f)['G_ema'].to(device) # type: ignore + + os.makedirs(outdir, exist_ok=True) + + # Synthesize the result of a W projection. + if projected_w is not None: + if seeds is not None: + print ('warn: --seeds is ignored when using --projected-w') + print(f'Generating images from projected W "{projected_w}"') + ws = np.load(projected_w)['w'] + ws = torch.tensor(ws, device=device) # pylint: disable=not-callable + assert ws.shape[1:] == (G.num_ws, G.w_dim) + for idx, w in enumerate(ws): + img = G.synthesis(w.unsqueeze(0), noise_mode=noise_mode) + img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8) + img = PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB').save(f'{outdir}/proj{idx:02d}.png') + return + + if seeds is None: + ctx.fail('--seeds option is required when not using --projected-w') + + # Labels. + label = torch.zeros([1, G.c_dim], device=device) + if G.c_dim != 0: + if class_idx is None: + ctx.fail('Must specify class label with --class when using a conditional network') + label[:, class_idx] = 1 + else: + if class_idx is not None: + print ('warn: --class=lbl ignored when running on an unconditional network') + + # Generate images. + for seed_idx, seed in enumerate(seeds): + print('Generating image for seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds))) + z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(device) + img = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode) + img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8) + PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB').save(f'{outdir}/seed{seed:04d}.png') + + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + generate_images() # pylint: disable=no-value-for-parameter + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/legacy.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/legacy.py new file mode 100644 index 0000000000000000000000000000000000000000..c1fd801b831f6289e872092515e71d07b4b8bb3e --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/legacy.py @@ -0,0 +1,320 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
+# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +import click +import pickle +import re +import copy +import numpy as np +import torch +import dnnlib +from torch_utils import misc + +#---------------------------------------------------------------------------- + +def load_network_pkl(f, force_fp16=False): + data = _LegacyUnpickler(f).load() + + # Legacy TensorFlow pickle => convert. + if isinstance(data, tuple) and len(data) == 3 and all(isinstance(net, _TFNetworkStub) for net in data): + tf_G, tf_D, tf_Gs = data + G = convert_tf_generator(tf_G) + D = convert_tf_discriminator(tf_D) + G_ema = convert_tf_generator(tf_Gs) + data = dict(G=G, D=D, G_ema=G_ema) + + # Add missing fields. + if 'training_set_kwargs' not in data: + data['training_set_kwargs'] = None + if 'augment_pipe' not in data: + data['augment_pipe'] = None + + # Validate contents. + assert isinstance(data['G'], torch.nn.Module) + assert isinstance(data['D'], torch.nn.Module) + assert isinstance(data['G_ema'], torch.nn.Module) + assert isinstance(data['training_set_kwargs'], (dict, type(None))) + assert isinstance(data['augment_pipe'], (torch.nn.Module, type(None))) + + # Force FP16. + if force_fp16: + for key in ['G', 'D', 'G_ema']: + old = data[key] + kwargs = copy.deepcopy(old.init_kwargs) + if key.startswith('G'): + kwargs.synthesis_kwargs = dnnlib.EasyDict(kwargs.get('synthesis_kwargs', {})) + kwargs.synthesis_kwargs.num_fp16_res = 4 + kwargs.synthesis_kwargs.conv_clamp = 256 + if key.startswith('D'): + kwargs.num_fp16_res = 4 + kwargs.conv_clamp = 256 + if kwargs != old.init_kwargs: + new = type(old)(**kwargs).eval().requires_grad_(False) + misc.copy_params_and_buffers(old, new, require_all=True) + data[key] = new + return data + +#---------------------------------------------------------------------------- + +class _TFNetworkStub(dnnlib.EasyDict): + pass + +class _LegacyUnpickler(pickle.Unpickler): + def find_class(self, module, name): + if module == 'dnnlib.tflib.network' and name == 'Network': + return _TFNetworkStub + return super().find_class(module, name) + +#---------------------------------------------------------------------------- + +def _collect_tf_params(tf_net): + # pylint: disable=protected-access + tf_params = dict() + def recurse(prefix, tf_net): + for name, value in tf_net.variables: + tf_params[prefix + name] = value + for name, comp in tf_net.components.items(): + recurse(prefix + name + '/', comp) + recurse('', tf_net) + return tf_params + +#---------------------------------------------------------------------------- + +def _populate_module_params(module, *patterns): + for name, tensor in misc.named_params_and_buffers(module): + found = False + value = None + for pattern, value_fn in zip(patterns[0::2], patterns[1::2]): + match = re.fullmatch(pattern, name) + if match: + found = True + if value_fn is not None: + value = value_fn(*match.groups()) + break + try: + assert found + if value is not None: + tensor.copy_(torch.from_numpy(np.array(value))) + except: + print(name, list(tensor.shape)) + raise + +#---------------------------------------------------------------------------- + +def convert_tf_generator(tf_G): + if tf_G.version < 4: + raise ValueError('TensorFlow pickle version too 
low') + + # Collect kwargs. + tf_kwargs = tf_G.static_kwargs + known_kwargs = set() + def kwarg(tf_name, default=None, none=None): + known_kwargs.add(tf_name) + val = tf_kwargs.get(tf_name, default) + return val if val is not None else none + + # Convert kwargs. + kwargs = dnnlib.EasyDict( + z_dim = kwarg('latent_size', 512), + c_dim = kwarg('label_size', 0), + w_dim = kwarg('dlatent_size', 512), + img_resolution = kwarg('resolution', 1024), + img_channels = kwarg('num_channels', 3), + mapping_kwargs = dnnlib.EasyDict( + num_layers = kwarg('mapping_layers', 8), + embed_features = kwarg('label_fmaps', None), + layer_features = kwarg('mapping_fmaps', None), + activation = kwarg('mapping_nonlinearity', 'lrelu'), + lr_multiplier = kwarg('mapping_lrmul', 0.01), + w_avg_beta = kwarg('w_avg_beta', 0.995, none=1), + ), + synthesis_kwargs = dnnlib.EasyDict( + channel_base = kwarg('fmap_base', 16384) * 2, + channel_max = kwarg('fmap_max', 512), + num_fp16_res = kwarg('num_fp16_res', 0), + conv_clamp = kwarg('conv_clamp', None), + architecture = kwarg('architecture', 'skip'), + resample_filter = kwarg('resample_kernel', [1,3,3,1]), + use_noise = kwarg('use_noise', True), + activation = kwarg('nonlinearity', 'lrelu'), + ), + ) + + # Check for unknown kwargs. + kwarg('truncation_psi') + kwarg('truncation_cutoff') + kwarg('style_mixing_prob') + kwarg('structure') + unknown_kwargs = list(set(tf_kwargs.keys()) - known_kwargs) + if len(unknown_kwargs) > 0: + raise ValueError('Unknown TensorFlow kwarg', unknown_kwargs[0]) + + # Collect params. + tf_params = _collect_tf_params(tf_G) + for name, value in list(tf_params.items()): + match = re.fullmatch(r'ToRGB_lod(\d+)/(.*)', name) + if match: + r = kwargs.img_resolution // (2 ** int(match.group(1))) + tf_params[f'{r}x{r}/ToRGB/{match.group(2)}'] = value + kwargs.synthesis.kwargs.architecture = 'orig' + #for name, value in tf_params.items(): print(f'{name:<50s}{list(value.shape)}') + + # Convert params. 
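+    # Instantiate a native PyTorch generator and copy each TensorFlow tensor into the
+    # matching parameter: convolution weights are transposed from HWIO to OIHW layout
+    # and the style modulation biases are offset by +1.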
+ from training import networks + G = networks.Generator(**kwargs).eval().requires_grad_(False) + # pylint: disable=unnecessary-lambda + _populate_module_params(G, + r'mapping\.w_avg', lambda: tf_params[f'dlatent_avg'], + r'mapping\.embed\.weight', lambda: tf_params[f'mapping/LabelEmbed/weight'].transpose(), + r'mapping\.embed\.bias', lambda: tf_params[f'mapping/LabelEmbed/bias'], + r'mapping\.fc(\d+)\.weight', lambda i: tf_params[f'mapping/Dense{i}/weight'].transpose(), + r'mapping\.fc(\d+)\.bias', lambda i: tf_params[f'mapping/Dense{i}/bias'], + r'synthesis\.b4\.const', lambda: tf_params[f'synthesis/4x4/Const/const'][0], + r'synthesis\.b4\.conv1\.weight', lambda: tf_params[f'synthesis/4x4/Conv/weight'].transpose(3, 2, 0, 1), + r'synthesis\.b4\.conv1\.bias', lambda: tf_params[f'synthesis/4x4/Conv/bias'], + r'synthesis\.b4\.conv1\.noise_const', lambda: tf_params[f'synthesis/noise0'][0, 0], + r'synthesis\.b4\.conv1\.noise_strength', lambda: tf_params[f'synthesis/4x4/Conv/noise_strength'], + r'synthesis\.b4\.conv1\.affine\.weight', lambda: tf_params[f'synthesis/4x4/Conv/mod_weight'].transpose(), + r'synthesis\.b4\.conv1\.affine\.bias', lambda: tf_params[f'synthesis/4x4/Conv/mod_bias'] + 1, + r'synthesis\.b(\d+)\.conv0\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/weight'][::-1, ::-1].transpose(3, 2, 0, 1), + r'synthesis\.b(\d+)\.conv0\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/bias'], + r'synthesis\.b(\d+)\.conv0\.noise_const', lambda r: tf_params[f'synthesis/noise{int(np.log2(int(r)))*2-5}'][0, 0], + r'synthesis\.b(\d+)\.conv0\.noise_strength', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/noise_strength'], + r'synthesis\.b(\d+)\.conv0\.affine\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/mod_weight'].transpose(), + r'synthesis\.b(\d+)\.conv0\.affine\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/Conv0_up/mod_bias'] + 1, + r'synthesis\.b(\d+)\.conv1\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/weight'].transpose(3, 2, 0, 1), + r'synthesis\.b(\d+)\.conv1\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/bias'], + r'synthesis\.b(\d+)\.conv1\.noise_const', lambda r: tf_params[f'synthesis/noise{int(np.log2(int(r)))*2-4}'][0, 0], + r'synthesis\.b(\d+)\.conv1\.noise_strength', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/noise_strength'], + r'synthesis\.b(\d+)\.conv1\.affine\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/mod_weight'].transpose(), + r'synthesis\.b(\d+)\.conv1\.affine\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/Conv1/mod_bias'] + 1, + r'synthesis\.b(\d+)\.torgb\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/ToRGB/weight'].transpose(3, 2, 0, 1), + r'synthesis\.b(\d+)\.torgb\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/ToRGB/bias'], + r'synthesis\.b(\d+)\.torgb\.affine\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/ToRGB/mod_weight'].transpose(), + r'synthesis\.b(\d+)\.torgb\.affine\.bias', lambda r: tf_params[f'synthesis/{r}x{r}/ToRGB/mod_bias'] + 1, + r'synthesis\.b(\d+)\.skip\.weight', lambda r: tf_params[f'synthesis/{r}x{r}/Skip/weight'][::-1, ::-1].transpose(3, 2, 0, 1), + r'.*\.resample_filter', None, + ) + return G + +#---------------------------------------------------------------------------- + +def convert_tf_discriminator(tf_D): + if tf_D.version < 4: + raise ValueError('TensorFlow pickle version too low') + + # Collect kwargs. 
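+    # static_kwargs holds the constructor arguments of the original TF network; kwarg()
+    # records every name it reads so that any leftover, unknown argument is reported below.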
+ tf_kwargs = tf_D.static_kwargs + known_kwargs = set() + def kwarg(tf_name, default=None): + known_kwargs.add(tf_name) + return tf_kwargs.get(tf_name, default) + + # Convert kwargs. + kwargs = dnnlib.EasyDict( + c_dim = kwarg('label_size', 0), + img_resolution = kwarg('resolution', 1024), + img_channels = kwarg('num_channels', 3), + architecture = kwarg('architecture', 'resnet'), + channel_base = kwarg('fmap_base', 16384) * 2, + channel_max = kwarg('fmap_max', 512), + num_fp16_res = kwarg('num_fp16_res', 0), + conv_clamp = kwarg('conv_clamp', None), + cmap_dim = kwarg('mapping_fmaps', None), + block_kwargs = dnnlib.EasyDict( + activation = kwarg('nonlinearity', 'lrelu'), + resample_filter = kwarg('resample_kernel', [1,3,3,1]), + freeze_layers = kwarg('freeze_layers', 0), + ), + mapping_kwargs = dnnlib.EasyDict( + num_layers = kwarg('mapping_layers', 0), + embed_features = kwarg('mapping_fmaps', None), + layer_features = kwarg('mapping_fmaps', None), + activation = kwarg('nonlinearity', 'lrelu'), + lr_multiplier = kwarg('mapping_lrmul', 0.1), + ), + epilogue_kwargs = dnnlib.EasyDict( + mbstd_group_size = kwarg('mbstd_group_size', None), + mbstd_num_channels = kwarg('mbstd_num_features', 1), + activation = kwarg('nonlinearity', 'lrelu'), + ), + ) + + # Check for unknown kwargs. + kwarg('structure') + unknown_kwargs = list(set(tf_kwargs.keys()) - known_kwargs) + if len(unknown_kwargs) > 0: + raise ValueError('Unknown TensorFlow kwarg', unknown_kwargs[0]) + + # Collect params. + tf_params = _collect_tf_params(tf_D) + for name, value in list(tf_params.items()): + match = re.fullmatch(r'FromRGB_lod(\d+)/(.*)', name) + if match: + r = kwargs.img_resolution // (2 ** int(match.group(1))) + tf_params[f'{r}x{r}/FromRGB/{match.group(2)}'] = value + kwargs.architecture = 'orig' + #for name, value in tf_params.items(): print(f'{name:<50s}{list(value.shape)}') + + # Convert params. 
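+    # As with the generator, the table below remaps TF variable scopes to PyTorch
+    # module names; the expression `{["","_down"][int(i)]}` picks the TF scope
+    # "Conv0" for conv0 and "Conv1_down" for conv1, since only the second conv of
+    # each discriminator block downsamples.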
+ from training import networks + D = networks.Discriminator(**kwargs).eval().requires_grad_(False) + # pylint: disable=unnecessary-lambda + _populate_module_params(D, + r'b(\d+)\.fromrgb\.weight', lambda r: tf_params[f'{r}x{r}/FromRGB/weight'].transpose(3, 2, 0, 1), + r'b(\d+)\.fromrgb\.bias', lambda r: tf_params[f'{r}x{r}/FromRGB/bias'], + r'b(\d+)\.conv(\d+)\.weight', lambda r, i: tf_params[f'{r}x{r}/Conv{i}{["","_down"][int(i)]}/weight'].transpose(3, 2, 0, 1), + r'b(\d+)\.conv(\d+)\.bias', lambda r, i: tf_params[f'{r}x{r}/Conv{i}{["","_down"][int(i)]}/bias'], + r'b(\d+)\.skip\.weight', lambda r: tf_params[f'{r}x{r}/Skip/weight'].transpose(3, 2, 0, 1), + r'mapping\.embed\.weight', lambda: tf_params[f'LabelEmbed/weight'].transpose(), + r'mapping\.embed\.bias', lambda: tf_params[f'LabelEmbed/bias'], + r'mapping\.fc(\d+)\.weight', lambda i: tf_params[f'Mapping{i}/weight'].transpose(), + r'mapping\.fc(\d+)\.bias', lambda i: tf_params[f'Mapping{i}/bias'], + r'b4\.conv\.weight', lambda: tf_params[f'4x4/Conv/weight'].transpose(3, 2, 0, 1), + r'b4\.conv\.bias', lambda: tf_params[f'4x4/Conv/bias'], + r'b4\.fc\.weight', lambda: tf_params[f'4x4/Dense0/weight'].transpose(), + r'b4\.fc\.bias', lambda: tf_params[f'4x4/Dense0/bias'], + r'b4\.out\.weight', lambda: tf_params[f'Output/weight'].transpose(), + r'b4\.out\.bias', lambda: tf_params[f'Output/bias'], + r'.*\.resample_filter', None, + ) + return D + +#---------------------------------------------------------------------------- + +@click.command() +@click.option('--source', help='Input pickle', required=True, metavar='PATH') +@click.option('--dest', help='Output pickle', required=True, metavar='PATH') +@click.option('--force-fp16', help='Force the networks to use FP16', type=bool, default=False, metavar='BOOL', show_default=True) +def convert_network_pickle(source, dest, force_fp16): + """Convert legacy network pickle into the native PyTorch format. + + The tool is able to load the main network configurations exported using the TensorFlow version of StyleGAN2 or StyleGAN2-ADA. + It does not support e.g. StyleGAN2-ADA comparison methods, StyleGAN2 configs A-D, or StyleGAN1 networks. + + Example: + + \b + python legacy.py \\ + --source=https://nvlabs-fi-cdn.nvidia.com/stylegan2/networks/stylegan2-cat-config-f.pkl \\ + --dest=stylegan2-cat-config-f.pkl + """ + print(f'Loading "{source}"...') + with dnnlib.util.open_url(source) as f: + data = load_network_pkl(f, force_fp16=force_fp16) + print(f'Saving "{dest}"...') + with open(dest, 'wb') as f: + pickle.dump(data, f) + print('Done.') + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + convert_network_pickle() # pylint: disable=no-value-for-parameter + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e1e1a5ba99e56a56ecaa14f7d4fa41777789c0cf --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. 
Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +# empty diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/frechet_inception_distance.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/frechet_inception_distance.py new file mode 100644 index 0000000000000000000000000000000000000000..bcfbb0d987e050567323b5d9ddffd585ccf7c1aa --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/frechet_inception_distance.py @@ -0,0 +1,41 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Frechet Inception Distance (FID) from the paper +"GANs trained by a two time-scale update rule converge to a local Nash +equilibrium". Matches the original implementation by Heusel et al. at +https://github.com/bioinf-jku/TTUR/blob/master/fid.py""" + +import numpy as np +import scipy.linalg +from . import metric_utils + +#---------------------------------------------------------------------------- + +def compute_fid(opts, max_real, num_gen): + # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz + detector_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt' + detector_kwargs = dict(return_features=True) # Return raw features before the softmax layer. + + mu_real, sigma_real = metric_utils.compute_feature_stats_for_dataset( + opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, + rel_lo=0, rel_hi=0, capture_mean_cov=True, max_items=max_real).get_mean_cov() + + mu_gen, sigma_gen = metric_utils.compute_feature_stats_for_generator( + opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, + rel_lo=0, rel_hi=1, capture_mean_cov=True, max_items=num_gen).get_mean_cov() + + if opts.rank != 0: + return float('nan') + + m = np.square(mu_gen - mu_real).sum() + s, _ = scipy.linalg.sqrtm(np.dot(sigma_gen, sigma_real), disp=False) # pylint: disable=no-member + fid = np.real(m + np.trace(sigma_gen + sigma_real - s * 2)) + return float(fid) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/inception_score.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/inception_score.py new file mode 100644 index 0000000000000000000000000000000000000000..4158667c73a4b84b9e3fa749af959ebdc8688411 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/inception_score.py @@ -0,0 +1,38 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
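+# For reference, compute_is() below estimates
+#     IS = exp( E_x[ KL( p(y|x) || p(y) ) ] )
+# by averaging the exponentiated KL term over `num_splits` disjoint splits of the
+# generated samples and reporting the mean and standard deviation across splits.
+# (compute_fid() in frechet_inception_distance.py above analogously evaluates
+# ||mu_r - mu_g||^2 + Tr(Sigma_r + Sigma_g - 2 sqrt(Sigma_r Sigma_g)).)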
+ +"""Inception Score (IS) from the paper "Improved techniques for training +GANs". Matches the original implementation by Salimans et al. at +https://github.com/openai/improved-gan/blob/master/inception_score/model.py""" + +import numpy as np +from . import metric_utils + +#---------------------------------------------------------------------------- + +def compute_is(opts, num_gen, num_splits): + # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz + detector_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt' + detector_kwargs = dict(no_output_bias=True) # Match the original implementation by not applying bias in the softmax layer. + + gen_probs = metric_utils.compute_feature_stats_for_generator( + opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, + capture_all=True, max_items=num_gen).get_all() + + if opts.rank != 0: + return float('nan'), float('nan') + + scores = [] + for i in range(num_splits): + part = gen_probs[i * num_gen // num_splits : (i + 1) * num_gen // num_splits] + kl = part * (np.log(part) - np.log(np.mean(part, axis=0, keepdims=True))) + kl = np.mean(np.sum(kl, axis=1)) + scores.append(np.exp(kl)) + return float(np.mean(scores)), float(np.std(scores)) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/kernel_inception_distance.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/kernel_inception_distance.py new file mode 100644 index 0000000000000000000000000000000000000000..12b4a95e6c628a45f0e8a618c7e943e04fa62d69 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/kernel_inception_distance.py @@ -0,0 +1,46 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Kernel Inception Distance (KID) from the paper "Demystifying MMD +GANs". Matches the original implementation by Binkowski et al. at +https://github.com/mbinkowski/MMD-GAN/blob/master/gan/compute_scores.py""" + +import numpy as np +from . import metric_utils + +#---------------------------------------------------------------------------- + +def compute_kid(opts, max_real, num_gen, num_subsets, max_subset_size): + # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz + detector_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt' + detector_kwargs = dict(return_features=True) # Return raw features before the softmax layer. 
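+    # The subset loop further below implements the block estimator of KID: for each
+    # of num_subsets random subsets of size m it evaluates the unbiased MMD^2 with
+    # the cubic polynomial kernel k(x, y) = (x.y / d + 1)^3, where d is the feature
+    # dimension, and KID is reported as the average of those estimates.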
+ + real_features = metric_utils.compute_feature_stats_for_dataset( + opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, + rel_lo=0, rel_hi=0, capture_all=True, max_items=max_real).get_all() + + gen_features = metric_utils.compute_feature_stats_for_generator( + opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, + rel_lo=0, rel_hi=1, capture_all=True, max_items=num_gen).get_all() + + if opts.rank != 0: + return float('nan') + + n = real_features.shape[1] + m = min(min(real_features.shape[0], gen_features.shape[0]), max_subset_size) + t = 0 + for _subset_idx in range(num_subsets): + x = gen_features[np.random.choice(gen_features.shape[0], m, replace=False)] + y = real_features[np.random.choice(real_features.shape[0], m, replace=False)] + a = (x @ x.T / n + 1) ** 3 + (y @ y.T / n + 1) ** 3 + b = (x @ y.T / n + 1) ** 3 + t += (a.sum() - np.diag(a).sum()) / (m - 1) - b.sum() * 2 / m + kid = t / num_subsets / m + return float(kid) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/metric_main.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/metric_main.py new file mode 100644 index 0000000000000000000000000000000000000000..7f658ae9e5bde8a5176e842ca5a9a618b00e0faa --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/metric_main.py @@ -0,0 +1,152 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +import os +import time +import json +import torch +import dnnlib + +from . import metric_utils +from . import frechet_inception_distance +from . import kernel_inception_distance +from . import precision_recall +from . import perceptual_path_length +from . import inception_score + +#---------------------------------------------------------------------------- + +_metric_dict = dict() # name => fn + +def register_metric(fn): + assert callable(fn) + _metric_dict[fn.__name__] = fn + return fn + +def is_valid_metric(metric): + return metric in _metric_dict + +def list_valid_metrics(): + return list(_metric_dict.keys()) + +#---------------------------------------------------------------------------- + +def calc_metric(metric, **kwargs): # See metric_utils.MetricOptions for the full list of arguments. + assert is_valid_metric(metric) + opts = metric_utils.MetricOptions(**kwargs) + + # Calculate. + start_time = time.time() + results = _metric_dict[metric](opts) + total_time = time.time() - start_time + + # Broadcast results. + for key, value in list(results.items()): + if opts.num_gpus > 1: + value = torch.as_tensor(value, dtype=torch.float64, device=opts.device) + torch.distributed.broadcast(tensor=value, src=0) + value = float(value.cpu()) + results[key] = value + + # Decorate with metadata. 
+ return dnnlib.EasyDict( + results = dnnlib.EasyDict(results), + metric = metric, + total_time = total_time, + total_time_str = dnnlib.util.format_time(total_time), + num_gpus = opts.num_gpus, + ) + +#---------------------------------------------------------------------------- + +def report_metric(result_dict, run_dir=None, snapshot_pkl=None): + metric = result_dict['metric'] + assert is_valid_metric(metric) + if run_dir is not None and snapshot_pkl is not None: + snapshot_pkl = os.path.relpath(snapshot_pkl, run_dir) + + jsonl_line = json.dumps(dict(result_dict, snapshot_pkl=snapshot_pkl, timestamp=time.time())) + print(jsonl_line) + if run_dir is not None and os.path.isdir(run_dir): + with open(os.path.join(run_dir, f'metric-{metric}.jsonl'), 'at') as f: + f.write(jsonl_line + '\n') + +#---------------------------------------------------------------------------- +# Primary metrics. + +@register_metric +def fid50k_full(opts): + opts.dataset_kwargs.update(max_size=None, xflip=False) + fid = frechet_inception_distance.compute_fid(opts, max_real=None, num_gen=50000) + return dict(fid50k_full=fid) + +@register_metric +def kid50k_full(opts): + opts.dataset_kwargs.update(max_size=None, xflip=False) + kid = kernel_inception_distance.compute_kid(opts, max_real=1000000, num_gen=50000, num_subsets=100, max_subset_size=1000) + return dict(kid50k_full=kid) + +@register_metric +def pr50k3_full(opts): + opts.dataset_kwargs.update(max_size=None, xflip=False) + precision, recall = precision_recall.compute_pr(opts, max_real=200000, num_gen=50000, nhood_size=3, row_batch_size=10000, col_batch_size=10000) + return dict(pr50k3_full_precision=precision, pr50k3_full_recall=recall) + +@register_metric +def ppl2_wend(opts): + ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=False, batch_size=2) + return dict(ppl2_wend=ppl) + +@register_metric +def is50k(opts): + opts.dataset_kwargs.update(max_size=None, xflip=False) + mean, std = inception_score.compute_is(opts, num_gen=50000, num_splits=10) + return dict(is50k_mean=mean, is50k_std=std) + +#---------------------------------------------------------------------------- +# Legacy metrics. 
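+# The legacy variants below (and the primary metrics above) all follow the same
+# pattern: decorate a function taking `opts` with @register_metric and return a
+# dict of named scalar results. A hypothetical smaller variant, shown here for
+# illustration only, would look like:
+#
+#   @register_metric
+#   def fid10k(opts):
+#       opts.dataset_kwargs.update(max_size=None)
+#       fid = frechet_inception_distance.compute_fid(opts, max_real=10000, num_gen=10000)
+#       return dict(fid10k=fid)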
+ +@register_metric +def fid50k(opts): + opts.dataset_kwargs.update(max_size=None) + fid = frechet_inception_distance.compute_fid(opts, max_real=50000, num_gen=50000) + return dict(fid50k=fid) + +@register_metric +def kid50k(opts): + opts.dataset_kwargs.update(max_size=None) + kid = kernel_inception_distance.compute_kid(opts, max_real=50000, num_gen=50000, num_subsets=100, max_subset_size=1000) + return dict(kid50k=kid) + +@register_metric +def pr50k3(opts): + opts.dataset_kwargs.update(max_size=None) + precision, recall = precision_recall.compute_pr(opts, max_real=50000, num_gen=50000, nhood_size=3, row_batch_size=10000, col_batch_size=10000) + return dict(pr50k3_precision=precision, pr50k3_recall=recall) + +@register_metric +def ppl_zfull(opts): + ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='z', sampling='full', crop=True, batch_size=2) + return dict(ppl_zfull=ppl) + +@register_metric +def ppl_wfull(opts): + ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='w', sampling='full', crop=True, batch_size=2) + return dict(ppl_wfull=ppl) + +@register_metric +def ppl_zend(opts): + ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='z', sampling='end', crop=True, batch_size=2) + return dict(ppl_zend=ppl) + +@register_metric +def ppl_wend(opts): + ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=True, batch_size=2) + return dict(ppl_wend=ppl) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/metric_utils.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/metric_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8b13c43173473f113971007c4a5ca859749c63dc --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/metric_utils.py @@ -0,0 +1,275 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ +import os +import time +import hashlib +import pickle +import copy +import uuid +import numpy as np +import torch +import dnnlib + +#---------------------------------------------------------------------------- + +class MetricOptions: + def __init__(self, G=None, G_kwargs={}, dataset_kwargs={}, num_gpus=1, rank=0, device=None, progress=None, cache=True): + assert 0 <= rank < num_gpus + self.G = G + self.G_kwargs = dnnlib.EasyDict(G_kwargs) + self.dataset_kwargs = dnnlib.EasyDict(dataset_kwargs) + self.num_gpus = num_gpus + self.rank = rank + self.device = device if device is not None else torch.device('cuda', rank) + self.progress = progress.sub() if progress is not None and rank == 0 else ProgressMonitor() + self.cache = cache + +#---------------------------------------------------------------------------- + +_feature_detector_cache = dict() + +def get_feature_detector_name(url): + return os.path.splitext(url.split('/')[-1])[0] + +def get_feature_detector(url, device=torch.device('cpu'), num_gpus=1, rank=0, verbose=False): + assert 0 <= rank < num_gpus + key = (url, device) + if key not in _feature_detector_cache: + is_leader = (rank == 0) + if not is_leader and num_gpus > 1: + torch.distributed.barrier() # leader goes first + with dnnlib.util.open_url(url, verbose=(verbose and is_leader)) as f: + _feature_detector_cache[key] = torch.jit.load(f).eval().to(device) + if is_leader and num_gpus > 1: + torch.distributed.barrier() # others follow + return _feature_detector_cache[key] + +#---------------------------------------------------------------------------- + +class FeatureStats: + def __init__(self, capture_all=False, capture_mean_cov=False, max_items=None): + self.capture_all = capture_all + self.capture_mean_cov = capture_mean_cov + self.max_items = max_items + self.num_items = 0 + self.num_features = None + self.all_features = None + self.raw_mean = None + self.raw_cov = None + + def set_num_features(self, num_features): + if self.num_features is not None: + assert num_features == self.num_features + else: + self.num_features = num_features + self.all_features = [] + self.raw_mean = np.zeros([num_features], dtype=np.float64) + self.raw_cov = np.zeros([num_features, num_features], dtype=np.float64) + + def is_full(self): + return (self.max_items is not None) and (self.num_items >= self.max_items) + + def append(self, x): + x = np.asarray(x, dtype=np.float32) + assert x.ndim == 2 + if (self.max_items is not None) and (self.num_items + x.shape[0] > self.max_items): + if self.num_items >= self.max_items: + return + x = x[:self.max_items - self.num_items] + + self.set_num_features(x.shape[1]) + self.num_items += x.shape[0] + if self.capture_all: + self.all_features.append(x) + if self.capture_mean_cov: + x64 = x.astype(np.float64) + self.raw_mean += x64.sum(axis=0) + self.raw_cov += x64.T @ x64 + + def append_torch(self, x, num_gpus=1, rank=0): + assert isinstance(x, torch.Tensor) and x.ndim == 2 + assert 0 <= rank < num_gpus + if num_gpus > 1: + ys = [] + for src in range(num_gpus): + y = x.clone() + torch.distributed.broadcast(y, src=src) + ys.append(y) + x = torch.stack(ys, dim=1).flatten(0, 1) # interleave samples + self.append(x.cpu().numpy()) + + def get_all(self): + assert self.capture_all + return np.concatenate(self.all_features, axis=0) + + def get_all_torch(self): + return torch.from_numpy(self.get_all()) + + def get_mean_cov(self): + assert self.capture_mean_cov + mean = self.raw_mean / self.num_items + cov = self.raw_cov / self.num_items + cov = cov - np.outer(mean, mean) + 
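+        # raw_mean and raw_cov hold running sums of x and x x^T (in float64), so
+        # the line above recovers the covariance via Cov[x] = E[x x^T] - E[x] E[x]^T.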
return mean, cov + + def save(self, pkl_file): + with open(pkl_file, 'wb') as f: + pickle.dump(self.__dict__, f) + + @staticmethod + def load(pkl_file): + with open(pkl_file, 'rb') as f: + s = dnnlib.EasyDict(pickle.load(f)) + obj = FeatureStats(capture_all=s.capture_all, max_items=s.max_items) + obj.__dict__.update(s) + return obj + +#---------------------------------------------------------------------------- + +class ProgressMonitor: + def __init__(self, tag=None, num_items=None, flush_interval=1000, verbose=False, progress_fn=None, pfn_lo=0, pfn_hi=1000, pfn_total=1000): + self.tag = tag + self.num_items = num_items + self.verbose = verbose + self.flush_interval = flush_interval + self.progress_fn = progress_fn + self.pfn_lo = pfn_lo + self.pfn_hi = pfn_hi + self.pfn_total = pfn_total + self.start_time = time.time() + self.batch_time = self.start_time + self.batch_items = 0 + if self.progress_fn is not None: + self.progress_fn(self.pfn_lo, self.pfn_total) + + def update(self, cur_items): + assert (self.num_items is None) or (cur_items <= self.num_items) + if (cur_items < self.batch_items + self.flush_interval) and (self.num_items is None or cur_items < self.num_items): + return + cur_time = time.time() + total_time = cur_time - self.start_time + time_per_item = (cur_time - self.batch_time) / max(cur_items - self.batch_items, 1) + if (self.verbose) and (self.tag is not None): + print(f'{self.tag:<19s} items {cur_items:<7d} time {dnnlib.util.format_time(total_time):<12s} ms/item {time_per_item*1e3:.2f}') + self.batch_time = cur_time + self.batch_items = cur_items + + if (self.progress_fn is not None) and (self.num_items is not None): + self.progress_fn(self.pfn_lo + (self.pfn_hi - self.pfn_lo) * (cur_items / self.num_items), self.pfn_total) + + def sub(self, tag=None, num_items=None, flush_interval=1000, rel_lo=0, rel_hi=1): + return ProgressMonitor( + tag = tag, + num_items = num_items, + flush_interval = flush_interval, + verbose = self.verbose, + progress_fn = self.progress_fn, + pfn_lo = self.pfn_lo + (self.pfn_hi - self.pfn_lo) * rel_lo, + pfn_hi = self.pfn_lo + (self.pfn_hi - self.pfn_lo) * rel_hi, + pfn_total = self.pfn_total, + ) + +#---------------------------------------------------------------------------- + +def compute_feature_stats_for_dataset(opts, detector_url, detector_kwargs, rel_lo=0, rel_hi=1, batch_size=64, data_loader_kwargs=None, max_items=None, **stats_kwargs): + dataset = dnnlib.util.construct_class_by_name(**opts.dataset_kwargs) + if data_loader_kwargs is None: + data_loader_kwargs = dict(pin_memory=True, num_workers=3, prefetch_factor=2) + + # Try to lookup from cache. + cache_file = None + if opts.cache: + # Choose cache file name. + args = dict(dataset_kwargs=opts.dataset_kwargs, detector_url=detector_url, detector_kwargs=detector_kwargs, stats_kwargs=stats_kwargs) + md5 = hashlib.md5(repr(sorted(args.items())).encode('utf-8')) + cache_tag = f'{dataset.name}-{get_feature_detector_name(detector_url)}-{md5.hexdigest()}' + cache_file = dnnlib.make_cache_dir_path('gan-metrics', cache_tag + '.pkl') + + # Check if the file exists (all processes must agree). + flag = os.path.isfile(cache_file) if opts.rank == 0 else False + if opts.num_gpus > 1: + flag = torch.as_tensor(flag, dtype=torch.float32, device=opts.device) + torch.distributed.broadcast(tensor=flag, src=0) + flag = (float(flag.cpu()) != 0) + + # Load. + if flag: + return FeatureStats.load(cache_file) + + # Initialize. 
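+    # Each rank processes an interleaved slice of the dataset below (indices rank,
+    # rank + num_gpus, ...), wrapping around when the dataset size is not divisible
+    # by num_gpus; FeatureStats.append_torch() then re-interleaves the per-rank
+    # features so that every process ends up with identical statistics.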
+ num_items = len(dataset) + if max_items is not None: + num_items = min(num_items, max_items) + stats = FeatureStats(max_items=num_items, **stats_kwargs) + progress = opts.progress.sub(tag='dataset features', num_items=num_items, rel_lo=rel_lo, rel_hi=rel_hi) + detector = get_feature_detector(url=detector_url, device=opts.device, num_gpus=opts.num_gpus, rank=opts.rank, verbose=progress.verbose) + + # Main loop. + item_subset = [(i * opts.num_gpus + opts.rank) % num_items for i in range((num_items - 1) // opts.num_gpus + 1)] + for images, _labels in torch.utils.data.DataLoader(dataset=dataset, sampler=item_subset, batch_size=batch_size, **data_loader_kwargs): + if images.shape[1] == 1: + images = images.repeat([1, 3, 1, 1]) + features = detector(images.to(opts.device), **detector_kwargs) + stats.append_torch(features, num_gpus=opts.num_gpus, rank=opts.rank) + progress.update(stats.num_items) + + # Save to cache. + if cache_file is not None and opts.rank == 0: + os.makedirs(os.path.dirname(cache_file), exist_ok=True) + temp_file = cache_file + '.' + uuid.uuid4().hex + stats.save(temp_file) + os.replace(temp_file, cache_file) # atomic + return stats + +#---------------------------------------------------------------------------- + +def compute_feature_stats_for_generator(opts, detector_url, detector_kwargs, rel_lo=0, rel_hi=1, batch_size=64, batch_gen=None, jit=False, **stats_kwargs): + if batch_gen is None: + batch_gen = min(batch_size, 4) + assert batch_size % batch_gen == 0 + + # Setup generator and load labels. + G = copy.deepcopy(opts.G).eval().requires_grad_(False).to(opts.device) + dataset = dnnlib.util.construct_class_by_name(**opts.dataset_kwargs) + + # Image generation func. + def run_generator(z, c): + img = G(z=z, c=c, **opts.G_kwargs) + img = (img * 127.5 + 128).clamp(0, 255).to(torch.uint8) + return img + + # JIT. + if jit: + z = torch.zeros([batch_gen, G.z_dim], device=opts.device) + c = torch.zeros([batch_gen, G.c_dim], device=opts.device) + run_generator = torch.jit.trace(run_generator, [z, c], check_trace=False) + + # Initialize. + stats = FeatureStats(**stats_kwargs) + assert stats.max_items is not None + progress = opts.progress.sub(tag='generator features', num_items=stats.max_items, rel_lo=rel_lo, rel_hi=rel_hi) + detector = get_feature_detector(url=detector_url, device=opts.device, num_gpus=opts.num_gpus, rank=opts.rank, verbose=progress.verbose) + + # Main loop. 
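+    # The loop below draws latents in chunks of batch_gen, pairs them with labels
+    # sampled uniformly from the dataset, and relies on run_generator() above to
+    # quantise the output to uint8 in [0, 255], so generated and real images reach
+    # the detector with the same dynamic range.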
+ while not stats.is_full(): + images = [] + for _i in range(batch_size // batch_gen): + z = torch.randn([batch_gen, G.z_dim], device=opts.device) + c = [dataset.get_label(np.random.randint(len(dataset))) for _i in range(batch_gen)] + c = torch.from_numpy(np.stack(c)).pin_memory().to(opts.device) + images.append(run_generator(z, c)) + images = torch.cat(images) + if images.shape[1] == 1: + images = images.repeat([1, 3, 1, 1]) + features = detector(images, **detector_kwargs) + stats.append_torch(features, num_gpus=opts.num_gpus, rank=opts.rank) + progress.update(stats.num_items) + return stats + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/perceptual_path_length.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/perceptual_path_length.py new file mode 100644 index 0000000000000000000000000000000000000000..8d2c3a44aececa58a7c5602e14a24d424e51bf14 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/perceptual_path_length.py @@ -0,0 +1,131 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Perceptual Path Length (PPL) from the paper "A Style-Based Generator +Architecture for Generative Adversarial Networks". Matches the original +implementation by Karras et al. at +https://github.com/NVlabs/stylegan/blob/master/metrics/perceptual_path_length.py""" + +import copy +import numpy as np +import torch +import dnnlib +from . import metric_utils + +#---------------------------------------------------------------------------- + +# Spherical interpolation of a batch of vectors. +def slerp(a, b, t): + a = a / a.norm(dim=-1, keepdim=True) + b = b / b.norm(dim=-1, keepdim=True) + d = (a * b).sum(dim=-1, keepdim=True) + p = t * torch.acos(d) + c = b - d * a + c = c / c.norm(dim=-1, keepdim=True) + d = a * torch.cos(p) + c * torch.sin(p) + d = d / d.norm(dim=-1, keepdim=True) + return d + +#---------------------------------------------------------------------------- + +class PPLSampler(torch.nn.Module): + def __init__(self, G, G_kwargs, epsilon, space, sampling, crop, vgg16): + assert space in ['z', 'w'] + assert sampling in ['full', 'end'] + super().__init__() + self.G = copy.deepcopy(G) + self.G_kwargs = G_kwargs + self.epsilon = epsilon + self.space = space + self.sampling = sampling + self.crop = crop + self.vgg16 = copy.deepcopy(vgg16) + + def forward(self, c): + # Generate random latents and interpolation t-values. + t = torch.rand([c.shape[0]], device=c.device) * (1 if self.sampling == 'full' else 0) + z0, z1 = torch.randn([c.shape[0] * 2, self.G.z_dim], device=c.device).chunk(2) + + # Interpolate in W or Z. 
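+        # The branches below build two latent points per sample, at t and at
+        # t + epsilon (lerp in W, slerp in Z); the squared LPIPS distance between
+        # the two rendered images divided by epsilon^2, computed at the end of
+        # forward(), is the per-sample path-length estimate.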
+ if self.space == 'w': + w0, w1 = self.G.mapping(z=torch.cat([z0,z1]), c=torch.cat([c,c])).chunk(2) + wt0 = w0.lerp(w1, t.unsqueeze(1).unsqueeze(2)) + wt1 = w0.lerp(w1, t.unsqueeze(1).unsqueeze(2) + self.epsilon) + else: # space == 'z' + zt0 = slerp(z0, z1, t.unsqueeze(1)) + zt1 = slerp(z0, z1, t.unsqueeze(1) + self.epsilon) + wt0, wt1 = self.G.mapping(z=torch.cat([zt0,zt1]), c=torch.cat([c,c])).chunk(2) + + # Randomize noise buffers. + for name, buf in self.G.named_buffers(): + if name.endswith('.noise_const'): + buf.copy_(torch.randn_like(buf)) + + # Generate images. + img = self.G.synthesis(ws=torch.cat([wt0,wt1]), noise_mode='const', force_fp32=True, **self.G_kwargs) + + # Center crop. + if self.crop: + assert img.shape[2] == img.shape[3] + c = img.shape[2] // 8 + img = img[:, :, c*3 : c*7, c*2 : c*6] + + # Downsample to 256x256. + factor = self.G.img_resolution // 256 + if factor > 1: + img = img.reshape([-1, img.shape[1], img.shape[2] // factor, factor, img.shape[3] // factor, factor]).mean([3, 5]) + + # Scale dynamic range from [-1,1] to [0,255]. + img = (img + 1) * (255 / 2) + if self.G.img_channels == 1: + img = img.repeat([1, 3, 1, 1]) + + # Evaluate differential LPIPS. + lpips_t0, lpips_t1 = self.vgg16(img, resize_images=False, return_lpips=True).chunk(2) + dist = (lpips_t0 - lpips_t1).square().sum(1) / self.epsilon ** 2 + return dist + +#---------------------------------------------------------------------------- + +def compute_ppl(opts, num_samples, epsilon, space, sampling, crop, batch_size, jit=False): + dataset = dnnlib.util.construct_class_by_name(**opts.dataset_kwargs) + vgg16_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt' + vgg16 = metric_utils.get_feature_detector(vgg16_url, num_gpus=opts.num_gpus, rank=opts.rank, verbose=opts.progress.verbose) + + # Setup sampler. + sampler = PPLSampler(G=opts.G, G_kwargs=opts.G_kwargs, epsilon=epsilon, space=space, sampling=sampling, crop=crop, vgg16=vgg16) + sampler.eval().requires_grad_(False).to(opts.device) + if jit: + c = torch.zeros([batch_size, opts.G.c_dim], device=opts.device) + sampler = torch.jit.trace(sampler, [c], check_trace=False) + + # Sampling loop. + dist = [] + progress = opts.progress.sub(tag='ppl sampling', num_items=num_samples) + for batch_start in range(0, num_samples, batch_size * opts.num_gpus): + progress.update(batch_start) + c = [dataset.get_label(np.random.randint(len(dataset))) for _i in range(batch_size)] + c = torch.from_numpy(np.stack(c)).pin_memory().to(opts.device) + x = sampler(c) + for src in range(opts.num_gpus): + y = x.clone() + if opts.num_gpus > 1: + torch.distributed.broadcast(y, src=src) + dist.append(y) + progress.update(num_samples) + + # Compute PPL. 
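+    # Only rank 0 reports a value below; following the original StyleGAN
+    # implementation, samples outside the 1st-99th percentile range are discarded
+    # before averaging to suppress outliers.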
+ if opts.rank != 0: + return float('nan') + dist = torch.cat(dist)[:num_samples].cpu().numpy() + lo = np.percentile(dist, 1, interpolation='lower') + hi = np.percentile(dist, 99, interpolation='higher') + ppl = np.extract(np.logical_and(dist >= lo, dist <= hi), dist).mean() + return float(ppl) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/precision_recall.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/precision_recall.py new file mode 100644 index 0000000000000000000000000000000000000000..9b4b98574f9cf8d23ac14831471db2e1021ba501 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/metrics/precision_recall.py @@ -0,0 +1,62 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Precision/Recall (PR) from the paper "Improved Precision and Recall +Metric for Assessing Generative Models". Matches the original implementation +by Kynkaanniemi et al. at +https://github.com/kynkaat/improved-precision-and-recall-metric/blob/master/precision_recall.py""" + +import torch +from . import metric_utils + +#---------------------------------------------------------------------------- + +def compute_distances(row_features, col_features, num_gpus, rank, col_batch_size): + assert 0 <= rank < num_gpus + num_cols = col_features.shape[0] + num_batches = ((num_cols - 1) // col_batch_size // num_gpus + 1) * num_gpus + col_batches = torch.nn.functional.pad(col_features, [0, 0, 0, -num_cols % num_batches]).chunk(num_batches) + dist_batches = [] + for col_batch in col_batches[rank :: num_gpus]: + dist_batch = torch.cdist(row_features.unsqueeze(0), col_batch.unsqueeze(0))[0] + for src in range(num_gpus): + dist_broadcast = dist_batch.clone() + if num_gpus > 1: + torch.distributed.broadcast(dist_broadcast, src=src) + dist_batches.append(dist_broadcast.cpu() if rank == 0 else None) + return torch.cat(dist_batches, dim=1)[:, :num_cols] if rank == 0 else None + +#---------------------------------------------------------------------------- + +def compute_pr(opts, max_real, num_gen, nhood_size, row_batch_size, col_batch_size): + detector_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt' + detector_kwargs = dict(return_features=True) + + real_features = metric_utils.compute_feature_stats_for_dataset( + opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, + rel_lo=0, rel_hi=0, capture_all=True, max_items=max_real).get_all_torch().to(torch.float16).to(opts.device) + + gen_features = metric_utils.compute_feature_stats_for_generator( + opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, + rel_lo=0, rel_hi=1, capture_all=True, max_items=num_gen).get_all_torch().to(torch.float16).to(opts.device) + + results = dict() + for name, manifold, probes in [('precision', real_features, gen_features), ('recall', gen_features, real_features)]: + kth = [] + for manifold_batch in manifold.split(row_batch_size): + dist = compute_distances(row_features=manifold_batch, col_features=manifold, num_gpus=opts.num_gpus, 
rank=opts.rank, col_batch_size=col_batch_size) + kth.append(dist.to(torch.float32).kthvalue(nhood_size + 1).values.to(torch.float16) if opts.rank == 0 else None) + kth = torch.cat(kth) if opts.rank == 0 else None + pred = [] + for probes_batch in probes.split(row_batch_size): + dist = compute_distances(row_features=probes_batch, col_features=manifold, num_gpus=opts.num_gpus, rank=opts.rank, col_batch_size=col_batch_size) + pred.append((dist <= kth).any(dim=1) if opts.rank == 0 else None) + results[name] = float(torch.cat(pred).to(torch.float32).mean() if opts.rank == 0 else 'nan') + return results['precision'], results['recall'] + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/projector.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/projector.py new file mode 100644 index 0000000000000000000000000000000000000000..36041a08619a602304deb603a6769dbfed9437c8 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/projector.py @@ -0,0 +1,212 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Project given image to the latent space of pretrained network pickle.""" + +import copy +import os +from time import perf_counter + +import click +import imageio +import numpy as np +import PIL.Image +import torch +import torch.nn.functional as F + +import dnnlib +import legacy + +def project( + G, + target: torch.Tensor, # [C,H,W] and dynamic range [0,255], W & H must match G output resolution + *, + num_steps = 1000, + w_avg_samples = 10000, + initial_learning_rate = 0.1, + initial_noise_factor = 0.05, + lr_rampdown_length = 0.25, + lr_rampup_length = 0.05, + noise_ramp_length = 0.75, + regularize_noise_weight = 1e5, + verbose = False, + device: torch.device +): + assert target.shape == (G.img_channels, G.img_resolution, G.img_resolution) + + def logprint(*args): + if verbose: + print(*args) + + G = copy.deepcopy(G).eval().requires_grad_(False).to(device) # type: ignore + + # Compute w stats. + logprint(f'Computing W midpoint and stddev using {w_avg_samples} samples...') + z_samples = np.random.RandomState(123).randn(w_avg_samples, G.z_dim) + w_samples = G.mapping(torch.from_numpy(z_samples).to(device), None) # [N, L, C] + w_samples = w_samples[:, :1, :].cpu().numpy().astype(np.float32) # [N, 1, C] + w_avg = np.mean(w_samples, axis=0, keepdims=True) # [1, 1, C] + w_std = (np.sum((w_samples - w_avg) ** 2) / w_avg_samples) ** 0.5 + + # Setup noise inputs. + noise_bufs = { name: buf for (name, buf) in G.synthesis.named_buffers() if 'noise_const' in name } + + # Load VGG16 feature detector. + url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt' + with dnnlib.util.open_url(url) as f: + vgg16 = torch.jit.load(f).eval().to(device) + + # Features for target image. 
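+    # Projection, summarised: a single w (later replicated across all synthesis
+    # layers) and the per-layer noise buffers are optimised so that the VGG16
+    # LPIPS-style feature distance between the synthesised image and the target
+    # prepared below is minimised, plus a regulariser that discourages the noise
+    # maps from absorbing coherent image content.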
+ target_images = target.unsqueeze(0).to(device).to(torch.float32) + if target_images.shape[2] > 256: + target_images = F.interpolate(target_images, size=(256, 256), mode='area') + target_features = vgg16(target_images, resize_images=False, return_lpips=True) + + w_opt = torch.tensor(w_avg, dtype=torch.float32, device=device, requires_grad=True) # pylint: disable=not-callable + w_out = torch.zeros([num_steps] + list(w_opt.shape[1:]), dtype=torch.float32, device=device) + optimizer = torch.optim.Adam([w_opt] + list(noise_bufs.values()), betas=(0.9, 0.999), lr=initial_learning_rate) + + # Init noise. + for buf in noise_bufs.values(): + buf[:] = torch.randn_like(buf) + buf.requires_grad = True + + for step in range(num_steps): + # Learning rate schedule. + t = step / num_steps + w_noise_scale = w_std * initial_noise_factor * max(0.0, 1.0 - t / noise_ramp_length) ** 2 + lr_ramp = min(1.0, (1.0 - t) / lr_rampdown_length) + lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi) + lr_ramp = lr_ramp * min(1.0, t / lr_rampup_length) + lr = initial_learning_rate * lr_ramp + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + # Synth images from opt_w. + w_noise = torch.randn_like(w_opt) * w_noise_scale + ws = (w_opt + w_noise).repeat([1, G.mapping.num_ws, 1]) + synth_images = G.synthesis(ws, noise_mode='const') + + # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images. + synth_images = (synth_images + 1) * (255/2) + if synth_images.shape[2] > 256: + synth_images = F.interpolate(synth_images, size=(256, 256), mode='area') + + # Features for synth images. + synth_features = vgg16(synth_images, resize_images=False, return_lpips=True) + dist = (target_features - synth_features).square().sum() + + # Noise regularization. + reg_loss = 0.0 + for v in noise_bufs.values(): + noise = v[None,None,:,:] # must be [1,1,H,W] for F.avg_pool2d() + while True: + reg_loss += (noise*torch.roll(noise, shifts=1, dims=3)).mean()**2 + reg_loss += (noise*torch.roll(noise, shifts=1, dims=2)).mean()**2 + if noise.shape[2] <= 8: + break + noise = F.avg_pool2d(noise, kernel_size=2) + loss = dist + reg_loss * regularize_noise_weight + + # Step + optimizer.zero_grad(set_to_none=True) + loss.backward() + optimizer.step() + logprint(f'step {step+1:>4d}/{num_steps}: dist {dist:<4.2f} loss {float(loss):<5.2f}') + + # Save projected W for each optimization step. + w_out[step] = w_opt.detach()[0] + + # Normalize noise. 
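+    # The block below re-standardises each optimised noise buffer to zero mean and
+    # unit variance so it matches the distribution the synthesis network expects.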
+ with torch.no_grad(): + for buf in noise_bufs.values(): + buf -= buf.mean() + buf *= buf.square().mean().rsqrt() + + return w_out.repeat([1, G.mapping.num_ws, 1]) + +#---------------------------------------------------------------------------- + +@click.command() +@click.option('--network', 'network_pkl', help='Network pickle filename', required=True) +@click.option('--target', 'target_fname', help='Target image file to project to', required=True, metavar='FILE') +@click.option('--num-steps', help='Number of optimization steps', type=int, default=1000, show_default=True) +@click.option('--seed', help='Random seed', type=int, default=303, show_default=True) +@click.option('--save-video', help='Save an mp4 video of optimization progress', type=bool, default=True, show_default=True) +@click.option('--outdir', help='Where to save the output images', required=True, metavar='DIR') +def run_projection( + network_pkl: str, + target_fname: str, + outdir: str, + save_video: bool, + seed: int, + num_steps: int +): + """Project given image to the latent space of pretrained network pickle. + + Examples: + + \b + python projector.py --outdir=out --target=~/mytargetimg.png \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/ffhq.pkl + """ + np.random.seed(seed) + torch.manual_seed(seed) + + # Load networks. + print('Loading networks from "%s"...' % network_pkl) + device = torch.device('cuda') + with dnnlib.util.open_url(network_pkl) as fp: + G = legacy.load_network_pkl(fp)['G_ema'].requires_grad_(False).to(device) # type: ignore + + # Load target image. + target_pil = PIL.Image.open(target_fname).convert('RGB') + w, h = target_pil.size + s = min(w, h) + target_pil = target_pil.crop(((w - s) // 2, (h - s) // 2, (w + s) // 2, (h + s) // 2)) + target_pil = target_pil.resize((G.img_resolution, G.img_resolution), PIL.Image.LANCZOS) + target_uint8 = np.array(target_pil, dtype=np.uint8) + + # Optimize projection. + start_time = perf_counter() + projected_w_steps = project( + G, + target=torch.tensor(target_uint8.transpose([2, 0, 1]), device=device), # pylint: disable=not-callable + num_steps=num_steps, + device=device, + verbose=True + ) + print (f'Elapsed: {(perf_counter()-start_time):.1f} s') + + # Render debug output: optional video and projected image and W vector. + os.makedirs(outdir, exist_ok=True) + if save_video: + video = imageio.get_writer(f'{outdir}/proj.mp4', mode='I', fps=10, codec='libx264', bitrate='16M') + print (f'Saving optimization progress video "{outdir}/proj.mp4"') + for projected_w in projected_w_steps: + synth_image = G.synthesis(projected_w.unsqueeze(0), noise_mode='const') + synth_image = (synth_image + 1) * (255/2) + synth_image = synth_image.permute(0, 2, 3, 1).clamp(0, 255).to(torch.uint8)[0].cpu().numpy() + video.append_data(np.concatenate([target_uint8, synth_image], axis=1)) + video.close() + + # Save final projected frame and W vector. 
+ target_pil.save(f'{outdir}/target.png') + projected_w = projected_w_steps[-1] + synth_image = G.synthesis(projected_w.unsqueeze(0), noise_mode='const') + synth_image = (synth_image + 1) * (255/2) + synth_image = synth_image.permute(0, 2, 3, 1).clamp(0, 255).to(torch.uint8)[0].cpu().numpy() + PIL.Image.fromarray(synth_image, 'RGB').save(f'{outdir}/proj.png') + np.savez(f'{outdir}/projected_w.npz', w=projected_w.unsqueeze(0).cpu().numpy()) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + run_projection() # pylint: disable=no-value-for-parameter + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/style_mixing.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/style_mixing.py new file mode 100644 index 0000000000000000000000000000000000000000..c47bebbc44c0126b6fd00a55b8b487dc7b159653 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/style_mixing.py @@ -0,0 +1,118 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Generate style mixing image matrix using pretrained network pickle.""" + +import os +import re +from typing import List + +import click +import dnnlib +import numpy as np +import PIL.Image +import torch + +import legacy + +#---------------------------------------------------------------------------- + +def num_range(s: str) -> List[int]: + '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' + + range_re = re.compile(r'^(\d+)-(\d+)$') + m = range_re.match(s) + if m: + return list(range(int(m.group(1)), int(m.group(2))+1)) + vals = s.split(',') + return [int(x) for x in vals] + +#---------------------------------------------------------------------------- + +@click.command() +@click.option('--network', 'network_pkl', help='Network pickle filename', required=True) +@click.option('--rows', 'row_seeds', type=num_range, help='Random seeds to use for image rows', required=True) +@click.option('--cols', 'col_seeds', type=num_range, help='Random seeds to use for image columns', required=True) +@click.option('--styles', 'col_styles', type=num_range, help='Style layer range', default='0-6', show_default=True) +@click.option('--trunc', 'truncation_psi', type=float, help='Truncation psi', default=1, show_default=True) +@click.option('--noise-mode', help='Noise mode', type=click.Choice(['const', 'random', 'none']), default='const', show_default=True) +@click.option('--outdir', type=str, required=True) +def generate_style_mix( + network_pkl: str, + row_seeds: List[int], + col_seeds: List[int], + col_styles: List[int], + truncation_psi: float, + noise_mode: str, + outdir: str +): + """Generate images using pretrained network pickle. + + Examples: + + \b + python style_mixing.py --outdir=out --rows=85,100,75,458,1500 --cols=55,821,1789,293 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl + """ + print('Loading networks from "%s"...' 
% network_pkl) + device = torch.device('cuda') + with dnnlib.util.open_url(network_pkl) as f: + G = legacy.load_network_pkl(f)['G_ema'].to(device) # type: ignore + + os.makedirs(outdir, exist_ok=True) + + print('Generating W vectors...') + all_seeds = list(set(row_seeds + col_seeds)) + all_z = np.stack([np.random.RandomState(seed).randn(G.z_dim) for seed in all_seeds]) + all_w = G.mapping(torch.from_numpy(all_z).to(device), None) + w_avg = G.mapping.w_avg + all_w = w_avg + (all_w - w_avg) * truncation_psi + w_dict = {seed: w for seed, w in zip(all_seeds, list(all_w))} + + print('Generating images...') + all_images = G.synthesis(all_w, noise_mode=noise_mode) + all_images = (all_images.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8).cpu().numpy() + image_dict = {(seed, seed): image for seed, image in zip(all_seeds, list(all_images))} + + print('Generating style-mixed images...') + for row_seed in row_seeds: + for col_seed in col_seeds: + w = w_dict[row_seed].clone() + w[col_styles] = w_dict[col_seed][col_styles] + image = G.synthesis(w[np.newaxis], noise_mode=noise_mode) + image = (image.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8) + image_dict[(row_seed, col_seed)] = image[0].cpu().numpy() + + print('Saving images...') + os.makedirs(outdir, exist_ok=True) + for (row_seed, col_seed), image in image_dict.items(): + PIL.Image.fromarray(image, 'RGB').save(f'{outdir}/{row_seed}-{col_seed}.png') + + print('Saving image grid...') + W = G.img_resolution + H = G.img_resolution + canvas = PIL.Image.new('RGB', (W * (len(col_seeds) + 1), H * (len(row_seeds) + 1)), 'black') + for row_idx, row_seed in enumerate([0] + row_seeds): + for col_idx, col_seed in enumerate([0] + col_seeds): + if row_idx == 0 and col_idx == 0: + continue + key = (row_seed, col_seed) + if row_idx == 0: + key = (col_seed, col_seed) + if col_idx == 0: + key = (row_seed, row_seed) + canvas.paste(PIL.Image.fromarray(image_dict[key], 'RGB'), (W * col_idx, H * row_idx)) + canvas.save(f'{outdir}/grid.png') + + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + generate_style_mix() # pylint: disable=no-value-for-parameter + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e1e1a5ba99e56a56ecaa14f7d4fa41777789c0cf --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ +# empty diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/custom_ops.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/custom_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..4cc4e43fc6f6ce79f2bd68a44ba87990b9b8564e --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/custom_ops.py @@ -0,0 +1,126 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +import os +import glob +import torch +import torch.utils.cpp_extension +import importlib +import hashlib +import shutil +from pathlib import Path + +from torch.utils.file_baton import FileBaton + +#---------------------------------------------------------------------------- +# Global options. + +verbosity = 'brief' # Verbosity level: 'none', 'brief', 'full' + +#---------------------------------------------------------------------------- +# Internal helper funcs. + +def _find_compiler_bindir(): + patterns = [ + 'C:/Program Files (x86)/Microsoft Visual Studio/*/Professional/VC/Tools/MSVC/*/bin/Hostx64/x64', + 'C:/Program Files (x86)/Microsoft Visual Studio/*/BuildTools/VC/Tools/MSVC/*/bin/Hostx64/x64', + 'C:/Program Files (x86)/Microsoft Visual Studio/*/Community/VC/Tools/MSVC/*/bin/Hostx64/x64', + 'C:/Program Files (x86)/Microsoft Visual Studio */vc/bin', + ] + for pattern in patterns: + matches = sorted(glob.glob(pattern)) + if len(matches): + return matches[-1] + return None + +#---------------------------------------------------------------------------- +# Main entry point for compiling and loading C++/CUDA plugins. + +_cached_plugins = dict() + +def get_plugin(module_name, sources, **build_kwargs): + assert verbosity in ['none', 'brief', 'full'] + + # Already cached? + if module_name in _cached_plugins: + return _cached_plugins[module_name] + + # Print status. + if verbosity == 'full': + print(f'Setting up PyTorch plugin "{module_name}"...') + elif verbosity == 'brief': + print(f'Setting up PyTorch plugin "{module_name}"... ', end='', flush=True) + + try: # pylint: disable=too-many-nested-blocks + # Make sure we can find the necessary compiler binaries. + if os.name == 'nt' and os.system("where cl.exe >nul 2>nul") != 0: + compiler_bindir = _find_compiler_bindir() + if compiler_bindir is None: + raise RuntimeError(f'Could not find MSVC/GCC/CLANG installation on this computer. Check _find_compiler_bindir() in "{__file__}".') + os.environ['PATH'] += ';' + compiler_bindir + + # Compile and load. + verbose_build = (verbosity == 'full') + + # Incremental build md5sum trickery. Copies all the input source files + # into a cached build directory under a combined md5 digest of the input + # source files. Copying is done only if the combined digest has changed. + # This keeps input file timestamps and filenames the same as in previous + # extension builds, allowing for fast incremental rebuilds. 
+ # + # This optimization is done only in case all the source files reside in + # a single directory (just for simplicity) and if the TORCH_EXTENSIONS_DIR + # environment variable is set (we take this as a signal that the user + # actually cares about this.) + source_dirs_set = set(os.path.dirname(source) for source in sources) + if len(source_dirs_set) == 1 and ('TORCH_EXTENSIONS_DIR' in os.environ): + all_source_files = sorted(list(x for x in Path(list(source_dirs_set)[0]).iterdir() if x.is_file())) + + # Compute a combined hash digest for all source files in the same + # custom op directory (usually .cu, .cpp, .py and .h files). + hash_md5 = hashlib.md5() + for src in all_source_files: + with open(src, 'rb') as f: + hash_md5.update(f.read()) + build_dir = torch.utils.cpp_extension._get_build_directory(module_name, verbose=verbose_build) # pylint: disable=protected-access + digest_build_dir = os.path.join(build_dir, hash_md5.hexdigest()) + + if not os.path.isdir(digest_build_dir): + os.makedirs(digest_build_dir, exist_ok=True) + baton = FileBaton(os.path.join(digest_build_dir, 'lock')) + if baton.try_acquire(): + try: + for src in all_source_files: + shutil.copyfile(src, os.path.join(digest_build_dir, os.path.basename(src))) + finally: + baton.release() + else: + # Someone else is copying source files under the digest dir, + # wait until done and continue. + baton.wait() + digest_sources = [os.path.join(digest_build_dir, os.path.basename(x)) for x in sources] + torch.utils.cpp_extension.load(name=module_name, build_directory=build_dir, + verbose=verbose_build, sources=digest_sources, **build_kwargs) + else: + torch.utils.cpp_extension.load(name=module_name, verbose=verbose_build, sources=sources, **build_kwargs) + module = importlib.import_module(module_name) + + except: + if verbosity == 'brief': + print('Failed!') + raise + + # Print status and add to cache. + if verbosity == 'full': + print(f'Done setting up PyTorch plugin "{module_name}".') + elif verbosity == 'brief': + print('Done.') + _cached_plugins[module_name] = module + return module + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/misc.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..0f158cd871e1df433b018a7658ca24dbddc4ea7c --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/misc.py @@ -0,0 +1,262 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +import re +import contextlib +import numpy as np +import torch +import warnings +import dnnlib + +#---------------------------------------------------------------------------- +# Cached construction of constant tensors. Avoids CPU=>GPU copy when the +# same constant is used multiple times. 
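The incremental-build logic in `get_plugin()` above keys a cached build directory on a combined MD5 digest of the op's source files, so the C++ extension is recompiled only when a source actually changes. A standalone sketch of just that digest-and-copy step (the cache root and file list are hypothetical; the real code resolves the build directory via `torch.utils.cpp_extension._get_build_directory()` and serialises the copy with a `FileBaton` lock):

```python
import hashlib
import os
import shutil
from pathlib import Path

def digest_build_dir(sources, cache_root):
    """Copy `sources` into a directory named after their combined MD5 digest.

    If the digest is unchanged, the directory already exists and nothing is
    copied, so file timestamps stay stable and the extension builder can skip
    recompilation -- the idea behind get_plugin()'s caching above.
    """
    hash_md5 = hashlib.md5()
    for src in sorted(sources):
        hash_md5.update(Path(src).read_bytes())
    out_dir = os.path.join(cache_root, hash_md5.hexdigest())
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir, exist_ok=True)
        for src in sources:
            shutil.copyfile(src, os.path.join(out_dir, os.path.basename(src)))
    return out_dir
```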
+ +_constant_cache = dict() + +def constant(value, shape=None, dtype=None, device=None, memory_format=None): + value = np.asarray(value) + if shape is not None: + shape = tuple(shape) + if dtype is None: + dtype = torch.get_default_dtype() + if device is None: + device = torch.device('cpu') + if memory_format is None: + memory_format = torch.contiguous_format + + key = (value.shape, value.dtype, value.tobytes(), shape, dtype, device, memory_format) + tensor = _constant_cache.get(key, None) + if tensor is None: + tensor = torch.as_tensor(value.copy(), dtype=dtype, device=device) + if shape is not None: + tensor, _ = torch.broadcast_tensors(tensor, torch.empty(shape)) + tensor = tensor.contiguous(memory_format=memory_format) + _constant_cache[key] = tensor + return tensor + +#---------------------------------------------------------------------------- +# Replace NaN/Inf with specified numerical values. + +try: + nan_to_num = torch.nan_to_num # 1.8.0a0 +except AttributeError: + def nan_to_num(input, nan=0.0, posinf=None, neginf=None, *, out=None): # pylint: disable=redefined-builtin + assert isinstance(input, torch.Tensor) + if posinf is None: + posinf = torch.finfo(input.dtype).max + if neginf is None: + neginf = torch.finfo(input.dtype).min + assert nan == 0 + return torch.clamp(input.unsqueeze(0).nansum(0), min=neginf, max=posinf, out=out) + +#---------------------------------------------------------------------------- +# Symbolic assert. + +try: + symbolic_assert = torch._assert # 1.8.0a0 # pylint: disable=protected-access +except AttributeError: + symbolic_assert = torch.Assert # 1.7.0 + +#---------------------------------------------------------------------------- +# Context manager to suppress known warnings in torch.jit.trace(). + +class suppress_tracer_warnings(warnings.catch_warnings): + def __enter__(self): + super().__enter__() + warnings.simplefilter('ignore', category=torch.jit.TracerWarning) + return self + +#---------------------------------------------------------------------------- +# Assert that the shape of a tensor matches the given list of integers. +# None indicates that the size of a dimension is allowed to vary. +# Performs symbolic assertion when used in torch.jit.trace(). + +def assert_shape(tensor, ref_shape): + if tensor.ndim != len(ref_shape): + raise AssertionError(f'Wrong number of dimensions: got {tensor.ndim}, expected {len(ref_shape)}') + for idx, (size, ref_size) in enumerate(zip(tensor.shape, ref_shape)): + if ref_size is None: + pass + elif isinstance(ref_size, torch.Tensor): + with suppress_tracer_warnings(): # as_tensor results are registered as constants + symbolic_assert(torch.equal(torch.as_tensor(size), ref_size), f'Wrong size for dimension {idx}') + elif isinstance(size, torch.Tensor): + with suppress_tracer_warnings(): # as_tensor results are registered as constants + symbolic_assert(torch.equal(size, torch.as_tensor(ref_size)), f'Wrong size for dimension {idx}: expected {ref_size}') + elif size != ref_size: + raise AssertionError(f'Wrong size for dimension {idx}: got {size}, expected {ref_size}') + +#---------------------------------------------------------------------------- +# Function decorator that calls torch.autograd.profiler.record_function(). 
+ +def profiled_function(fn): + def decorator(*args, **kwargs): + with torch.autograd.profiler.record_function(fn.__name__): + return fn(*args, **kwargs) + decorator.__name__ = fn.__name__ + return decorator + +#---------------------------------------------------------------------------- +# Sampler for torch.utils.data.DataLoader that loops over the dataset +# indefinitely, shuffling items as it goes. + +class InfiniteSampler(torch.utils.data.Sampler): + def __init__(self, dataset, rank=0, num_replicas=1, shuffle=True, seed=0, window_size=0.5): + assert len(dataset) > 0 + assert num_replicas > 0 + assert 0 <= rank < num_replicas + assert 0 <= window_size <= 1 + super().__init__(dataset) + self.dataset = dataset + self.rank = rank + self.num_replicas = num_replicas + self.shuffle = shuffle + self.seed = seed + self.window_size = window_size + + def __iter__(self): + order = np.arange(len(self.dataset)) + rnd = None + window = 0 + if self.shuffle: + rnd = np.random.RandomState(self.seed) + rnd.shuffle(order) + window = int(np.rint(order.size * self.window_size)) + + idx = 0 + while True: + i = idx % order.size + if idx % self.num_replicas == self.rank: + yield order[i] + if window >= 2: + j = (i - rnd.randint(window)) % order.size + order[i], order[j] = order[j], order[i] + idx += 1 + +#---------------------------------------------------------------------------- +# Utilities for operating with torch.nn.Module parameters and buffers. + +def params_and_buffers(module): + assert isinstance(module, torch.nn.Module) + return list(module.parameters()) + list(module.buffers()) + +def named_params_and_buffers(module): + assert isinstance(module, torch.nn.Module) + return list(module.named_parameters()) + list(module.named_buffers()) + +def copy_params_and_buffers(src_module, dst_module, require_all=False): + assert isinstance(src_module, torch.nn.Module) + assert isinstance(dst_module, torch.nn.Module) + src_tensors = {name: tensor for name, tensor in named_params_and_buffers(src_module)} + for name, tensor in named_params_and_buffers(dst_module): + assert (name in src_tensors) or (not require_all) + if name in src_tensors: + tensor.copy_(src_tensors[name].detach()).requires_grad_(tensor.requires_grad) + +#---------------------------------------------------------------------------- +# Context manager for easily enabling/disabling DistributedDataParallel +# synchronization. + +@contextlib.contextmanager +def ddp_sync(module, sync): + assert isinstance(module, torch.nn.Module) + if sync or not isinstance(module, torch.nn.parallel.DistributedDataParallel): + yield + else: + with module.no_sync(): + yield + +#---------------------------------------------------------------------------- +# Check DistributedDataParallel consistency across processes. + +def check_ddp_consistency(module, ignore_regex=None): + assert isinstance(module, torch.nn.Module) + for name, tensor in named_params_and_buffers(module): + fullname = type(module).__name__ + '.' + name + if ignore_regex is not None and re.fullmatch(ignore_regex, fullname): + continue + tensor = tensor.detach() + other = tensor.clone() + torch.distributed.broadcast(tensor=other, src=0) + assert (nan_to_num(tensor) == nan_to_num(other)).all(), fullname + +#---------------------------------------------------------------------------- +# Print summary table of module hierarchy. 
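`InfiniteSampler` above yields dataset indices forever, locally re-shuffling within a sliding window so training loops need no explicit epoch boundary. A hedged usage sketch (the import assumes the `stylegan2ada_pth_official` directory, and the `dnnlib` package that `misc.py` imports, are on `PYTHONPATH`; the toy dataset and batch size are made up):

```python
import itertools
import torch
from torch.utils.data import DataLoader, TensorDataset

from torch_utils.misc import InfiniteSampler  # path assumed to be importable

dataset = TensorDataset(torch.arange(100, dtype=torch.float32))  # toy data
sampler = InfiniteSampler(dataset, rank=0, num_replicas=1, shuffle=True, seed=0)
loader = DataLoader(dataset, batch_size=8, sampler=sampler)

# The sampler never raises StopIteration, so cap the stream explicitly.
for (batch,) in itertools.islice(iter(loader), 3):
    print(batch.shape)  # torch.Size([8])
```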
+ +def print_module_summary(module, inputs, max_nesting=3, skip_redundant=True): + assert isinstance(module, torch.nn.Module) + assert not isinstance(module, torch.jit.ScriptModule) + assert isinstance(inputs, (tuple, list)) + + # Register hooks. + entries = [] + nesting = [0] + def pre_hook(_mod, _inputs): + nesting[0] += 1 + def post_hook(mod, _inputs, outputs): + nesting[0] -= 1 + if nesting[0] <= max_nesting: + outputs = list(outputs) if isinstance(outputs, (tuple, list)) else [outputs] + outputs = [t for t in outputs if isinstance(t, torch.Tensor)] + entries.append(dnnlib.EasyDict(mod=mod, outputs=outputs)) + hooks = [mod.register_forward_pre_hook(pre_hook) for mod in module.modules()] + hooks += [mod.register_forward_hook(post_hook) for mod in module.modules()] + + # Run module. + outputs = module(*inputs) + for hook in hooks: + hook.remove() + + # Identify unique outputs, parameters, and buffers. + tensors_seen = set() + for e in entries: + e.unique_params = [t for t in e.mod.parameters() if id(t) not in tensors_seen] + e.unique_buffers = [t for t in e.mod.buffers() if id(t) not in tensors_seen] + e.unique_outputs = [t for t in e.outputs if id(t) not in tensors_seen] + tensors_seen |= {id(t) for t in e.unique_params + e.unique_buffers + e.unique_outputs} + + # Filter out redundant entries. + if skip_redundant: + entries = [e for e in entries if len(e.unique_params) or len(e.unique_buffers) or len(e.unique_outputs)] + + # Construct table. + rows = [[type(module).__name__, 'Parameters', 'Buffers', 'Output shape', 'Datatype']] + rows += [['---'] * len(rows[0])] + param_total = 0 + buffer_total = 0 + submodule_names = {mod: name for name, mod in module.named_modules()} + for e in entries: + name = '' if e.mod is module else submodule_names[e.mod] + param_size = sum(t.numel() for t in e.unique_params) + buffer_size = sum(t.numel() for t in e.unique_buffers) + output_shapes = [str(list(e.outputs[0].shape)) for t in e.outputs] + output_dtypes = [str(t.dtype).split('.')[-1] for t in e.outputs] + rows += [[ + name + (':0' if len(e.outputs) >= 2 else ''), + str(param_size) if param_size else '-', + str(buffer_size) if buffer_size else '-', + (output_shapes + ['-'])[0], + (output_dtypes + ['-'])[0], + ]] + for idx in range(1, len(e.outputs)): + rows += [[name + f':{idx}', '-', '-', output_shapes[idx], output_dtypes[idx]]] + param_total += param_size + buffer_total += buffer_size + rows += [['---'] * len(rows[0])] + rows += [['Total', str(param_total), str(buffer_total), '-', '-']] + + # Print table. + widths = [max(len(cell) for cell in column) for column in zip(*rows)] + print() + for row in rows: + print(' '.join(cell + ' ' * (width - len(cell)) for cell, width in zip(row, widths))) + print() + return outputs + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e1e1a5ba99e56a56ecaa14f7d4fa41777789c0cf --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. 
Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +# empty diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/bias_act.cpp b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/bias_act.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5d2425d8054991a8e8b6f7a940fd0ff7fa0bb330 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/bias_act.cpp @@ -0,0 +1,99 @@ +// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +// +// NVIDIA CORPORATION and its licensors retain all intellectual property +// and proprietary rights in and to this software, related documentation +// and any modifications thereto. Any use, reproduction, disclosure or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA CORPORATION is strictly prohibited. + +#include +#include +#include +#include "bias_act.h" + +//------------------------------------------------------------------------ + +static bool has_same_layout(torch::Tensor x, torch::Tensor y) +{ + if (x.dim() != y.dim()) + return false; + for (int64_t i = 0; i < x.dim(); i++) + { + if (x.size(i) != y.size(i)) + return false; + if (x.size(i) >= 2 && x.stride(i) != y.stride(i)) + return false; + } + return true; +} + +//------------------------------------------------------------------------ + +static torch::Tensor bias_act(torch::Tensor x, torch::Tensor b, torch::Tensor xref, torch::Tensor yref, torch::Tensor dy, int grad, int dim, int act, float alpha, float gain, float clamp) +{ + // Validate arguments. + TORCH_CHECK(x.is_cuda(), "x must reside on CUDA device"); + TORCH_CHECK(b.numel() == 0 || (b.dtype() == x.dtype() && b.device() == x.device()), "b must have the same dtype and device as x"); + TORCH_CHECK(xref.numel() == 0 || (xref.sizes() == x.sizes() && xref.dtype() == x.dtype() && xref.device() == x.device()), "xref must have the same shape, dtype, and device as x"); + TORCH_CHECK(yref.numel() == 0 || (yref.sizes() == x.sizes() && yref.dtype() == x.dtype() && yref.device() == x.device()), "yref must have the same shape, dtype, and device as x"); + TORCH_CHECK(dy.numel() == 0 || (dy.sizes() == x.sizes() && dy.dtype() == x.dtype() && dy.device() == x.device()), "dy must have the same dtype and device as x"); + TORCH_CHECK(x.numel() <= INT_MAX, "x is too large"); + TORCH_CHECK(b.dim() == 1, "b must have rank 1"); + TORCH_CHECK(b.numel() == 0 || (dim >= 0 && dim < x.dim()), "dim is out of bounds"); + TORCH_CHECK(b.numel() == 0 || b.numel() == x.size(dim), "b has wrong number of elements"); + TORCH_CHECK(grad >= 0, "grad must be non-negative"); + + // Validate layout. + TORCH_CHECK(x.is_non_overlapping_and_dense(), "x must be non-overlapping and dense"); + TORCH_CHECK(b.is_contiguous(), "b must be contiguous"); + TORCH_CHECK(xref.numel() == 0 || has_same_layout(xref, x), "xref must have the same layout as x"); + TORCH_CHECK(yref.numel() == 0 || has_same_layout(yref, x), "yref must have the same layout as x"); + TORCH_CHECK(dy.numel() == 0 || has_same_layout(dy, x), "dy must have the same layout as x"); + + // Create output tensor. 
+ const at::cuda::OptionalCUDAGuard device_guard(device_of(x)); + torch::Tensor y = torch::empty_like(x); + TORCH_CHECK(has_same_layout(y, x), "y must have the same layout as x"); + + // Initialize CUDA kernel parameters. + bias_act_kernel_params p; + p.x = x.data_ptr(); + p.b = (b.numel()) ? b.data_ptr() : NULL; + p.xref = (xref.numel()) ? xref.data_ptr() : NULL; + p.yref = (yref.numel()) ? yref.data_ptr() : NULL; + p.dy = (dy.numel()) ? dy.data_ptr() : NULL; + p.y = y.data_ptr(); + p.grad = grad; + p.act = act; + p.alpha = alpha; + p.gain = gain; + p.clamp = clamp; + p.sizeX = (int)x.numel(); + p.sizeB = (int)b.numel(); + p.stepB = (b.numel()) ? (int)x.stride(dim) : 1; + + // Choose CUDA kernel. + void* kernel; + AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&] + { + kernel = choose_bias_act_kernel(p); + }); + TORCH_CHECK(kernel, "no CUDA kernel found for the specified activation func"); + + // Launch CUDA kernel. + p.loopX = 4; + int blockSize = 4 * 32; + int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1; + void* args[] = {&p}; + AT_CUDA_CHECK(cudaLaunchKernel(kernel, gridSize, blockSize, args, 0, at::cuda::getCurrentCUDAStream())); + return y; +} + +//------------------------------------------------------------------------ + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) +{ + m.def("bias_act", &bias_act); +} + +//------------------------------------------------------------------------ diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/bias_act.cu b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/bias_act.cu new file mode 100644 index 0000000000000000000000000000000000000000..dd8fc4756d7d94727f94af738665b68d9c518880 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/bias_act.cu @@ -0,0 +1,173 @@ +// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +// +// NVIDIA CORPORATION and its licensors retain all intellectual property +// and proprietary rights in and to this software, related documentation +// and any modifications thereto. Any use, reproduction, disclosure or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA CORPORATION is strictly prohibited. + +#include +#include "bias_act.h" + +//------------------------------------------------------------------------ +// Helpers. + +template struct InternalType; +template <> struct InternalType { typedef double scalar_t; }; +template <> struct InternalType { typedef float scalar_t; }; +template <> struct InternalType { typedef float scalar_t; }; + +//------------------------------------------------------------------------ +// CUDA kernel. + +template +__global__ void bias_act_kernel(bias_act_kernel_params p) +{ + typedef typename InternalType::scalar_t scalar_t; + int G = p.grad; + scalar_t alpha = (scalar_t)p.alpha; + scalar_t gain = (scalar_t)p.gain; + scalar_t clamp = (scalar_t)p.clamp; + scalar_t one = (scalar_t)1; + scalar_t two = (scalar_t)2; + scalar_t expRange = (scalar_t)80; + scalar_t halfExpRange = (scalar_t)40; + scalar_t seluScale = (scalar_t)1.0507009873554804934193349852946; + scalar_t seluAlpha = (scalar_t)1.6732632423543772848170429916717; + + // Loop over elements. + int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x; + for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x) + { + // Load. + scalar_t x = (scalar_t)((const T*)p.x)[xi]; + scalar_t b = (p.b) ? 
(scalar_t)((const T*)p.b)[(xi / p.stepB) % p.sizeB] : 0; + scalar_t xref = (p.xref) ? (scalar_t)((const T*)p.xref)[xi] : 0; + scalar_t yref = (p.yref) ? (scalar_t)((const T*)p.yref)[xi] : 0; + scalar_t dy = (p.dy) ? (scalar_t)((const T*)p.dy)[xi] : one; + scalar_t yy = (gain != 0) ? yref / gain : 0; + scalar_t y = 0; + + // Apply bias. + ((G == 0) ? x : xref) += b; + + // linear + if (A == 1) + { + if (G == 0) y = x; + if (G == 1) y = x; + } + + // relu + if (A == 2) + { + if (G == 0) y = (x > 0) ? x : 0; + if (G == 1) y = (yy > 0) ? x : 0; + } + + // lrelu + if (A == 3) + { + if (G == 0) y = (x > 0) ? x : x * alpha; + if (G == 1) y = (yy > 0) ? x : x * alpha; + } + + // tanh + if (A == 4) + { + if (G == 0) { scalar_t c = exp(x); scalar_t d = one / c; y = (x < -expRange) ? -one : (x > expRange) ? one : (c - d) / (c + d); } + if (G == 1) y = x * (one - yy * yy); + if (G == 2) y = x * (one - yy * yy) * (-two * yy); + } + + // sigmoid + if (A == 5) + { + if (G == 0) y = (x < -expRange) ? 0 : one / (exp(-x) + one); + if (G == 1) y = x * yy * (one - yy); + if (G == 2) y = x * yy * (one - yy) * (one - two * yy); + } + + // elu + if (A == 6) + { + if (G == 0) y = (x >= 0) ? x : exp(x) - one; + if (G == 1) y = (yy >= 0) ? x : x * (yy + one); + if (G == 2) y = (yy >= 0) ? 0 : x * (yy + one); + } + + // selu + if (A == 7) + { + if (G == 0) y = (x >= 0) ? seluScale * x : (seluScale * seluAlpha) * (exp(x) - one); + if (G == 1) y = (yy >= 0) ? x * seluScale : x * (yy + seluScale * seluAlpha); + if (G == 2) y = (yy >= 0) ? 0 : x * (yy + seluScale * seluAlpha); + } + + // softplus + if (A == 8) + { + if (G == 0) y = (x > expRange) ? x : log(exp(x) + one); + if (G == 1) y = x * (one - exp(-yy)); + if (G == 2) { scalar_t c = exp(-yy); y = x * c * (one - c); } + } + + // swish + if (A == 9) + { + if (G == 0) + y = (x < -expRange) ? 0 : x / (exp(-x) + one); + else + { + scalar_t c = exp(xref); + scalar_t d = c + one; + if (G == 1) + y = (xref > halfExpRange) ? x : x * c * (xref + d) / (d * d); + else + y = (xref > halfExpRange) ? 0 : x * c * (xref * (two - d) + two * d) / (d * d * d); + yref = (xref < -expRange) ? 0 : xref / (exp(-xref) + one) * gain; + } + } + + // Apply gain. + y *= gain * dy; + + // Clamp. + if (clamp >= 0) + { + if (G == 0) + y = (y > -clamp & y < clamp) ? y : (y >= 0) ? clamp : -clamp; + else + y = (yref > -clamp & yref < clamp) ? y : 0; + } + + // Store. + ((T*)p.y)[xi] = (T)y; + } +} + +//------------------------------------------------------------------------ +// CUDA kernel selection. + +template void* choose_bias_act_kernel(const bias_act_kernel_params& p) +{ + if (p.act == 1) return (void*)bias_act_kernel; + if (p.act == 2) return (void*)bias_act_kernel; + if (p.act == 3) return (void*)bias_act_kernel; + if (p.act == 4) return (void*)bias_act_kernel; + if (p.act == 5) return (void*)bias_act_kernel; + if (p.act == 6) return (void*)bias_act_kernel; + if (p.act == 7) return (void*)bias_act_kernel; + if (p.act == 8) return (void*)bias_act_kernel; + if (p.act == 9) return (void*)bias_act_kernel; + return NULL; +} + +//------------------------------------------------------------------------ +// Template specializations. 
+ +template void* choose_bias_act_kernel (const bias_act_kernel_params& p); +template void* choose_bias_act_kernel (const bias_act_kernel_params& p); +template void* choose_bias_act_kernel (const bias_act_kernel_params& p); + +//------------------------------------------------------------------------ diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/bias_act.h b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/bias_act.h new file mode 100644 index 0000000000000000000000000000000000000000..a32187e1fb7e3bae509d4eceaf900866866875a4 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/bias_act.h @@ -0,0 +1,38 @@ +// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +// +// NVIDIA CORPORATION and its licensors retain all intellectual property +// and proprietary rights in and to this software, related documentation +// and any modifications thereto. Any use, reproduction, disclosure or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA CORPORATION is strictly prohibited. + +//------------------------------------------------------------------------ +// CUDA kernel parameters. + +struct bias_act_kernel_params +{ + const void* x; // [sizeX] + const void* b; // [sizeB] or NULL + const void* xref; // [sizeX] or NULL + const void* yref; // [sizeX] or NULL + const void* dy; // [sizeX] or NULL + void* y; // [sizeX] + + int grad; + int act; + float alpha; + float gain; + float clamp; + + int sizeX; + int sizeB; + int stepB; + int loopX; +}; + +//------------------------------------------------------------------------ +// CUDA kernel selection. + +template void* choose_bias_act_kernel(const bias_act_kernel_params& p); + +//------------------------------------------------------------------------ diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/bias_act.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/bias_act.py new file mode 100644 index 0000000000000000000000000000000000000000..4bcb409a89ccf6c6f6ecfca5962683df2d280b1f --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/bias_act.py @@ -0,0 +1,212 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Custom PyTorch ops for efficient bias and activation.""" + +import os +import warnings +import numpy as np +import torch +import dnnlib +import traceback + +from .. import custom_ops +from .. 
import misc + +#---------------------------------------------------------------------------- + +activation_funcs = { + 'linear': dnnlib.EasyDict(func=lambda x, **_: x, def_alpha=0, def_gain=1, cuda_idx=1, ref='', has_2nd_grad=False), + 'relu': dnnlib.EasyDict(func=lambda x, **_: torch.nn.functional.relu(x), def_alpha=0, def_gain=np.sqrt(2), cuda_idx=2, ref='y', has_2nd_grad=False), + 'lrelu': dnnlib.EasyDict(func=lambda x, alpha, **_: torch.nn.functional.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', has_2nd_grad=False), + 'tanh': dnnlib.EasyDict(func=lambda x, **_: torch.tanh(x), def_alpha=0, def_gain=1, cuda_idx=4, ref='y', has_2nd_grad=True), + 'sigmoid': dnnlib.EasyDict(func=lambda x, **_: torch.sigmoid(x), def_alpha=0, def_gain=1, cuda_idx=5, ref='y', has_2nd_grad=True), + 'elu': dnnlib.EasyDict(func=lambda x, **_: torch.nn.functional.elu(x), def_alpha=0, def_gain=1, cuda_idx=6, ref='y', has_2nd_grad=True), + 'selu': dnnlib.EasyDict(func=lambda x, **_: torch.nn.functional.selu(x), def_alpha=0, def_gain=1, cuda_idx=7, ref='y', has_2nd_grad=True), + 'softplus': dnnlib.EasyDict(func=lambda x, **_: torch.nn.functional.softplus(x), def_alpha=0, def_gain=1, cuda_idx=8, ref='y', has_2nd_grad=True), + 'swish': dnnlib.EasyDict(func=lambda x, **_: torch.sigmoid(x) * x, def_alpha=0, def_gain=np.sqrt(2), cuda_idx=9, ref='x', has_2nd_grad=True), +} + +#---------------------------------------------------------------------------- + +_inited = False +_plugin = None +_null_tensor = torch.empty([0]) + +def _init(): + global _inited, _plugin + if not _inited: + _inited = True + sources = ['bias_act.cpp', 'bias_act.cu'] + sources = [os.path.join(os.path.dirname(__file__), s) for s in sources] + try: + _plugin = custom_ops.get_plugin('bias_act_plugin', sources=sources, extra_cuda_cflags=['--use_fast_math']) + except: + warnings.warn('Failed to build CUDA kernels for bias_act. Falling back to slow reference implementation. Details:\n\n' + traceback.format_exc()) + return _plugin is not None + +#---------------------------------------------------------------------------- + +def bias_act(x, b=None, dim=1, act='linear', alpha=None, gain=None, clamp=None, impl='cuda'): + r"""Fused bias and activation function. + + Adds bias `b` to activation tensor `x`, evaluates activation function `act`, + and scales the result by `gain`. Each of the steps is optional. In most cases, + the fused op is considerably more efficient than performing the same calculation + using standard PyTorch ops. It supports first and second order gradients, + but not third order gradients. + + Args: + x: Input activation tensor. Can be of any shape. + b: Bias vector, or `None` to disable. Must be a 1D tensor of the same type + as `x`. The shape must be known, and it must match the dimension of `x` + corresponding to `dim`. + dim: The dimension in `x` corresponding to the elements of `b`. + The value of `dim` is ignored if `b` is not specified. + act: Name of the activation function to evaluate, or `"linear"` to disable. + Can be e.g. `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`, etc. + See `activation_funcs` for a full list. `None` is not allowed. + alpha: Shape parameter for the activation function, or `None` to use the default. + gain: Scaling factor for the output tensor, or `None` to use default. + See `activation_funcs` for the default scaling of each activation function. + If unsure, consider specifying 1. 
+ clamp: Clamp the output values to `[-clamp, +clamp]`, or `None` to disable + the clamping (default). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the same shape and datatype as `x`. + """ + assert isinstance(x, torch.Tensor) + assert impl in ['ref', 'cuda'] + if impl == 'cuda' and x.device.type == 'cuda' and _init(): + return _bias_act_cuda(dim=dim, act=act, alpha=alpha, gain=gain, clamp=clamp).apply(x, b) + return _bias_act_ref(x=x, b=b, dim=dim, act=act, alpha=alpha, gain=gain, clamp=clamp) + +#---------------------------------------------------------------------------- + +@misc.profiled_function +def _bias_act_ref(x, b=None, dim=1, act='linear', alpha=None, gain=None, clamp=None): + """Slow reference implementation of `bias_act()` using standard TensorFlow ops. + """ + assert isinstance(x, torch.Tensor) + assert clamp is None or clamp >= 0 + spec = activation_funcs[act] + alpha = float(alpha if alpha is not None else spec.def_alpha) + gain = float(gain if gain is not None else spec.def_gain) + clamp = float(clamp if clamp is not None else -1) + + # Add bias. + if b is not None: + assert isinstance(b, torch.Tensor) and b.ndim == 1 + assert 0 <= dim < x.ndim + assert b.shape[0] == x.shape[dim] + x = x + b.reshape([-1 if i == dim else 1 for i in range(x.ndim)]) + + # Evaluate activation function. + alpha = float(alpha) + x = spec.func(x, alpha=alpha) + + # Scale by gain. + gain = float(gain) + if gain != 1: + x = x * gain + + # Clamp. + if clamp >= 0: + x = x.clamp(-clamp, clamp) # pylint: disable=invalid-unary-operand-type + return x + +#---------------------------------------------------------------------------- + +_bias_act_cuda_cache = dict() + +def _bias_act_cuda(dim=1, act='linear', alpha=None, gain=None, clamp=None): + """Fast CUDA implementation of `bias_act()` using custom ops. + """ + # Parse arguments. + assert clamp is None or clamp >= 0 + spec = activation_funcs[act] + alpha = float(alpha if alpha is not None else spec.def_alpha) + gain = float(gain if gain is not None else spec.def_gain) + clamp = float(clamp if clamp is not None else -1) + + # Lookup from cache. + key = (dim, act, alpha, gain, clamp) + if key in _bias_act_cuda_cache: + return _bias_act_cuda_cache[key] + + # Forward op. + class BiasActCuda(torch.autograd.Function): + @staticmethod + def forward(ctx, x, b): # pylint: disable=arguments-differ + ctx.memory_format = torch.channels_last if x.ndim > 2 and x.stride()[1] == 1 else torch.contiguous_format + x = x.contiguous(memory_format=ctx.memory_format) + b = b.contiguous() if b is not None else _null_tensor + y = x + if act != 'linear' or gain != 1 or clamp >= 0 or b is not _null_tensor: + y = _plugin.bias_act(x, b, _null_tensor, _null_tensor, _null_tensor, 0, dim, spec.cuda_idx, alpha, gain, clamp) + ctx.save_for_backward( + x if 'x' in spec.ref or spec.has_2nd_grad else _null_tensor, + b if 'x' in spec.ref or spec.has_2nd_grad else _null_tensor, + y if 'y' in spec.ref else _null_tensor) + return y + + @staticmethod + def backward(ctx, dy): # pylint: disable=arguments-differ + dy = dy.contiguous(memory_format=ctx.memory_format) + x, b, y = ctx.saved_tensors + dx = None + db = None + + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + dx = dy + if act != 'linear' or gain != 1 or clamp >= 0: + dx = BiasActCudaGrad.apply(dy, x, b, y) + + if ctx.needs_input_grad[1]: + db = dx.sum([i for i in range(dx.ndim) if i != dim]) + + return dx, db + + # Backward op. 
+ class BiasActCudaGrad(torch.autograd.Function): + @staticmethod + def forward(ctx, dy, x, b, y): # pylint: disable=arguments-differ + ctx.memory_format = torch.channels_last if dy.ndim > 2 and dy.stride()[1] == 1 else torch.contiguous_format + dx = _plugin.bias_act(dy, b, x, y, _null_tensor, 1, dim, spec.cuda_idx, alpha, gain, clamp) + ctx.save_for_backward( + dy if spec.has_2nd_grad else _null_tensor, + x, b, y) + return dx + + @staticmethod + def backward(ctx, d_dx): # pylint: disable=arguments-differ + d_dx = d_dx.contiguous(memory_format=ctx.memory_format) + dy, x, b, y = ctx.saved_tensors + d_dy = None + d_x = None + d_b = None + d_y = None + + if ctx.needs_input_grad[0]: + d_dy = BiasActCudaGrad.apply(d_dx, x, b, y) + + if spec.has_2nd_grad and (ctx.needs_input_grad[1] or ctx.needs_input_grad[2]): + d_x = _plugin.bias_act(d_dx, b, x, y, dy, 2, dim, spec.cuda_idx, alpha, gain, clamp) + + if spec.has_2nd_grad and ctx.needs_input_grad[2]: + d_b = d_x.sum([i for i in range(d_x.ndim) if i != dim]) + + return d_dy, d_x, d_b, d_y + + # Add to cache. + _bias_act_cuda_cache[key] = BiasActCuda + return BiasActCuda + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/conv2d_gradfix.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/conv2d_gradfix.py new file mode 100644 index 0000000000000000000000000000000000000000..e95e10d0b1d0315a63a76446fd4c5c293c8bbc6d --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/conv2d_gradfix.py @@ -0,0 +1,170 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Custom replacement for `torch.nn.functional.conv2d` that supports +arbitrarily high order gradients with zero performance penalty.""" + +import warnings +import contextlib +import torch + +# pylint: disable=redefined-builtin +# pylint: disable=arguments-differ +# pylint: disable=protected-access + +#---------------------------------------------------------------------------- + +enabled = False # Enable the custom op by setting this to true. +weight_gradients_disabled = False # Forcefully disable computation of gradients with respect to the weights. 
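`conv2d_gradfix.conv2d()` below is a drop-in for `torch.nn.functional.conv2d`: the custom autograd path is used only when `enabled` is set and the input is on a supported CUDA/PyTorch combination, otherwise it silently falls back to the standard op. A small sketch of that drop-in behaviour (import path assumed; runs on CPU via the fallback):

```python
import torch

from torch_utils.ops import conv2d_gradfix  # path assumed to be importable

x = torch.randn(2, 3, 16, 16, requires_grad=True)
w = torch.randn(8, 3, 3, 3, requires_grad=True)

# With enabled=False (the module default) this is plain F.conv2d under the hood;
# setting enabled=True on CUDA routes through the gradfix autograd.Function,
# which additionally supports higher-order gradients.
conv2d_gradfix.enabled = False
y = conv2d_gradfix.conv2d(x, w, padding=1)
y.sum().backward()
print(x.grad.shape, w.grad.shape)  # torch.Size([2, 3, 16, 16]) torch.Size([8, 3, 3, 3])
```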
+ +@contextlib.contextmanager +def no_weight_gradients(): + global weight_gradients_disabled + old = weight_gradients_disabled + weight_gradients_disabled = True + yield + weight_gradients_disabled = old + +#---------------------------------------------------------------------------- + +def conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1): + if _should_use_custom_op(input): + return _conv2d_gradfix(transpose=False, weight_shape=weight.shape, stride=stride, padding=padding, output_padding=0, dilation=dilation, groups=groups).apply(input, weight, bias) + return torch.nn.functional.conv2d(input=input, weight=weight, bias=bias, stride=stride, padding=padding, dilation=dilation, groups=groups) + +def conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1): + if _should_use_custom_op(input): + return _conv2d_gradfix(transpose=True, weight_shape=weight.shape, stride=stride, padding=padding, output_padding=output_padding, groups=groups, dilation=dilation).apply(input, weight, bias) + return torch.nn.functional.conv_transpose2d(input=input, weight=weight, bias=bias, stride=stride, padding=padding, output_padding=output_padding, groups=groups, dilation=dilation) + +#---------------------------------------------------------------------------- + +def _should_use_custom_op(input): + assert isinstance(input, torch.Tensor) + if (not enabled) or (not torch.backends.cudnn.enabled): + return False + if input.device.type != 'cuda': + return False + if any(torch.__version__.startswith(x) for x in ['1.7.', '1.8.', '1.9']): + return True + warnings.warn(f'conv2d_gradfix not supported on PyTorch {torch.__version__}. Falling back to torch.nn.functional.conv2d().') + return False + +def _tuple_of_ints(xs, ndim): + xs = tuple(xs) if isinstance(xs, (tuple, list)) else (xs,) * ndim + assert len(xs) == ndim + assert all(isinstance(x, int) for x in xs) + return xs + +#---------------------------------------------------------------------------- + +_conv2d_gradfix_cache = dict() + +def _conv2d_gradfix(transpose, weight_shape, stride, padding, output_padding, dilation, groups): + # Parse arguments. + ndim = 2 + weight_shape = tuple(weight_shape) + stride = _tuple_of_ints(stride, ndim) + padding = _tuple_of_ints(padding, ndim) + output_padding = _tuple_of_ints(output_padding, ndim) + dilation = _tuple_of_ints(dilation, ndim) + + # Lookup from cache. + key = (transpose, weight_shape, stride, padding, output_padding, dilation, groups) + if key in _conv2d_gradfix_cache: + return _conv2d_gradfix_cache[key] + + # Validate arguments. + assert groups >= 1 + assert len(weight_shape) == ndim + 2 + assert all(stride[i] >= 1 for i in range(ndim)) + assert all(padding[i] >= 0 for i in range(ndim)) + assert all(dilation[i] >= 0 for i in range(ndim)) + if not transpose: + assert all(output_padding[i] == 0 for i in range(ndim)) + else: # transpose + assert all(0 <= output_padding[i] < max(stride[i], dilation[i]) for i in range(ndim)) + + # Helpers. + common_kwargs = dict(stride=stride, padding=padding, dilation=dilation, groups=groups) + def calc_output_padding(input_shape, output_shape): + if transpose: + return [0, 0] + return [ + input_shape[i + 2] + - (output_shape[i + 2] - 1) * stride[i] + - (1 - 2 * padding[i]) + - dilation[i] * (weight_shape[i + 2] - 1) + for i in range(ndim) + ] + + # Forward & backward. 
+ class Conv2d(torch.autograd.Function): + @staticmethod + def forward(ctx, input, weight, bias): + assert weight.shape == weight_shape + if not transpose: + output = torch.nn.functional.conv2d(input=input, weight=weight, bias=bias, **common_kwargs) + else: # transpose + output = torch.nn.functional.conv_transpose2d(input=input, weight=weight, bias=bias, output_padding=output_padding, **common_kwargs) + ctx.save_for_backward(input, weight) + return output + + @staticmethod + def backward(ctx, grad_output): + input, weight = ctx.saved_tensors + grad_input = None + grad_weight = None + grad_bias = None + + if ctx.needs_input_grad[0]: + p = calc_output_padding(input_shape=input.shape, output_shape=grad_output.shape) + grad_input = _conv2d_gradfix(transpose=(not transpose), weight_shape=weight_shape, output_padding=p, **common_kwargs).apply(grad_output, weight, None) + assert grad_input.shape == input.shape + + if ctx.needs_input_grad[1] and not weight_gradients_disabled: + grad_weight = Conv2dGradWeight.apply(grad_output, input) + assert grad_weight.shape == weight_shape + + if ctx.needs_input_grad[2]: + grad_bias = grad_output.sum([0, 2, 3]) + + return grad_input, grad_weight, grad_bias + + # Gradient with respect to the weights. + class Conv2dGradWeight(torch.autograd.Function): + @staticmethod + def forward(ctx, grad_output, input): + op = torch._C._jit_get_operation('aten::cudnn_convolution_backward_weight' if not transpose else 'aten::cudnn_convolution_transpose_backward_weight') + flags = [torch.backends.cudnn.benchmark, torch.backends.cudnn.deterministic, torch.backends.cudnn.allow_tf32] + grad_weight = op(weight_shape, grad_output, input, padding, stride, dilation, groups, *flags) + assert grad_weight.shape == weight_shape + ctx.save_for_backward(grad_output, input) + return grad_weight + + @staticmethod + def backward(ctx, grad2_grad_weight): + grad_output, input = ctx.saved_tensors + grad2_grad_output = None + grad2_input = None + + if ctx.needs_input_grad[0]: + grad2_grad_output = Conv2d.apply(input, grad2_grad_weight, None) + assert grad2_grad_output.shape == grad_output.shape + + if ctx.needs_input_grad[1]: + p = calc_output_padding(input_shape=input.shape, output_shape=grad_output.shape) + grad2_input = _conv2d_gradfix(transpose=(not transpose), weight_shape=weight_shape, output_padding=p, **common_kwargs).apply(grad_output, grad2_grad_weight, None) + assert grad2_input.shape == input.shape + + return grad2_grad_output, grad2_input + + _conv2d_gradfix_cache[key] = Conv2d + return Conv2d + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/conv2d_resample.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/conv2d_resample.py new file mode 100644 index 0000000000000000000000000000000000000000..cd4750744c83354bab78704d4ef51ad1070fcc4a --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/conv2d_resample.py @@ -0,0 +1,156 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
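The `Conv2d` / `Conv2dGradWeight` pair above exists so that gradients of gradients can flow through convolutions, which gradient-penalty style losses require. A toy double-backward of that kind, written with plain `torch.nn.functional.conv2d` so it runs anywhere (the gradfix classes provide the same computation graph on cuDNN without the compatibility caveats noted in the module docstring):

```python
import torch
import torch.nn.functional as F

x = torch.randn(4, 3, 8, 8, requires_grad=True)
w = torch.randn(6, 3, 3, 3, requires_grad=True)

y = F.conv2d(x, w, padding=1)
# First-order gradient w.r.t. the input, kept in the graph...
(grad_x,) = torch.autograd.grad(y.square().sum(), x, create_graph=True)
# ...so a penalty on it can be differentiated again, now w.r.t. the weights.
penalty = grad_x.square().sum()
penalty.backward()
print(w.grad.shape)  # torch.Size([6, 3, 3, 3])
```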
+ +"""2D convolution with optional up/downsampling.""" + +import torch + +from .. import misc +from . import conv2d_gradfix +from . import upfirdn2d +from .upfirdn2d import _parse_padding +from .upfirdn2d import _get_filter_size + +#---------------------------------------------------------------------------- + +def _get_weight_shape(w): + with misc.suppress_tracer_warnings(): # this value will be treated as a constant + shape = [int(sz) for sz in w.shape] + misc.assert_shape(w, shape) + return shape + +#---------------------------------------------------------------------------- + +def _conv2d_wrapper(x, w, stride=1, padding=0, groups=1, transpose=False, flip_weight=True): + """Wrapper for the underlying `conv2d()` and `conv_transpose2d()` implementations. + """ + out_channels, in_channels_per_group, kh, kw = _get_weight_shape(w) + + # Flip weight if requested. + if not flip_weight: # conv2d() actually performs correlation (flip_weight=True) not convolution (flip_weight=False). + w = w.flip([2, 3]) + + # Workaround performance pitfall in cuDNN 8.0.5, triggered when using + # 1x1 kernel + memory_format=channels_last + less than 64 channels. + if kw == 1 and kh == 1 and stride == 1 and padding in [0, [0, 0], (0, 0)] and not transpose: + if x.stride()[1] == 1 and min(out_channels, in_channels_per_group) < 64: + if out_channels <= 4 and groups == 1: + in_shape = x.shape + x = w.squeeze(3).squeeze(2) @ x.reshape([in_shape[0], in_channels_per_group, -1]) + x = x.reshape([in_shape[0], out_channels, in_shape[2], in_shape[3]]) + else: + x = x.to(memory_format=torch.contiguous_format) + w = w.to(memory_format=torch.contiguous_format) + x = conv2d_gradfix.conv2d(x, w, groups=groups) + return x.to(memory_format=torch.channels_last) + + # Otherwise => execute using conv2d_gradfix. + op = conv2d_gradfix.conv_transpose2d if transpose else conv2d_gradfix.conv2d + return op(x, w, stride=stride, padding=padding, groups=groups) + +#---------------------------------------------------------------------------- + +@misc.profiled_function +def conv2d_resample(x, w, f=None, up=1, down=1, padding=0, groups=1, flip_weight=True, flip_filter=False): + r"""2D convolution with optional up/downsampling. + + Padding is performed only once at the beginning, not between the operations. + + Args: + x: Input tensor of shape + `[batch_size, in_channels, in_height, in_width]`. + w: Weight tensor of shape + `[out_channels, in_channels//groups, kernel_height, kernel_width]`. + f: Low-pass filter for up/downsampling. Must be prepared beforehand by + calling upfirdn2d.setup_filter(). None = identity (default). + up: Integer upsampling factor (default: 1). + down: Integer downsampling factor (default: 1). + padding: Padding with respect to the upsampled image. Can be a single number + or a list/tuple `[x, y]` or `[x_before, x_after, y_before, y_after]` + (default: 0). + groups: Split input channels into N groups (default: 1). + flip_weight: False = convolution, True = correlation (default: True). + flip_filter: False = convolution, True = correlation (default: False). + + Returns: + Tensor of the shape `[batch_size, num_channels, out_height, out_width]`. + """ + # Validate arguments. 
+ assert isinstance(x, torch.Tensor) and (x.ndim == 4) + assert isinstance(w, torch.Tensor) and (w.ndim == 4) and (w.dtype == x.dtype) + assert f is None or (isinstance(f, torch.Tensor) and f.ndim in [1, 2] and f.dtype == torch.float32) + assert isinstance(up, int) and (up >= 1) + assert isinstance(down, int) and (down >= 1) + assert isinstance(groups, int) and (groups >= 1) + out_channels, in_channels_per_group, kh, kw = _get_weight_shape(w) + fw, fh = _get_filter_size(f) + px0, px1, py0, py1 = _parse_padding(padding) + + # Adjust padding to account for up/downsampling. + if up > 1: + px0 += (fw + up - 1) // 2 + px1 += (fw - up) // 2 + py0 += (fh + up - 1) // 2 + py1 += (fh - up) // 2 + if down > 1: + px0 += (fw - down + 1) // 2 + px1 += (fw - down) // 2 + py0 += (fh - down + 1) // 2 + py1 += (fh - down) // 2 + + # Fast path: 1x1 convolution with downsampling only => downsample first, then convolve. + if kw == 1 and kh == 1 and (down > 1 and up == 1): + x = upfirdn2d.upfirdn2d(x=x, f=f, down=down, padding=[px0,px1,py0,py1], flip_filter=flip_filter) + x = _conv2d_wrapper(x=x, w=w, groups=groups, flip_weight=flip_weight) + return x + + # Fast path: 1x1 convolution with upsampling only => convolve first, then upsample. + if kw == 1 and kh == 1 and (up > 1 and down == 1): + x = _conv2d_wrapper(x=x, w=w, groups=groups, flip_weight=flip_weight) + x = upfirdn2d.upfirdn2d(x=x, f=f, up=up, padding=[px0,px1,py0,py1], gain=up**2, flip_filter=flip_filter) + return x + + # Fast path: downsampling only => use strided convolution. + if down > 1 and up == 1: + x = upfirdn2d.upfirdn2d(x=x, f=f, padding=[px0,px1,py0,py1], flip_filter=flip_filter) + x = _conv2d_wrapper(x=x, w=w, stride=down, groups=groups, flip_weight=flip_weight) + return x + + # Fast path: upsampling with optional downsampling => use transpose strided convolution. + if up > 1: + if groups == 1: + w = w.transpose(0, 1) + else: + w = w.reshape(groups, out_channels // groups, in_channels_per_group, kh, kw) + w = w.transpose(1, 2) + w = w.reshape(groups * in_channels_per_group, out_channels // groups, kh, kw) + px0 -= kw - 1 + px1 -= kw - up + py0 -= kh - 1 + py1 -= kh - up + pxt = max(min(-px0, -px1), 0) + pyt = max(min(-py0, -py1), 0) + x = _conv2d_wrapper(x=x, w=w, stride=up, padding=[pyt,pxt], groups=groups, transpose=True, flip_weight=(not flip_weight)) + x = upfirdn2d.upfirdn2d(x=x, f=f, padding=[px0+pxt,px1+pxt,py0+pyt,py1+pyt], gain=up**2, flip_filter=flip_filter) + if down > 1: + x = upfirdn2d.upfirdn2d(x=x, f=f, down=down, flip_filter=flip_filter) + return x + + # Fast path: no up/downsampling, padding supported by the underlying implementation => use plain conv2d. + if up == 1 and down == 1: + if px0 == px1 and py0 == py1 and px0 >= 0 and py0 >= 0: + return _conv2d_wrapper(x=x, w=w, padding=[py0,px0], groups=groups, flip_weight=flip_weight) + + # Fallback: Generic reference implementation. 
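Putting the pieces together, `conv2d_resample()` above composes `upfirdn2d` filtering with `conv2d_gradfix` convolutions and picks one of the fast paths depending on the kernel size and the up/down factors. A hedged usage sketch (import paths assumed, including the `dnnlib` dependency of `misc.py`; on CPU it exercises the reference upfirdn2d path and the plain-conv fallback):

```python
import torch

from torch_utils.ops import conv2d_resample, upfirdn2d  # paths assumed importable

x = torch.randn(1, 3, 32, 32)             # NCHW input
w = torch.randn(8, 3, 3, 3)               # 3x3 conv weights
f = upfirdn2d.setup_filter([1, 3, 3, 1])  # low-pass FIR filter, prepared as the docstring requires

# 2x upsampling fused with a 3x3 convolution (padding is w.r.t. the upsampled image).
y = conv2d_resample.conv2d_resample(x=x, w=w, f=f, up=2, padding=1)
print(y.shape)  # torch.Size([1, 8, 64, 64])
```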
+ x = upfirdn2d.upfirdn2d(x=x, f=(f if up > 1 else None), up=up, padding=[px0,px1,py0,py1], gain=up**2, flip_filter=flip_filter) + x = _conv2d_wrapper(x=x, w=w, groups=groups, flip_weight=flip_weight) + if down > 1: + x = upfirdn2d.upfirdn2d(x=x, f=f, down=down, flip_filter=flip_filter) + return x + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/fma.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/fma.py new file mode 100644 index 0000000000000000000000000000000000000000..2eeac58a626c49231e04122b93e321ada954c5d3 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/fma.py @@ -0,0 +1,60 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Fused multiply-add, with slightly faster gradients than `torch.addcmul()`.""" + +import torch + +#---------------------------------------------------------------------------- + +def fma(a, b, c): # => a * b + c + return _FusedMultiplyAdd.apply(a, b, c) + +#---------------------------------------------------------------------------- + +class _FusedMultiplyAdd(torch.autograd.Function): # a * b + c + @staticmethod + def forward(ctx, a, b, c): # pylint: disable=arguments-differ + out = torch.addcmul(c, a, b) + ctx.save_for_backward(a, b) + ctx.c_shape = c.shape + return out + + @staticmethod + def backward(ctx, dout): # pylint: disable=arguments-differ + a, b = ctx.saved_tensors + c_shape = ctx.c_shape + da = None + db = None + dc = None + + if ctx.needs_input_grad[0]: + da = _unbroadcast(dout * b, a.shape) + + if ctx.needs_input_grad[1]: + db = _unbroadcast(dout * a, b.shape) + + if ctx.needs_input_grad[2]: + dc = _unbroadcast(dout, c_shape) + + return da, db, dc + +#---------------------------------------------------------------------------- + +def _unbroadcast(x, shape): + extra_dims = x.ndim - len(shape) + assert extra_dims >= 0 + dim = [i for i in range(x.ndim) if x.shape[i] > 1 and (i < extra_dims or shape[i - extra_dims] == 1)] + if len(dim): + x = x.sum(dim=dim, keepdim=True) + if extra_dims: + x = x.reshape(-1, *x.shape[extra_dims+1:]) + assert x.shape == shape + return x + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/grid_sample_gradfix.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/grid_sample_gradfix.py new file mode 100644 index 0000000000000000000000000000000000000000..ca6b3413ea72a734703c34382c023b84523601fd --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/grid_sample_gradfix.py @@ -0,0 +1,83 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. 
Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Custom replacement for `torch.nn.functional.grid_sample` that +supports arbitrarily high order gradients between the input and output. +Only works on 2D images and assumes +`mode='bilinear'`, `padding_mode='zeros'`, `align_corners=False`.""" + +import warnings +import torch + +# pylint: disable=redefined-builtin +# pylint: disable=arguments-differ +# pylint: disable=protected-access + +#---------------------------------------------------------------------------- + +enabled = False # Enable the custom op by setting this to true. + +#---------------------------------------------------------------------------- + +def grid_sample(input, grid): + if _should_use_custom_op(): + return _GridSample2dForward.apply(input, grid) + return torch.nn.functional.grid_sample(input=input, grid=grid, mode='bilinear', padding_mode='zeros', align_corners=False) + +#---------------------------------------------------------------------------- + +def _should_use_custom_op(): + if not enabled: + return False + if any(torch.__version__.startswith(x) for x in ['1.7.', '1.8.', '1.9']): + return True + warnings.warn(f'grid_sample_gradfix not supported on PyTorch {torch.__version__}. Falling back to torch.nn.functional.grid_sample().') + return False + +#---------------------------------------------------------------------------- + +class _GridSample2dForward(torch.autograd.Function): + @staticmethod + def forward(ctx, input, grid): + assert input.ndim == 4 + assert grid.ndim == 4 + output = torch.nn.functional.grid_sample(input=input, grid=grid, mode='bilinear', padding_mode='zeros', align_corners=False) + ctx.save_for_backward(input, grid) + return output + + @staticmethod + def backward(ctx, grad_output): + input, grid = ctx.saved_tensors + grad_input, grad_grid = _GridSample2dBackward.apply(grad_output, input, grid) + return grad_input, grad_grid + +#---------------------------------------------------------------------------- + +class _GridSample2dBackward(torch.autograd.Function): + @staticmethod + def forward(ctx, grad_output, input, grid): + op = torch._C._jit_get_operation('aten::grid_sampler_2d_backward') + grad_input, grad_grid = op(grad_output, input, grid, 0, 0, False) + ctx.save_for_backward(grid) + return grad_input, grad_grid + + @staticmethod + def backward(ctx, grad2_grad_input, grad2_grad_grid): + _ = grad2_grad_grid # unused + grid, = ctx.saved_tensors + grad2_grad_output = None + grad2_input = None + grad2_grid = None + + if ctx.needs_input_grad[0]: + grad2_grad_output = _GridSample2dForward.apply(grad2_grad_input, grid) + + assert not ctx.needs_input_grad[2] + return grad2_grad_output, grad2_input, grad2_grid + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/upfirdn2d.cpp b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/upfirdn2d.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2d7177fc60040751d20e9a8da0301fa3ab64968a --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/upfirdn2d.cpp @@ -0,0 +1,103 @@ +// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
+// +// NVIDIA CORPORATION and its licensors retain all intellectual property +// and proprietary rights in and to this software, related documentation +// and any modifications thereto. Any use, reproduction, disclosure or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA CORPORATION is strictly prohibited. + +#include +#include +#include +#include "upfirdn2d.h" + +//------------------------------------------------------------------------ + +static torch::Tensor upfirdn2d(torch::Tensor x, torch::Tensor f, int upx, int upy, int downx, int downy, int padx0, int padx1, int pady0, int pady1, bool flip, float gain) +{ + // Validate arguments. + TORCH_CHECK(x.is_cuda(), "x must reside on CUDA device"); + TORCH_CHECK(f.device() == x.device(), "f must reside on the same device as x"); + TORCH_CHECK(f.dtype() == torch::kFloat, "f must be float32"); + TORCH_CHECK(x.numel() <= INT_MAX, "x is too large"); + TORCH_CHECK(f.numel() <= INT_MAX, "f is too large"); + TORCH_CHECK(x.dim() == 4, "x must be rank 4"); + TORCH_CHECK(f.dim() == 2, "f must be rank 2"); + TORCH_CHECK(f.size(0) >= 1 && f.size(1) >= 1, "f must be at least 1x1"); + TORCH_CHECK(upx >= 1 && upy >= 1, "upsampling factor must be at least 1"); + TORCH_CHECK(downx >= 1 && downy >= 1, "downsampling factor must be at least 1"); + + // Create output tensor. + const at::cuda::OptionalCUDAGuard device_guard(device_of(x)); + int outW = ((int)x.size(3) * upx + padx0 + padx1 - (int)f.size(1) + downx) / downx; + int outH = ((int)x.size(2) * upy + pady0 + pady1 - (int)f.size(0) + downy) / downy; + TORCH_CHECK(outW >= 1 && outH >= 1, "output must be at least 1x1"); + torch::Tensor y = torch::empty({x.size(0), x.size(1), outH, outW}, x.options(), x.suggest_memory_format()); + TORCH_CHECK(y.numel() <= INT_MAX, "output is too large"); + + // Initialize CUDA kernel parameters. + upfirdn2d_kernel_params p; + p.x = x.data_ptr(); + p.f = f.data_ptr(); + p.y = y.data_ptr(); + p.up = make_int2(upx, upy); + p.down = make_int2(downx, downy); + p.pad0 = make_int2(padx0, pady0); + p.flip = (flip) ? 1 : 0; + p.gain = gain; + p.inSize = make_int4((int)x.size(3), (int)x.size(2), (int)x.size(1), (int)x.size(0)); + p.inStride = make_int4((int)x.stride(3), (int)x.stride(2), (int)x.stride(1), (int)x.stride(0)); + p.filterSize = make_int2((int)f.size(1), (int)f.size(0)); + p.filterStride = make_int2((int)f.stride(1), (int)f.stride(0)); + p.outSize = make_int4((int)y.size(3), (int)y.size(2), (int)y.size(1), (int)y.size(0)); + p.outStride = make_int4((int)y.stride(3), (int)y.stride(2), (int)y.stride(1), (int)y.stride(0)); + p.sizeMajor = (p.inStride.z == 1) ? p.inSize.w : p.inSize.w * p.inSize.z; + p.sizeMinor = (p.inStride.z == 1) ? p.inSize.z : 1; + + // Choose CUDA kernel. + upfirdn2d_kernel_spec spec; + AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&] + { + spec = choose_upfirdn2d_kernel(p); + }); + + // Set looping options. + p.loopMajor = (p.sizeMajor - 1) / 16384 + 1; + p.loopMinor = spec.loopMinor; + p.loopX = spec.loopX; + p.launchMinor = (p.sizeMinor - 1) / p.loopMinor + 1; + p.launchMajor = (p.sizeMajor - 1) / p.loopMajor + 1; + + // Compute grid size. 
+ dim3 blockSize, gridSize; + if (spec.tileOutW < 0) // large + { + blockSize = dim3(4, 32, 1); + gridSize = dim3( + ((p.outSize.y - 1) / blockSize.x + 1) * p.launchMinor, + (p.outSize.x - 1) / (blockSize.y * p.loopX) + 1, + p.launchMajor); + } + else // small + { + blockSize = dim3(256, 1, 1); + gridSize = dim3( + ((p.outSize.y - 1) / spec.tileOutH + 1) * p.launchMinor, + (p.outSize.x - 1) / (spec.tileOutW * p.loopX) + 1, + p.launchMajor); + } + + // Launch CUDA kernel. + void* args[] = {&p}; + AT_CUDA_CHECK(cudaLaunchKernel(spec.kernel, gridSize, blockSize, args, 0, at::cuda::getCurrentCUDAStream())); + return y; +} + +//------------------------------------------------------------------------ + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) +{ + m.def("upfirdn2d", &upfirdn2d); +} + +//------------------------------------------------------------------------ diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/upfirdn2d.cu b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/upfirdn2d.cu new file mode 100644 index 0000000000000000000000000000000000000000..ebdd9879f4bb16fc57a23cbc81f9de8ef54e4916 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/upfirdn2d.cu @@ -0,0 +1,350 @@ +// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +// +// NVIDIA CORPORATION and its licensors retain all intellectual property +// and proprietary rights in and to this software, related documentation +// and any modifications thereto. Any use, reproduction, disclosure or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA CORPORATION is strictly prohibited. + +#include +#include "upfirdn2d.h" + +//------------------------------------------------------------------------ +// Helpers. + +template struct InternalType; +template <> struct InternalType { typedef double scalar_t; }; +template <> struct InternalType { typedef float scalar_t; }; +template <> struct InternalType { typedef float scalar_t; }; + +static __device__ __forceinline__ int floor_div(int a, int b) +{ + int t = 1 - a / b; + return (a + t * b) / b - t; +} + +//------------------------------------------------------------------------ +// Generic CUDA implementation for large filters. + +template static __global__ void upfirdn2d_kernel_large(upfirdn2d_kernel_params p) +{ + typedef typename InternalType::scalar_t scalar_t; + + // Calculate thread index. + int minorBase = blockIdx.x * blockDim.x + threadIdx.x; + int outY = minorBase / p.launchMinor; + minorBase -= outY * p.launchMinor; + int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y; + int majorBase = blockIdx.z * p.loopMajor; + if (outXBase >= p.outSize.x | outY >= p.outSize.y | majorBase >= p.sizeMajor) + return; + + // Setup Y receptive field. + int midY = outY * p.down.y + p.up.y - 1 - p.pad0.y; + int inY = min(max(floor_div(midY, p.up.y), 0), p.inSize.y); + int h = min(max(floor_div(midY + p.filterSize.y, p.up.y), 0), p.inSize.y) - inY; + int filterY = midY + p.filterSize.y - (inY + 1) * p.up.y; + if (p.flip) + filterY = p.filterSize.y - 1 - filterY; + + // Loop over major, minor, and X. 
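+    // Each thread walks its assigned (major, minor, outX) output positions and accumulates a
+    // dot product between the w x h input receptive field and the filter taps, stepping the
+    // filter pointer by +/-up per input sample; the sum is scaled by p.gain before the store.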
+ for (int majorIdx = 0, major = majorBase; majorIdx < p.loopMajor & major < p.sizeMajor; majorIdx++, major++) + for (int minorIdx = 0, minor = minorBase; minorIdx < p.loopMinor & minor < p.sizeMinor; minorIdx++, minor += p.launchMinor) + { + int nc = major * p.sizeMinor + minor; + int n = nc / p.inSize.z; + int c = nc - n * p.inSize.z; + for (int loopX = 0, outX = outXBase; loopX < p.loopX & outX < p.outSize.x; loopX++, outX += blockDim.y) + { + // Setup X receptive field. + int midX = outX * p.down.x + p.up.x - 1 - p.pad0.x; + int inX = min(max(floor_div(midX, p.up.x), 0), p.inSize.x); + int w = min(max(floor_div(midX + p.filterSize.x, p.up.x), 0), p.inSize.x) - inX; + int filterX = midX + p.filterSize.x - (inX + 1) * p.up.x; + if (p.flip) + filterX = p.filterSize.x - 1 - filterX; + + // Initialize pointers. + const T* xp = &((const T*)p.x)[inX * p.inStride.x + inY * p.inStride.y + c * p.inStride.z + n * p.inStride.w]; + const float* fp = &p.f[filterX * p.filterStride.x + filterY * p.filterStride.y]; + int filterStepX = ((p.flip) ? p.up.x : -p.up.x) * p.filterStride.x; + int filterStepY = ((p.flip) ? p.up.y : -p.up.y) * p.filterStride.y; + + // Inner loop. + scalar_t v = 0; + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++) + { + v += (scalar_t)(*xp) * (scalar_t)(*fp); + xp += p.inStride.x; + fp += filterStepX; + } + xp += p.inStride.y - w * p.inStride.x; + fp += filterStepY - w * filterStepX; + } + + // Store result. + v *= p.gain; + ((T*)p.y)[outX * p.outStride.x + outY * p.outStride.y + c * p.outStride.z + n * p.outStride.w] = (T)v; + } + } +} + +//------------------------------------------------------------------------ +// Specialized CUDA implementation for small filters. + +template +static __global__ void upfirdn2d_kernel_small(upfirdn2d_kernel_params p) +{ + typedef typename InternalType::scalar_t scalar_t; + const int tileInW = ((tileOutW - 1) * downx + filterW - 1) / upx + 1; + const int tileInH = ((tileOutH - 1) * downy + filterH - 1) / upy + 1; + __shared__ volatile scalar_t sf[filterH][filterW]; + __shared__ volatile scalar_t sx[tileInH][tileInW][loopMinor]; + + // Calculate tile index. + int minorBase = blockIdx.x; + int tileOutY = minorBase / p.launchMinor; + minorBase -= tileOutY * p.launchMinor; + minorBase *= loopMinor; + tileOutY *= tileOutH; + int tileOutXBase = blockIdx.y * p.loopX * tileOutW; + int majorBase = blockIdx.z * p.loopMajor; + if (tileOutXBase >= p.outSize.x | tileOutY >= p.outSize.y | majorBase >= p.sizeMajor) + return; + + // Load filter (flipped). + for (int tapIdx = threadIdx.x; tapIdx < filterH * filterW; tapIdx += blockDim.x) + { + int fy = tapIdx / filterW; + int fx = tapIdx - fy * filterW; + scalar_t v = 0; + if (fx < p.filterSize.x & fy < p.filterSize.y) + { + int ffx = (p.flip) ? fx : p.filterSize.x - 1 - fx; + int ffy = (p.flip) ? fy : p.filterSize.y - 1 - fy; + v = (scalar_t)p.f[ffx * p.filterStride.x + ffy * p.filterStride.y]; + } + sf[fy][fx] = v; + } + + // Loop over major and X. + for (int majorIdx = 0, major = majorBase; majorIdx < p.loopMajor & major < p.sizeMajor; majorIdx++, major++) + { + int baseNC = major * p.sizeMinor + minorBase; + int n = baseNC / p.inSize.z; + int baseC = baseNC - n * p.inSize.z; + for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outSize.x; loopX++, tileOutX += tileOutW) + { + // Load input pixels. 
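+            // Stage the input tile covering this output tile's receptive field into shared
+            // memory (sx); the filter taps were already cached, pre-flipped, in sf above.
+            // Out-of-range samples are written as zeros, which implements the zero padding.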
+ int tileMidX = tileOutX * downx + upx - 1 - p.pad0.x; + int tileMidY = tileOutY * downy + upy - 1 - p.pad0.y; + int tileInX = floor_div(tileMidX, upx); + int tileInY = floor_div(tileMidY, upy); + __syncthreads(); + for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW * loopMinor; inIdx += blockDim.x) + { + int relC = inIdx; + int relInX = relC / loopMinor; + int relInY = relInX / tileInW; + relC -= relInX * loopMinor; + relInX -= relInY * tileInW; + int c = baseC + relC; + int inX = tileInX + relInX; + int inY = tileInY + relInY; + scalar_t v = 0; + if (inX >= 0 & inY >= 0 & inX < p.inSize.x & inY < p.inSize.y & c < p.inSize.z) + v = (scalar_t)((const T*)p.x)[inX * p.inStride.x + inY * p.inStride.y + c * p.inStride.z + n * p.inStride.w]; + sx[relInY][relInX][relC] = v; + } + + // Loop over output pixels. + __syncthreads(); + for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW * loopMinor; outIdx += blockDim.x) + { + int relC = outIdx; + int relOutX = relC / loopMinor; + int relOutY = relOutX / tileOutW; + relC -= relOutX * loopMinor; + relOutX -= relOutY * tileOutW; + int c = baseC + relC; + int outX = tileOutX + relOutX; + int outY = tileOutY + relOutY; + + // Setup receptive field. + int midX = tileMidX + relOutX * downx; + int midY = tileMidY + relOutY * downy; + int inX = floor_div(midX, upx); + int inY = floor_div(midY, upy); + int relInX = inX - tileInX; + int relInY = inY - tileInY; + int filterX = (inX + 1) * upx - midX - 1; // flipped + int filterY = (inY + 1) * upy - midY - 1; // flipped + + // Inner loop. + if (outX < p.outSize.x & outY < p.outSize.y & c < p.outSize.z) + { + scalar_t v = 0; + #pragma unroll + for (int y = 0; y < filterH / upy; y++) + #pragma unroll + for (int x = 0; x < filterW / upx; x++) + v += sx[relInY + y][relInX + x][relC] * sf[filterY + y * upy][filterX + x * upx]; + v *= p.gain; + ((T*)p.y)[outX * p.outStride.x + outY * p.outStride.y + c * p.outStride.z + n * p.outStride.w] = (T)v; + } + } + } + } +} + +//------------------------------------------------------------------------ +// CUDA kernel selection. 
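+// choose_upfirdn2d_kernel() defaults to the generic large-filter kernel and then, based on
+// the memory layout (contiguous vs. channels_last, inferred from the channel stride), the
+// up/down factors, and the filter size, overrides the choice with a template-specialized
+// small-filter kernel together with its tile size and launch parameters.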
+ +template upfirdn2d_kernel_spec choose_upfirdn2d_kernel(const upfirdn2d_kernel_params& p) +{ + int s = p.inStride.z, fx = p.filterSize.x, fy = p.filterSize.y; + + upfirdn2d_kernel_spec spec = {(void*)upfirdn2d_kernel_large, -1,-1,1, 4}; // contiguous + if (s == 1) spec = {(void*)upfirdn2d_kernel_large, -1,-1,4, 1}; // channels_last + + if (s != 1 && p.up.x == 1 && p.up.y == 1 && p.down.x == 1 && p.down.y == 1) // contiguous + { + if (fx <= 7 && fy <= 7 ) spec = {(void*)upfirdn2d_kernel_small, 64,16,1, 1}; + if (fx <= 6 && fy <= 6 ) spec = {(void*)upfirdn2d_kernel_small, 64,16,1, 1}; + if (fx <= 5 && fy <= 5 ) spec = {(void*)upfirdn2d_kernel_small, 64,16,1, 1}; + if (fx <= 4 && fy <= 4 ) spec = {(void*)upfirdn2d_kernel_small, 64,16,1, 1}; + if (fx <= 3 && fy <= 3 ) spec = {(void*)upfirdn2d_kernel_small, 64,16,1, 1}; + if (fx <= 24 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,8,1, 1}; + if (fx <= 20 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,8,1, 1}; + if (fx <= 16 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,8,1, 1}; + if (fx <= 12 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,8,1, 1}; + if (fx <= 8 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,8,1, 1}; + if (fx <= 1 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small, 32,32,1, 1}; + if (fx <= 1 && fy <= 20) spec = {(void*)upfirdn2d_kernel_small, 32,32,1, 1}; + if (fx <= 1 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small, 32,32,1, 1}; + if (fx <= 1 && fy <= 12) spec = {(void*)upfirdn2d_kernel_small, 32,32,1, 1}; + if (fx <= 1 && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small, 32,32,1, 1}; + } + if (s == 1 && p.up.x == 1 && p.up.y == 1 && p.down.x == 1 && p.down.y == 1) // channels_last + { + if (fx <= 7 && fy <= 7 ) spec = {(void*)upfirdn2d_kernel_small, 16,16,8, 1}; + if (fx <= 6 && fy <= 6 ) spec = {(void*)upfirdn2d_kernel_small, 16,16,8, 1}; + if (fx <= 5 && fy <= 5 ) spec = {(void*)upfirdn2d_kernel_small, 16,16,8, 1}; + if (fx <= 4 && fy <= 4 ) spec = {(void*)upfirdn2d_kernel_small, 16,16,8, 1}; + if (fx <= 3 && fy <= 3 ) spec = {(void*)upfirdn2d_kernel_small, 16,16,8, 1}; + if (fx <= 24 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,1,16, 1}; + if (fx <= 20 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,1,16, 1}; + if (fx <= 16 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,1,16, 1}; + if (fx <= 12 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,1,16, 1}; + if (fx <= 8 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,1,16, 1}; + if (fx <= 1 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small, 1,128,16, 1}; + if (fx <= 1 && fy <= 20) spec = {(void*)upfirdn2d_kernel_small, 1,128,16, 1}; + if (fx <= 1 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small, 1,128,16, 1}; + if (fx <= 1 && fy <= 12) spec = {(void*)upfirdn2d_kernel_small, 1,128,16, 1}; + if (fx <= 1 && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small, 1,128,16, 1}; + } + if (s != 1 && p.up.x == 2 && p.up.y == 2 && p.down.x == 1 && p.down.y == 1) // contiguous + { + if (fx <= 8 && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small, 64,16,1, 1}; + if (fx <= 6 && fy <= 6 ) spec = {(void*)upfirdn2d_kernel_small, 64,16,1, 1}; + if (fx <= 4 && fy <= 4 ) spec = {(void*)upfirdn2d_kernel_small, 64,16,1, 1}; + if (fx <= 2 && fy <= 2 ) spec = {(void*)upfirdn2d_kernel_small, 64,16,1, 1}; + } + if (s == 1 && p.up.x == 2 && p.up.y == 2 && p.down.x == 1 && p.down.y == 1) // channels_last + { + if (fx <= 8 && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small, 16,16,8, 1}; + if (fx <= 6 && fy <= 6 ) spec 
= {(void*)upfirdn2d_kernel_small, 16,16,8, 1}; + if (fx <= 4 && fy <= 4 ) spec = {(void*)upfirdn2d_kernel_small, 16,16,8, 1}; + if (fx <= 2 && fy <= 2 ) spec = {(void*)upfirdn2d_kernel_small, 16,16,8, 1}; + } + if (s != 1 && p.up.x == 2 && p.up.y == 1 && p.down.x == 1 && p.down.y == 1) // contiguous + { + if (fx <= 24 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,8,1, 1}; + if (fx <= 20 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,8,1, 1}; + if (fx <= 16 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,8,1, 1}; + if (fx <= 12 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,8,1, 1}; + if (fx <= 8 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,8,1, 1}; + } + if (s == 1 && p.up.x == 2 && p.up.y == 1 && p.down.x == 1 && p.down.y == 1) // channels_last + { + if (fx <= 24 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,1,16, 1}; + if (fx <= 20 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,1,16, 1}; + if (fx <= 16 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,1,16, 1}; + if (fx <= 12 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,1,16, 1}; + if (fx <= 8 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 128,1,16, 1}; + } + if (s != 1 && p.up.x == 1 && p.up.y == 2 && p.down.x == 1 && p.down.y == 1) // contiguous + { + if (fx <= 1 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small, 32,32,1, 1}; + if (fx <= 1 && fy <= 20) spec = {(void*)upfirdn2d_kernel_small, 32,32,1, 1}; + if (fx <= 1 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small, 32,32,1, 1}; + if (fx <= 1 && fy <= 12) spec = {(void*)upfirdn2d_kernel_small, 32,32,1, 1}; + if (fx <= 1 && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small, 32,32,1, 1}; + } + if (s == 1 && p.up.x == 1 && p.up.y == 2 && p.down.x == 1 && p.down.y == 1) // channels_last + { + if (fx <= 1 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small, 1,128,16, 1}; + if (fx <= 1 && fy <= 20) spec = {(void*)upfirdn2d_kernel_small, 1,128,16, 1}; + if (fx <= 1 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small, 1,128,16, 1}; + if (fx <= 1 && fy <= 12) spec = {(void*)upfirdn2d_kernel_small, 1,128,16, 1}; + if (fx <= 1 && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small, 1,128,16, 1}; + } + if (s != 1 && p.up.x == 1 && p.up.y == 1 && p.down.x == 2 && p.down.y == 2) // contiguous + { + if (fx <= 8 && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small, 32,8,1, 1}; + if (fx <= 6 && fy <= 6 ) spec = {(void*)upfirdn2d_kernel_small, 32,8,1, 1}; + if (fx <= 4 && fy <= 4 ) spec = {(void*)upfirdn2d_kernel_small, 32,8,1, 1}; + if (fx <= 2 && fy <= 2 ) spec = {(void*)upfirdn2d_kernel_small, 32,8,1, 1}; + } + if (s == 1 && p.up.x == 1 && p.up.y == 1 && p.down.x == 2 && p.down.y == 2) // channels_last + { + if (fx <= 8 && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small, 8,8,8, 1}; + if (fx <= 6 && fy <= 6 ) spec = {(void*)upfirdn2d_kernel_small, 8,8,8, 1}; + if (fx <= 4 && fy <= 4 ) spec = {(void*)upfirdn2d_kernel_small, 8,8,8, 1}; + if (fx <= 2 && fy <= 2 ) spec = {(void*)upfirdn2d_kernel_small, 8,8,8, 1}; + } + if (s != 1 && p.up.x == 1 && p.up.y == 1 && p.down.x == 2 && p.down.y == 1) // contiguous + { + if (fx <= 24 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 64,8,1, 1}; + if (fx <= 20 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 64,8,1, 1}; + if (fx <= 16 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 64,8,1, 1}; + if (fx <= 12 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 64,8,1, 1}; + if (fx <= 8 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 64,8,1, 1}; + } + if (s == 1 && p.up.x 
== 1 && p.up.y == 1 && p.down.x == 2 && p.down.y == 1) // channels_last + { + if (fx <= 24 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 64,1,8, 1}; + if (fx <= 20 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 64,1,8, 1}; + if (fx <= 16 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 64,1,8, 1}; + if (fx <= 12 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 64,1,8, 1}; + if (fx <= 8 && fy <= 1 ) spec = {(void*)upfirdn2d_kernel_small, 64,1,8, 1}; + } + if (s != 1 && p.up.x == 1 && p.up.y == 1 && p.down.x == 1 && p.down.y == 2) // contiguous + { + if (fx <= 1 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small, 32,16,1, 1}; + if (fx <= 1 && fy <= 20) spec = {(void*)upfirdn2d_kernel_small, 32,16,1, 1}; + if (fx <= 1 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small, 32,16,1, 1}; + if (fx <= 1 && fy <= 12) spec = {(void*)upfirdn2d_kernel_small, 32,16,1, 1}; + if (fx <= 1 && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small, 32,16,1, 1}; + } + if (s == 1 && p.up.x == 1 && p.up.y == 1 && p.down.x == 1 && p.down.y == 2) // channels_last + { + if (fx <= 1 && fy <= 24) spec = {(void*)upfirdn2d_kernel_small, 1,64,8, 1}; + if (fx <= 1 && fy <= 20) spec = {(void*)upfirdn2d_kernel_small, 1,64,8, 1}; + if (fx <= 1 && fy <= 16) spec = {(void*)upfirdn2d_kernel_small, 1,64,8, 1}; + if (fx <= 1 && fy <= 12) spec = {(void*)upfirdn2d_kernel_small, 1,64,8, 1}; + if (fx <= 1 && fy <= 8 ) spec = {(void*)upfirdn2d_kernel_small, 1,64,8, 1}; + } + return spec; +} + +//------------------------------------------------------------------------ +// Template specializations. + +template upfirdn2d_kernel_spec choose_upfirdn2d_kernel (const upfirdn2d_kernel_params& p); +template upfirdn2d_kernel_spec choose_upfirdn2d_kernel (const upfirdn2d_kernel_params& p); +template upfirdn2d_kernel_spec choose_upfirdn2d_kernel(const upfirdn2d_kernel_params& p); + +//------------------------------------------------------------------------ diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/upfirdn2d.h b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/upfirdn2d.h new file mode 100644 index 0000000000000000000000000000000000000000..c9e2032bcac9d2abde7a75eea4d812da348afadd --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/upfirdn2d.h @@ -0,0 +1,59 @@ +// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +// +// NVIDIA CORPORATION and its licensors retain all intellectual property +// and proprietary rights in and to this software, related documentation +// and any modifications thereto. Any use, reproduction, disclosure or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA CORPORATION is strictly prohibited. + +#include + +//------------------------------------------------------------------------ +// CUDA kernel parameters. + +struct upfirdn2d_kernel_params +{ + const void* x; + const float* f; + void* y; + + int2 up; + int2 down; + int2 pad0; + int flip; + float gain; + + int4 inSize; // [width, height, channel, batch] + int4 inStride; + int2 filterSize; // [width, height] + int2 filterStride; + int4 outSize; // [width, height, channel, batch] + int4 outStride; + int sizeMinor; + int sizeMajor; + + int loopMinor; + int loopMajor; + int loopX; + int launchMinor; + int launchMajor; +}; + +//------------------------------------------------------------------------ +// CUDA kernel specialization. 
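+// Describes the kernel selected by choose_upfirdn2d_kernel(): the kernel function pointer,
+// the output tile processed per block (tileOutW < 0 denotes the generic large-filter kernel),
+// and the loopMinor / loopX factors used when sizing the launch grid.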
+ +struct upfirdn2d_kernel_spec +{ + void* kernel; + int tileOutW; + int tileOutH; + int loopMinor; + int loopX; +}; + +//------------------------------------------------------------------------ +// CUDA kernel selection. + +template upfirdn2d_kernel_spec choose_upfirdn2d_kernel(const upfirdn2d_kernel_params& p); + +//------------------------------------------------------------------------ diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/upfirdn2d.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/upfirdn2d.py new file mode 100644 index 0000000000000000000000000000000000000000..ceeac2b9834e33b7c601c28bf27f32aa91c69256 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/ops/upfirdn2d.py @@ -0,0 +1,384 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Custom PyTorch ops for efficient resampling of 2D images.""" + +import os +import warnings +import numpy as np +import torch +import traceback + +from .. import custom_ops +from .. import misc +from . import conv2d_gradfix + +#---------------------------------------------------------------------------- + +_inited = False +_plugin = None + +def _init(): + global _inited, _plugin + if not _inited: + sources = ['upfirdn2d.cpp', 'upfirdn2d.cu'] + sources = [os.path.join(os.path.dirname(__file__), s) for s in sources] + try: + _plugin = custom_ops.get_plugin('upfirdn2d_plugin', sources=sources, extra_cuda_cflags=['--use_fast_math']) + except: + warnings.warn('Failed to build CUDA kernels for upfirdn2d. Falling back to slow reference implementation. Details:\n\n' + traceback.format_exc()) + return _plugin is not None + +def _parse_scaling(scaling): + if isinstance(scaling, int): + scaling = [scaling, scaling] + assert isinstance(scaling, (list, tuple)) + assert all(isinstance(x, int) for x in scaling) + sx, sy = scaling + assert sx >= 1 and sy >= 1 + return sx, sy + +def _parse_padding(padding): + if isinstance(padding, int): + padding = [padding, padding] + assert isinstance(padding, (list, tuple)) + assert all(isinstance(x, int) for x in padding) + if len(padding) == 2: + padx, pady = padding + padding = [padx, padx, pady, pady] + padx0, padx1, pady0, pady1 = padding + return padx0, padx1, pady0, pady1 + +def _get_filter_size(f): + if f is None: + return 1, 1 + assert isinstance(f, torch.Tensor) and f.ndim in [1, 2] + fw = f.shape[-1] + fh = f.shape[0] + with misc.suppress_tracer_warnings(): + fw = int(fw) + fh = int(fh) + misc.assert_shape(f, [fh, fw][:f.ndim]) + assert fw >= 1 and fh >= 1 + return fw, fh + +#---------------------------------------------------------------------------- + +def setup_filter(f, device=torch.device('cpu'), normalize=True, flip_filter=False, gain=1, separable=None): + r"""Convenience function to setup 2D FIR filter for `upfirdn2d()`. + + Args: + f: Torch tensor, numpy array, or python list of the shape + `[filter_height, filter_width]` (non-separable), + `[filter_taps]` (separable), + `[]` (impulse), or + `None` (identity). + device: Result device (default: cpu). 
+ normalize: Normalize the filter so that it retains the magnitude + for constant input signal (DC)? (default: True). + flip_filter: Flip the filter? (default: False). + gain: Overall scaling factor for signal magnitude (default: 1). + separable: Return a separable filter? (default: select automatically). + + Returns: + Float32 tensor of the shape + `[filter_height, filter_width]` (non-separable) or + `[filter_taps]` (separable). + """ + # Validate. + if f is None: + f = 1 + f = torch.as_tensor(f, dtype=torch.float32) + assert f.ndim in [0, 1, 2] + assert f.numel() > 0 + if f.ndim == 0: + f = f[np.newaxis] + + # Separable? + if separable is None: + separable = (f.ndim == 1 and f.numel() >= 8) + if f.ndim == 1 and not separable: + f = f.ger(f) + assert f.ndim == (1 if separable else 2) + + # Apply normalize, flip, gain, and device. + if normalize: + f /= f.sum() + if flip_filter: + f = f.flip(list(range(f.ndim))) + f = f * (gain ** (f.ndim / 2)) + f = f.to(device=device) + return f + +#---------------------------------------------------------------------------- + +def upfirdn2d(x, f, up=1, down=1, padding=0, flip_filter=False, gain=1, impl='cuda'): + r"""Pad, upsample, filter, and downsample a batch of 2D images. + + Performs the following sequence of operations for each channel: + + 1. Upsample the image by inserting N-1 zeros after each pixel (`up`). + + 2. Pad the image with the specified number of zeros on each side (`padding`). + Negative padding corresponds to cropping the image. + + 3. Convolve the image with the specified 2D FIR filter (`f`), shrinking it + so that the footprint of all output pixels lies within the input image. + + 4. Downsample the image by keeping every Nth pixel (`down`). + + This sequence of operations bears close resemblance to scipy.signal.upfirdn(). + The fused op is considerably more efficient than performing the same calculation + using standard PyTorch ops. It supports gradients of arbitrary order. + + Args: + x: Float32/float64/float16 input tensor of the shape + `[batch_size, num_channels, in_height, in_width]`. + f: Float32 FIR filter of the shape + `[filter_height, filter_width]` (non-separable), + `[filter_taps]` (separable), or + `None` (identity). + up: Integer upsampling factor. Can be a single int or a list/tuple + `[x, y]` (default: 1). + down: Integer downsampling factor. Can be a single int or a list/tuple + `[x, y]` (default: 1). + padding: Padding with respect to the upsampled image. Can be a single number + or a list/tuple `[x, y]` or `[x_before, x_after, y_before, y_after]` + (default: 0). + flip_filter: False = convolution, True = correlation (default: False). + gain: Overall scaling factor for signal magnitude (default: 1). + impl: Implementation to use. Can be `'ref'` or `'cuda'` (default: `'cuda'`). + + Returns: + Tensor of the shape `[batch_size, num_channels, out_height, out_width]`. + """ + assert isinstance(x, torch.Tensor) + assert impl in ['ref', 'cuda'] + if impl == 'cuda' and x.device.type == 'cuda' and _init(): + return _upfirdn2d_cuda(up=up, down=down, padding=padding, flip_filter=flip_filter, gain=gain).apply(x, f) + return _upfirdn2d_ref(x, f, up=up, down=down, padding=padding, flip_filter=flip_filter, gain=gain) + +#---------------------------------------------------------------------------- + +@misc.profiled_function +def _upfirdn2d_ref(x, f, up=1, down=1, padding=0, flip_filter=False, gain=1): + """Slow reference implementation of `upfirdn2d()` using standard PyTorch ops. + """ + # Validate arguments. 
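+    # x must be an NCHW tensor; f must be a float32 filter of rank 1 (separable) or rank 2
+    # (non-separable) that does not require gradients, or None for the identity filter.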
+ assert isinstance(x, torch.Tensor) and x.ndim == 4 + if f is None: + f = torch.ones([1, 1], dtype=torch.float32, device=x.device) + assert isinstance(f, torch.Tensor) and f.ndim in [1, 2] + assert f.dtype == torch.float32 and not f.requires_grad + batch_size, num_channels, in_height, in_width = x.shape + upx, upy = _parse_scaling(up) + downx, downy = _parse_scaling(down) + padx0, padx1, pady0, pady1 = _parse_padding(padding) + + # Upsample by inserting zeros. + x = x.reshape([batch_size, num_channels, in_height, 1, in_width, 1]) + x = torch.nn.functional.pad(x, [0, upx - 1, 0, 0, 0, upy - 1]) + x = x.reshape([batch_size, num_channels, in_height * upy, in_width * upx]) + + # Pad or crop. + x = torch.nn.functional.pad(x, [max(padx0, 0), max(padx1, 0), max(pady0, 0), max(pady1, 0)]) + x = x[:, :, max(-pady0, 0) : x.shape[2] - max(-pady1, 0), max(-padx0, 0) : x.shape[3] - max(-padx1, 0)] + + # Setup filter. + f = f * (gain ** (f.ndim / 2)) + f = f.to(x.dtype) + if not flip_filter: + f = f.flip(list(range(f.ndim))) + + # Convolve with the filter. + f = f[np.newaxis, np.newaxis].repeat([num_channels, 1] + [1] * f.ndim) + if f.ndim == 4: + x = conv2d_gradfix.conv2d(input=x, weight=f, groups=num_channels) + else: + x = conv2d_gradfix.conv2d(input=x, weight=f.unsqueeze(2), groups=num_channels) + x = conv2d_gradfix.conv2d(input=x, weight=f.unsqueeze(3), groups=num_channels) + + # Downsample by throwing away pixels. + x = x[:, :, ::downy, ::downx] + return x + +#---------------------------------------------------------------------------- + +_upfirdn2d_cuda_cache = dict() + +def _upfirdn2d_cuda(up=1, down=1, padding=0, flip_filter=False, gain=1): + """Fast CUDA implementation of `upfirdn2d()` using custom ops. + """ + # Parse arguments. + upx, upy = _parse_scaling(up) + downx, downy = _parse_scaling(down) + padx0, padx1, pady0, pady1 = _parse_padding(padding) + + # Lookup from cache. + key = (upx, upy, downx, downy, padx0, padx1, pady0, pady1, flip_filter, gain) + if key in _upfirdn2d_cuda_cache: + return _upfirdn2d_cuda_cache[key] + + # Forward op. + class Upfirdn2dCuda(torch.autograd.Function): + @staticmethod + def forward(ctx, x, f): # pylint: disable=arguments-differ + assert isinstance(x, torch.Tensor) and x.ndim == 4 + if f is None: + f = torch.ones([1, 1], dtype=torch.float32, device=x.device) + assert isinstance(f, torch.Tensor) and f.ndim in [1, 2] + y = x + if f.ndim == 2: + y = _plugin.upfirdn2d(y, f, upx, upy, downx, downy, padx0, padx1, pady0, pady1, flip_filter, gain) + else: + y = _plugin.upfirdn2d(y, f.unsqueeze(0), upx, 1, downx, 1, padx0, padx1, 0, 0, flip_filter, np.sqrt(gain)) + y = _plugin.upfirdn2d(y, f.unsqueeze(1), 1, upy, 1, downy, 0, 0, pady0, pady1, flip_filter, np.sqrt(gain)) + ctx.save_for_backward(f) + ctx.x_shape = x.shape + return y + + @staticmethod + def backward(ctx, dy): # pylint: disable=arguments-differ + f, = ctx.saved_tensors + _, _, ih, iw = ctx.x_shape + _, _, oh, ow = dy.shape + fw, fh = _get_filter_size(f) + p = [ + fw - padx0 - 1, + iw * upx - ow * downx + padx0 - upx + 1, + fh - pady0 - 1, + ih * upy - oh * downy + pady0 - upy + 1, + ] + dx = None + df = None + + if ctx.needs_input_grad[0]: + dx = _upfirdn2d_cuda(up=down, down=up, padding=p, flip_filter=(not flip_filter), gain=gain).apply(dy, f) + + assert not ctx.needs_input_grad[1] + return dx, df + + # Add to cache. 
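+    # The autograd Function is specialized per (up, down, padding, flip_filter, gain)
+    # configuration, so it is memoized to avoid re-creating the class on every call.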
+ _upfirdn2d_cuda_cache[key] = Upfirdn2dCuda + return Upfirdn2dCuda + +#---------------------------------------------------------------------------- + +def filter2d(x, f, padding=0, flip_filter=False, gain=1, impl='cuda'): + r"""Filter a batch of 2D images using the given 2D FIR filter. + + By default, the result is padded so that its shape matches the input. + User-specified padding is applied on top of that, with negative values + indicating cropping. Pixels outside the image are assumed to be zero. + + Args: + x: Float32/float64/float16 input tensor of the shape + `[batch_size, num_channels, in_height, in_width]`. + f: Float32 FIR filter of the shape + `[filter_height, filter_width]` (non-separable), + `[filter_taps]` (separable), or + `None` (identity). + padding: Padding with respect to the output. Can be a single number or a + list/tuple `[x, y]` or `[x_before, x_after, y_before, y_after]` + (default: 0). + flip_filter: False = convolution, True = correlation (default: False). + gain: Overall scaling factor for signal magnitude (default: 1). + impl: Implementation to use. Can be `'ref'` or `'cuda'` (default: `'cuda'`). + + Returns: + Tensor of the shape `[batch_size, num_channels, out_height, out_width]`. + """ + padx0, padx1, pady0, pady1 = _parse_padding(padding) + fw, fh = _get_filter_size(f) + p = [ + padx0 + fw // 2, + padx1 + (fw - 1) // 2, + pady0 + fh // 2, + pady1 + (fh - 1) // 2, + ] + return upfirdn2d(x, f, padding=p, flip_filter=flip_filter, gain=gain, impl=impl) + +#---------------------------------------------------------------------------- + +def upsample2d(x, f, up=2, padding=0, flip_filter=False, gain=1, impl='cuda'): + r"""Upsample a batch of 2D images using the given 2D FIR filter. + + By default, the result is padded so that its shape is a multiple of the input. + User-specified padding is applied on top of that, with negative values + indicating cropping. Pixels outside the image are assumed to be zero. + + Args: + x: Float32/float64/float16 input tensor of the shape + `[batch_size, num_channels, in_height, in_width]`. + f: Float32 FIR filter of the shape + `[filter_height, filter_width]` (non-separable), + `[filter_taps]` (separable), or + `None` (identity). + up: Integer upsampling factor. Can be a single int or a list/tuple + `[x, y]` (default: 1). + padding: Padding with respect to the output. Can be a single number or a + list/tuple `[x, y]` or `[x_before, x_after, y_before, y_after]` + (default: 0). + flip_filter: False = convolution, True = correlation (default: False). + gain: Overall scaling factor for signal magnitude (default: 1). + impl: Implementation to use. Can be `'ref'` or `'cuda'` (default: `'cuda'`). + + Returns: + Tensor of the shape `[batch_size, num_channels, out_height, out_width]`. + """ + upx, upy = _parse_scaling(up) + padx0, padx1, pady0, pady1 = _parse_padding(padding) + fw, fh = _get_filter_size(f) + p = [ + padx0 + (fw + upx - 1) // 2, + padx1 + (fw - upx) // 2, + pady0 + (fh + upy - 1) // 2, + pady1 + (fh - upy) // 2, + ] + return upfirdn2d(x, f, up=up, padding=p, flip_filter=flip_filter, gain=gain*upx*upy, impl=impl) + +#---------------------------------------------------------------------------- + +def downsample2d(x, f, down=2, padding=0, flip_filter=False, gain=1, impl='cuda'): + r"""Downsample a batch of 2D images using the given 2D FIR filter. + + By default, the result is padded so that its shape is a fraction of the input. + User-specified padding is applied on top of that, with negative values + indicating cropping. 
Pixels outside the image are assumed to be zero. + + Args: + x: Float32/float64/float16 input tensor of the shape + `[batch_size, num_channels, in_height, in_width]`. + f: Float32 FIR filter of the shape + `[filter_height, filter_width]` (non-separable), + `[filter_taps]` (separable), or + `None` (identity). + down: Integer downsampling factor. Can be a single int or a list/tuple + `[x, y]` (default: 1). + padding: Padding with respect to the input. Can be a single number or a + list/tuple `[x, y]` or `[x_before, x_after, y_before, y_after]` + (default: 0). + flip_filter: False = convolution, True = correlation (default: False). + gain: Overall scaling factor for signal magnitude (default: 1). + impl: Implementation to use. Can be `'ref'` or `'cuda'` (default: `'cuda'`). + + Returns: + Tensor of the shape `[batch_size, num_channels, out_height, out_width]`. + """ + downx, downy = _parse_scaling(down) + padx0, padx1, pady0, pady1 = _parse_padding(padding) + fw, fh = _get_filter_size(f) + p = [ + padx0 + (fw - downx + 1) // 2, + padx1 + (fw - downx) // 2, + pady0 + (fh - downy + 1) // 2, + pady1 + (fh - downy) // 2, + ] + return upfirdn2d(x, f, down=down, padding=p, flip_filter=flip_filter, gain=gain, impl=impl) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/persistence.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/persistence.py new file mode 100644 index 0000000000000000000000000000000000000000..76ba3db98086743cdd285500670fddfc6bb42777 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/persistence.py @@ -0,0 +1,251 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Facilities for pickling Python code alongside other data. + +The pickled code is automatically imported into a separate Python module +during unpickling. This way, any previously exported pickles will remain +usable even if the original code is no longer available, or if the current +version of the code is not consistent with what was originally pickled.""" + +import sys +import pickle +import io +import inspect +import copy +import uuid +import types +import dnnlib + +#---------------------------------------------------------------------------- + +_version = 6 # internal version number +_decorators = set() # {decorator_class, ...} +_import_hooks = [] # [hook_function, ...] +_module_to_src_dict = dict() # {module: src, ...} +_src_to_module_dict = dict() # {src: module, ...} + +#---------------------------------------------------------------------------- + +def persistent_class(orig_class): + r"""Class decorator that extends a given class to save its source code + when pickled. + + Example: + + from torch_utils import persistence + + @persistence.persistent_class + class MyNetwork(torch.nn.Module): + def __init__(self, num_inputs, num_outputs): + super().__init__() + self.fc = MyLayer(num_inputs, num_outputs) + ... + + @persistence.persistent_class + class MyLayer(torch.nn.Module): + ... 
+ + When pickled, any instance of `MyNetwork` and `MyLayer` will save its + source code alongside other internal state (e.g., parameters, buffers, + and submodules). This way, any previously exported pickle will remain + usable even if the class definitions have been modified or are no + longer available. + + The decorator saves the source code of the entire Python module + containing the decorated class. It does *not* save the source code of + any imported modules. Thus, the imported modules must be available + during unpickling, also including `torch_utils.persistence` itself. + + It is ok to call functions defined in the same module from the + decorated class. However, if the decorated class depends on other + classes defined in the same module, they must be decorated as well. + This is illustrated in the above example in the case of `MyLayer`. + + It is also possible to employ the decorator just-in-time before + calling the constructor. For example: + + cls = MyLayer + if want_to_make_it_persistent: + cls = persistence.persistent_class(cls) + layer = cls(num_inputs, num_outputs) + + As an additional feature, the decorator also keeps track of the + arguments that were used to construct each instance of the decorated + class. The arguments can be queried via `obj.init_args` and + `obj.init_kwargs`, and they are automatically pickled alongside other + object state. A typical use case is to first unpickle a previous + instance of a persistent class, and then upgrade it to use the latest + version of the source code: + + with open('old_pickle.pkl', 'rb') as f: + old_net = pickle.load(f) + new_net = MyNetwork(*old_obj.init_args, **old_obj.init_kwargs) + misc.copy_params_and_buffers(old_net, new_net, require_all=True) + """ + assert isinstance(orig_class, type) + if is_persistent(orig_class): + return orig_class + + assert orig_class.__module__ in sys.modules + orig_module = sys.modules[orig_class.__module__] + orig_module_src = _module_to_src(orig_module) + + class Decorator(orig_class): + _orig_module_src = orig_module_src + _orig_class_name = orig_class.__name__ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._init_args = copy.deepcopy(args) + self._init_kwargs = copy.deepcopy(kwargs) + assert orig_class.__name__ in orig_module.__dict__ + _check_pickleable(self.__reduce__()) + + @property + def init_args(self): + return copy.deepcopy(self._init_args) + + @property + def init_kwargs(self): + return dnnlib.EasyDict(copy.deepcopy(self._init_kwargs)) + + def __reduce__(self): + fields = list(super().__reduce__()) + fields += [None] * max(3 - len(fields), 0) + if fields[0] is not _reconstruct_persistent_obj: + meta = dict(type='class', version=_version, module_src=self._orig_module_src, class_name=self._orig_class_name, state=fields[2]) + fields[0] = _reconstruct_persistent_obj # reconstruct func + fields[1] = (meta,) # reconstruct args + fields[2] = None # state dict + return tuple(fields) + + Decorator.__name__ = orig_class.__name__ + _decorators.add(Decorator) + return Decorator + +#---------------------------------------------------------------------------- + +def is_persistent(obj): + r"""Test whether the given object or class is persistent, i.e., + whether it will save its source code when pickled. 
+ """ + try: + if obj in _decorators: + return True + except TypeError: + pass + return type(obj) in _decorators # pylint: disable=unidiomatic-typecheck + +#---------------------------------------------------------------------------- + +def import_hook(hook): + r"""Register an import hook that is called whenever a persistent object + is being unpickled. A typical use case is to patch the pickled source + code to avoid errors and inconsistencies when the API of some imported + module has changed. + + The hook should have the following signature: + + hook(meta) -> modified meta + + `meta` is an instance of `dnnlib.EasyDict` with the following fields: + + type: Type of the persistent object, e.g. `'class'`. + version: Internal version number of `torch_utils.persistence`. + module_src Original source code of the Python module. + class_name: Class name in the original Python module. + state: Internal state of the object. + + Example: + + @persistence.import_hook + def wreck_my_network(meta): + if meta.class_name == 'MyNetwork': + print('MyNetwork is being imported. I will wreck it!') + meta.module_src = meta.module_src.replace("True", "False") + return meta + """ + assert callable(hook) + _import_hooks.append(hook) + +#---------------------------------------------------------------------------- + +def _reconstruct_persistent_obj(meta): + r"""Hook that is called internally by the `pickle` module to unpickle + a persistent object. + """ + meta = dnnlib.EasyDict(meta) + meta.state = dnnlib.EasyDict(meta.state) + for hook in _import_hooks: + meta = hook(meta) + assert meta is not None + + assert meta.version == _version + module = _src_to_module(meta.module_src) + + assert meta.type == 'class' + orig_class = module.__dict__[meta.class_name] + decorator_class = persistent_class(orig_class) + obj = decorator_class.__new__(decorator_class) + + setstate = getattr(obj, '__setstate__', None) + if callable(setstate): + setstate(meta.state) # pylint: disable=not-callable + else: + obj.__dict__.update(meta.state) + return obj + +#---------------------------------------------------------------------------- + +def _module_to_src(module): + r"""Query the source code of a given Python module. + """ + src = _module_to_src_dict.get(module, None) + if src is None: + src = inspect.getsource(module) + _module_to_src_dict[module] = src + _src_to_module_dict[src] = module + return src + +def _src_to_module(src): + r"""Get or create a Python module for the given source code. + """ + module = _src_to_module_dict.get(src, None) + if module is None: + module_name = "_imported_module_" + uuid.uuid4().hex + module = types.ModuleType(module_name) + sys.modules[module_name] = module + _module_to_src_dict[module] = src + _src_to_module_dict[src] = module + exec(src, module.__dict__) # pylint: disable=exec-used + return module + +#---------------------------------------------------------------------------- + +def _check_pickleable(obj): + r"""Check that the given object is pickleable, raising an exception if + it is not. This function is expected to be considerably more efficient + than actually pickling the object. + """ + def recurse(obj): + if isinstance(obj, (list, tuple, set)): + return [recurse(x) for x in obj] + if isinstance(obj, dict): + return [[recurse(x), recurse(y)] for x, y in obj.items()] + if isinstance(obj, (str, int, float, bool, bytes, bytearray)): + return None # Python primitive types are pickleable. 
+ if f'{type(obj).__module__}.{type(obj).__name__}' in ['numpy.ndarray', 'torch.Tensor']: + return None # NumPy arrays and PyTorch tensors are pickleable. + if is_persistent(obj): + return None # Persistent objects are pickleable, by virtue of the constructor check. + return obj + with io.BytesIO() as f: + pickle.dump(recurse(obj), f) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/training_stats.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/training_stats.py new file mode 100644 index 0000000000000000000000000000000000000000..26f467f9eaa074ee13de1cf2625cd7da44880847 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/torch_utils/training_stats.py @@ -0,0 +1,268 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Facilities for reporting and collecting training statistics across +multiple processes and devices. The interface is designed to minimize +synchronization overhead as well as the amount of boilerplate in user +code.""" + +import re +import numpy as np +import torch +import dnnlib + +from . import misc + +#---------------------------------------------------------------------------- + +_num_moments = 3 # [num_scalars, sum_of_scalars, sum_of_squares] +_reduce_dtype = torch.float32 # Data type to use for initial per-tensor reduction. +_counter_dtype = torch.float64 # Data type to use for the internal counters. +_rank = 0 # Rank of the current process. +_sync_device = None # Device to use for multiprocess communication. None = single-process. +_sync_called = False # Has _sync() been called yet? +_counters = dict() # Running counters on each device, updated by report(): name => device => torch.Tensor +_cumulative = dict() # Cumulative counters on the CPU, updated by _sync(): name => torch.Tensor + +#---------------------------------------------------------------------------- + +def init_multiprocessing(rank, sync_device): + r"""Initializes `torch_utils.training_stats` for collecting statistics + across multiple processes. + + This function must be called after + `torch.distributed.init_process_group()` and before `Collector.update()`. + The call is not necessary if multi-process collection is not needed. + + Args: + rank: Rank of the current process. + sync_device: PyTorch device to use for inter-process + communication, or None to disable multi-process + collection. Typically `torch.device('cuda', rank)`. + """ + global _rank, _sync_device + assert not _sync_called + _rank = rank + _sync_device = sync_device + +#---------------------------------------------------------------------------- + +@misc.profiled_function +def report(name, value): + r"""Broadcasts the given set of scalars to all interested instances of + `Collector`, across device and process boundaries. + + This function is expected to be extremely cheap and can be safely + called from anywhere in the training loop, loss function, or inside a + `torch.nn.Module`. 
+ + Warning: The current implementation expects the set of unique names to + be consistent across processes. Please make sure that `report()` is + called at least once for each unique name by each process, and in the + same order. If a given process has no scalars to broadcast, it can do + `report(name, [])` (empty list). + + Args: + name: Arbitrary string specifying the name of the statistic. + Averages are accumulated separately for each unique name. + value: Arbitrary set of scalars. Can be a list, tuple, + NumPy array, PyTorch tensor, or Python scalar. + + Returns: + The same `value` that was passed in. + """ + if name not in _counters: + _counters[name] = dict() + + elems = torch.as_tensor(value) + if elems.numel() == 0: + return value + + elems = elems.detach().flatten().to(_reduce_dtype) + moments = torch.stack([ + torch.ones_like(elems).sum(), + elems.sum(), + elems.square().sum(), + ]) + assert moments.ndim == 1 and moments.shape[0] == _num_moments + moments = moments.to(_counter_dtype) + + device = moments.device + if device not in _counters[name]: + _counters[name][device] = torch.zeros_like(moments) + _counters[name][device].add_(moments) + return value + +#---------------------------------------------------------------------------- + +def report0(name, value): + r"""Broadcasts the given set of scalars by the first process (`rank = 0`), + but ignores any scalars provided by the other processes. + See `report()` for further details. + """ + report(name, value if _rank == 0 else []) + return value + +#---------------------------------------------------------------------------- + +class Collector: + r"""Collects the scalars broadcasted by `report()` and `report0()` and + computes their long-term averages (mean and standard deviation) over + user-defined periods of time. + + The averages are first collected into internal counters that are not + directly visible to the user. They are then copied to the user-visible + state as a result of calling `update()` and can then be queried using + `mean()`, `std()`, `as_dict()`, etc. Calling `update()` also resets the + internal counters for the next round, so that the user-visible state + effectively reflects averages collected between the last two calls to + `update()`. + + Args: + regex: Regular expression defining which statistics to + collect. The default is to collect everything. + keep_previous: Whether to retain the previous averages if no + scalars were collected on a given round + (default: True). + """ + def __init__(self, regex='.*', keep_previous=True): + self._regex = re.compile(regex) + self._keep_previous = keep_previous + self._cumulative = dict() + self._moments = dict() + self.update() + self._moments.clear() + + def names(self): + r"""Returns the names of all statistics broadcasted so far that + match the regular expression specified at construction time. + """ + return [name for name in _counters if self._regex.fullmatch(name)] + + def update(self): + r"""Copies current values of the internal counters to the + user-visible state and resets them for the next round. + + If `keep_previous=True` was specified at construction time, the + operation is skipped for statistics that have received no scalars + since the last update, retaining their previous averages. + + This method performs a number of GPU-to-CPU transfers and one + `torch.distributed.all_reduce()`. It is intended to be called + periodically in the main training loop, typically once every + N training steps. 
+ """ + if not self._keep_previous: + self._moments.clear() + for name, cumulative in _sync(self.names()): + if name not in self._cumulative: + self._cumulative[name] = torch.zeros([_num_moments], dtype=_counter_dtype) + delta = cumulative - self._cumulative[name] + self._cumulative[name].copy_(cumulative) + if float(delta[0]) != 0: + self._moments[name] = delta + + def _get_delta(self, name): + r"""Returns the raw moments that were accumulated for the given + statistic between the last two calls to `update()`, or zero if + no scalars were collected. + """ + assert self._regex.fullmatch(name) + if name not in self._moments: + self._moments[name] = torch.zeros([_num_moments], dtype=_counter_dtype) + return self._moments[name] + + def num(self, name): + r"""Returns the number of scalars that were accumulated for the given + statistic between the last two calls to `update()`, or zero if + no scalars were collected. + """ + delta = self._get_delta(name) + return int(delta[0]) + + def mean(self, name): + r"""Returns the mean of the scalars that were accumulated for the + given statistic between the last two calls to `update()`, or NaN if + no scalars were collected. + """ + delta = self._get_delta(name) + if int(delta[0]) == 0: + return float('nan') + return float(delta[1] / delta[0]) + + def std(self, name): + r"""Returns the standard deviation of the scalars that were + accumulated for the given statistic between the last two calls to + `update()`, or NaN if no scalars were collected. + """ + delta = self._get_delta(name) + if int(delta[0]) == 0 or not np.isfinite(float(delta[1])): + return float('nan') + if int(delta[0]) == 1: + return float(0) + mean = float(delta[1] / delta[0]) + raw_var = float(delta[2] / delta[0]) + return np.sqrt(max(raw_var - np.square(mean), 0)) + + def as_dict(self): + r"""Returns the averages accumulated between the last two calls to + `update()` as an `dnnlib.EasyDict`. The contents are as follows: + + dnnlib.EasyDict( + NAME = dnnlib.EasyDict(num=FLOAT, mean=FLOAT, std=FLOAT), + ... + ) + """ + stats = dnnlib.EasyDict() + for name in self.names(): + stats[name] = dnnlib.EasyDict(num=self.num(name), mean=self.mean(name), std=self.std(name)) + return stats + + def __getitem__(self, name): + r"""Convenience getter. + `collector[name]` is a synonym for `collector.mean(name)`. + """ + return self.mean(name) + +#---------------------------------------------------------------------------- + +def _sync(names): + r"""Synchronize the global cumulative counters across devices and + processes. Called internally by `Collector.update()`. + """ + if len(names) == 0: + return [] + global _sync_called + _sync_called = True + + # Collect deltas within current rank. + deltas = [] + device = _sync_device if _sync_device is not None else torch.device('cpu') + for name in names: + delta = torch.zeros([_num_moments], dtype=_counter_dtype, device=device) + for counter in _counters[name].values(): + delta.add_(counter.to(device)) + counter.copy_(torch.zeros_like(counter)) + deltas.append(delta) + deltas = torch.stack(deltas) + + # Sum deltas across ranks. + if _sync_device is not None: + torch.distributed.all_reduce(deltas) + + # Update cumulative values. + deltas = deltas.cpu() + for idx, name in enumerate(names): + if name not in _cumulative: + _cumulative[name] = torch.zeros([_num_moments], dtype=_counter_dtype) + _cumulative[name].add_(deltas[idx]) + + # Return name-value pairs. 
+ return [(name, _cumulative[name]) for name in names] + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/train.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/train.py new file mode 100644 index 0000000000000000000000000000000000000000..8d81b3f18e7ebae0b63dc68674c258c5c38bee71 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/train.py @@ -0,0 +1,540 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Train a GAN using the techniques described in the paper +"Training Generative Adversarial Networks with Limited Data".""" + +import os +import click +import re +import json +import tempfile +import torch +import dnnlib + +from training import training_loop +from metrics import metric_main +from torch_utils import training_stats +from torch_utils import custom_ops + +#---------------------------------------------------------------------------- + +class UserError(Exception): + pass + +#---------------------------------------------------------------------------- + +def setup_training_loop_kwargs( + # General options (not included in desc). + gpus = None, # Number of GPUs: , default = 1 gpu + snap = None, # Snapshot interval: , default = 50 ticks + metrics = None, # List of metric names: [], ['fid50k_full'] (default), ... + seed = None, # Random seed: , default = 0 + + # Dataset. + data = None, # Training dataset (required): + cond = None, # Train conditional model based on dataset labels: , default = False + subset = None, # Train with only N images: , default = all + mirror = None, # Augment dataset with x-flips: , default = False + + # Base config. + cfg = None, # Base config: 'auto' (default), 'stylegan2', 'paper256', 'paper512', 'paper1024', 'cifar' + gamma = None, # Override R1 gamma: + kimg = None, # Override training duration: + batch = None, # Override batch size: + + # Discriminator augmentation. + aug = None, # Augmentation mode: 'ada' (default), 'noaug', 'fixed' + p = None, # Specify p for 'fixed' (required): + target = None, # Override ADA target for 'ada': , default = depends on aug + augpipe = None, # Augmentation pipeline: 'blit', 'geom', 'color', 'filter', 'noise', 'cutout', 'bg', 'bgc' (default), ..., 'bgcfnc' + + # Transfer learning. + resume = None, # Load previous network: 'noresume' (default), 'ffhq256', 'ffhq512', 'ffhq1024', 'celebahq256', 'lsundog256', , + freezed = None, # Freeze-D: , default = 0 discriminator layers + + # Performance options (not included in desc). 
+    fp32 = None, # Disable mixed-precision training: <bool>, default = False
+    nhwc = None, # Use NHWC memory format with FP16: <bool>, default = False
+    allow_tf32 = None, # Allow PyTorch to use TF32 for matmul and convolutions: <bool>, default = False
+    nobench = None, # Disable cuDNN benchmarking: <bool>, default = False
+    workers = None, # Override number of DataLoader workers: <int>, default = 3
+):
+    args = dnnlib.EasyDict()
+
+    # ------------------------------------------
+    # General options: gpus, snap, metrics, seed
+    # ------------------------------------------
+
+    if gpus is None:
+        gpus = 1
+    assert isinstance(gpus, int)
+    if not (gpus >= 1 and gpus & (gpus - 1) == 0):
+        raise UserError('--gpus must be a power of two')
+    args.num_gpus = gpus
+
+    if snap is None:
+        snap = 50
+    assert isinstance(snap, int)
+    if snap < 1:
+        raise UserError('--snap must be at least 1')
+    args.image_snapshot_ticks = snap
+    args.network_snapshot_ticks = snap
+
+    if metrics is None:
+        metrics = ['fid50k_full']
+    assert isinstance(metrics, list)
+    if not all(metric_main.is_valid_metric(metric) for metric in metrics):
+        raise UserError('\n'.join(['--metrics can only contain the following values:'] + metric_main.list_valid_metrics()))
+    args.metrics = metrics
+
+    if seed is None:
+        seed = 0
+    assert isinstance(seed, int)
+    args.random_seed = seed
+
+    # -----------------------------------
+    # Dataset: data, cond, subset, mirror
+    # -----------------------------------
+
+    assert data is not None
+    assert isinstance(data, str)
+    args.training_set_kwargs = dnnlib.EasyDict(class_name='training.dataset.ImageFolderDataset', path=data, use_labels=True, max_size=None, xflip=False)
+    args.data_loader_kwargs = dnnlib.EasyDict(pin_memory=True, num_workers=3, prefetch_factor=2)
+    try:
+        training_set = dnnlib.util.construct_class_by_name(**args.training_set_kwargs) # subclass of training.dataset.Dataset
+        args.training_set_kwargs.resolution = training_set.resolution # be explicit about resolution
+        args.training_set_kwargs.use_labels = training_set.has_labels # be explicit about labels
+        args.training_set_kwargs.max_size = len(training_set) # be explicit about dataset size
+        desc = training_set.name
+        del training_set # conserve memory
+    except IOError as err:
+        raise UserError(f'--data: {err}')
+
+    if cond is None:
+        cond = False
+    assert isinstance(cond, bool)
+    if cond:
+        if not args.training_set_kwargs.use_labels:
+            raise UserError('--cond=True requires labels specified in dataset.json')
+        desc += '-cond'
+    else:
+        args.training_set_kwargs.use_labels = False
+
+    if subset is not None:
+        assert isinstance(subset, int)
+        if not 1 <= subset <= args.training_set_kwargs.max_size:
+            raise UserError(f'--subset must be between 1 and {args.training_set_kwargs.max_size}')
+        desc += f'-subset{subset}'
+        if subset < args.training_set_kwargs.max_size:
+            args.training_set_kwargs.max_size = subset
+            args.training_set_kwargs.random_seed = args.random_seed
+
+    if mirror is None:
+        mirror = False
+    assert isinstance(mirror, bool)
+    if mirror:
+        desc += '-mirror'
+        args.training_set_kwargs.xflip = True
+
+    # ------------------------------------
+    # Base config: cfg, gamma, kimg, batch
+    # ------------------------------------
+
+    if cfg is None:
+        cfg = 'auto'
+    assert isinstance(cfg, str)
+    desc += f'-{cfg}'
+
+    cfg_specs = {
+        'auto': dict(ref_gpus=-1, kimg=25000, mb=-1, mbstd=-1, fmaps=-1, lrate=-1, gamma=-1, ema=-1, ramp=0.05, map=2), # Populated dynamically based on resolution and GPU count.
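+        # Columns: ref_gpus = reference GPU count, kimg = training length, mb = total batch size,
+        # mbstd = minibatch-std group size, fmaps = feature-map multiplier, lrate = learning rate,
+        # gamma = R1 weight, ema = G EMA half-life in kimg, ramp = EMA ramp-up, map = mapping-net depth.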
+ 'stylegan2': dict(ref_gpus=8, kimg=25000, mb=32, mbstd=4, fmaps=1, lrate=0.002, gamma=10, ema=10, ramp=None, map=8), # Uses mixed-precision, unlike the original StyleGAN2. + 'paper256': dict(ref_gpus=8, kimg=25000, mb=64, mbstd=8, fmaps=0.5, lrate=0.0025, gamma=1, ema=20, ramp=None, map=8), + 'paper512': dict(ref_gpus=8, kimg=25000, mb=64, mbstd=8, fmaps=1, lrate=0.0025, gamma=0.5, ema=20, ramp=None, map=8), + 'paper1024': dict(ref_gpus=8, kimg=25000, mb=32, mbstd=4, fmaps=1, lrate=0.002, gamma=2, ema=10, ramp=None, map=8), + 'cifar': dict(ref_gpus=2, kimg=100000, mb=64, mbstd=32, fmaps=1, lrate=0.0025, gamma=0.01, ema=500, ramp=0.05, map=2), + } + + assert cfg in cfg_specs + spec = dnnlib.EasyDict(cfg_specs[cfg]) + if cfg == 'auto': + desc += f'{gpus:d}' + spec.ref_gpus = gpus + res = args.training_set_kwargs.resolution + spec.mb = max(min(gpus * min(4096 // res, 32), 64), gpus) # keep gpu memory consumption at bay + spec.mbstd = min(spec.mb // gpus, 4) # other hyperparams behave more predictably if mbstd group size remains fixed + spec.fmaps = 1 if res >= 512 else 0.5 + spec.lrate = 0.002 if res >= 1024 else 0.0025 + spec.gamma = 0.0002 * (res ** 2) / spec.mb # heuristic formula + spec.ema = spec.mb * 10 / 32 + + args.G_kwargs = dnnlib.EasyDict(class_name='training.networks.Generator', z_dim=512, w_dim=512, mapping_kwargs=dnnlib.EasyDict(), synthesis_kwargs=dnnlib.EasyDict()) + args.D_kwargs = dnnlib.EasyDict(class_name='training.networks.Discriminator', block_kwargs=dnnlib.EasyDict(), mapping_kwargs=dnnlib.EasyDict(), epilogue_kwargs=dnnlib.EasyDict()) + args.G_kwargs.synthesis_kwargs.channel_base = args.D_kwargs.channel_base = int(spec.fmaps * 32768) + args.G_kwargs.synthesis_kwargs.channel_max = args.D_kwargs.channel_max = 512 + args.G_kwargs.mapping_kwargs.num_layers = spec.map + args.G_kwargs.synthesis_kwargs.num_fp16_res = args.D_kwargs.num_fp16_res = 4 # enable mixed-precision training + args.G_kwargs.synthesis_kwargs.conv_clamp = args.D_kwargs.conv_clamp = 256 # clamp activations to avoid float16 overflow + args.D_kwargs.epilogue_kwargs.mbstd_group_size = spec.mbstd + + args.G_opt_kwargs = dnnlib.EasyDict(class_name='torch.optim.Adam', lr=spec.lrate, betas=[0,0.99], eps=1e-8) + args.D_opt_kwargs = dnnlib.EasyDict(class_name='torch.optim.Adam', lr=spec.lrate, betas=[0,0.99], eps=1e-8) + args.loss_kwargs = dnnlib.EasyDict(class_name='training.loss.StyleGAN2Loss', r1_gamma=spec.gamma) + + args.total_kimg = spec.kimg + args.batch_size = spec.mb + args.batch_gpu = spec.mb // spec.ref_gpus + args.ema_kimg = spec.ema + args.ema_rampup = spec.ramp + + if cfg == 'cifar': + args.loss_kwargs.pl_weight = 0 # disable path length regularization + args.loss_kwargs.style_mixing_prob = 0 # disable style mixing + args.D_kwargs.architecture = 'orig' # disable residual skip connections + + if gamma is not None: + assert isinstance(gamma, float) + if not gamma >= 0: + raise UserError('--gamma must be non-negative') + desc += f'-gamma{gamma:g}' + args.loss_kwargs.r1_gamma = gamma + + if kimg is not None: + assert isinstance(kimg, int) + if not kimg >= 1: + raise UserError('--kimg must be at least 1') + desc += f'-kimg{kimg:d}' + args.total_kimg = kimg + + if batch is not None: + assert isinstance(batch, int) + if not (batch >= 1 and batch % gpus == 0): + raise UserError('--batch must be at least 1 and divisible by --gpus') + desc += f'-batch{batch}' + args.batch_size = batch + args.batch_gpu = batch // gpus + + # --------------------------------------------------- + # Discriminator augmentation: 
aug, p, target, augpipe + # --------------------------------------------------- + + if aug is None: + aug = 'ada' + else: + assert isinstance(aug, str) + desc += f'-{aug}' + + if aug == 'ada': + args.ada_target = 0.6 + + elif aug == 'noaug': + pass + + elif aug == 'fixed': + if p is None: + raise UserError(f'--aug={aug} requires specifying --p') + + else: + raise UserError(f'--aug={aug} not supported') + + if p is not None: + assert isinstance(p, float) + if aug != 'fixed': + raise UserError('--p can only be specified with --aug=fixed') + if not 0 <= p <= 1: + raise UserError('--p must be between 0 and 1') + desc += f'-p{p:g}' + args.augment_p = p + + if target is not None: + assert isinstance(target, float) + if aug != 'ada': + raise UserError('--target can only be specified with --aug=ada') + if not 0 <= target <= 1: + raise UserError('--target must be between 0 and 1') + desc += f'-target{target:g}' + args.ada_target = target + + assert augpipe is None or isinstance(augpipe, str) + if augpipe is None: + augpipe = 'bgc' + else: + if aug == 'noaug': + raise UserError('--augpipe cannot be specified with --aug=noaug') + desc += f'-{augpipe}' + + augpipe_specs = { + 'blit': dict(xflip=1, rotate90=1, xint=1), + 'geom': dict(scale=1, rotate=1, aniso=1, xfrac=1), + 'color': dict(brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1), + 'filter': dict(imgfilter=1), + 'noise': dict(noise=1), + 'cutout': dict(cutout=1), + 'bg': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1), + 'bgc': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1, brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1), + 'bgcf': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1, brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1, imgfilter=1), + 'bgcfn': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1, brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1, imgfilter=1, noise=1), + 'bgcfnc': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1, brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1, imgfilter=1, noise=1, cutout=1), + } + + assert augpipe in augpipe_specs + if aug != 'noaug': + args.augment_kwargs = dnnlib.EasyDict(class_name='training.augment.AugmentPipe', **augpipe_specs[augpipe]) + + # ---------------------------------- + # Transfer learning: resume, freezed + # ---------------------------------- + + resume_specs = { + 'ffhq256': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res256-mirror-paper256-noaug.pkl', + 'ffhq512': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl', + 'ffhq1024': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/ffhq-res1024-mirror-stylegan2-noaug.pkl', + 'celebahq256': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/celebahq-res256-mirror-paper256-kimg100000-ada-target0.5.pkl', + 'lsundog256': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/transfer-learning-source-nets/lsundog-res256-paper256-kimg100000-noaug.pkl', + } + + assert resume is None or isinstance(resume, str) + if resume is None: + resume = 'noresume' + elif resume == 'noresume': + desc += '-noresume' + elif resume in resume_specs: + desc += f'-resume{resume}' + args.resume_pkl = resume_specs[resume] # predefined url + else: + desc += 
'-resumecustom' + args.resume_pkl = resume # custom path or url + + if resume != 'noresume': + args.ada_kimg = 100 # make ADA react faster at the beginning + args.ema_rampup = None # disable EMA rampup + + if freezed is not None: + assert isinstance(freezed, int) + if not freezed >= 0: + raise UserError('--freezed must be non-negative') + desc += f'-freezed{freezed:d}' + args.D_kwargs.block_kwargs.freeze_layers = freezed + + # ------------------------------------------------- + # Performance options: fp32, nhwc, nobench, workers + # ------------------------------------------------- + + if fp32 is None: + fp32 = False + assert isinstance(fp32, bool) + if fp32: + args.G_kwargs.synthesis_kwargs.num_fp16_res = args.D_kwargs.num_fp16_res = 0 + args.G_kwargs.synthesis_kwargs.conv_clamp = args.D_kwargs.conv_clamp = None + + if nhwc is None: + nhwc = False + assert isinstance(nhwc, bool) + if nhwc: + args.G_kwargs.synthesis_kwargs.fp16_channels_last = args.D_kwargs.block_kwargs.fp16_channels_last = True + + if nobench is None: + nobench = False + assert isinstance(nobench, bool) + if nobench: + args.cudnn_benchmark = False + + if allow_tf32 is None: + allow_tf32 = False + assert isinstance(allow_tf32, bool) + if allow_tf32: + args.allow_tf32 = True + + if workers is not None: + assert isinstance(workers, int) + if not workers >= 1: + raise UserError('--workers must be at least 1') + args.data_loader_kwargs.num_workers = workers + + return desc, args + +#---------------------------------------------------------------------------- + +def subprocess_fn(rank, args, temp_dir): + dnnlib.util.Logger(file_name=os.path.join(args.run_dir, 'log.txt'), file_mode='a', should_flush=True) + + # Init torch.distributed. + if args.num_gpus > 1: + init_file = os.path.abspath(os.path.join(temp_dir, '.torch_distributed_init')) + if os.name == 'nt': + init_method = 'file:///' + init_file.replace('\\', '/') + torch.distributed.init_process_group(backend='gloo', init_method=init_method, rank=rank, world_size=args.num_gpus) + else: + init_method = f'file://{init_file}' + torch.distributed.init_process_group(backend='nccl', init_method=init_method, rank=rank, world_size=args.num_gpus) + + # Init torch_utils. + sync_device = torch.device('cuda', rank) if args.num_gpus > 1 else None + training_stats.init_multiprocessing(rank=rank, sync_device=sync_device) + if rank != 0: + custom_ops.verbosity = 'none' + + # Execute training loop. + training_loop.training_loop(rank=rank, **args) + +#---------------------------------------------------------------------------- + +class CommaSeparatedList(click.ParamType): + name = 'list' + + def convert(self, value, param, ctx): + _ = param, ctx + if value is None or value.lower() == 'none' or value == '': + return [] + return value.split(',') + +#---------------------------------------------------------------------------- + +@click.command() +@click.pass_context + +# General options. +@click.option('--outdir', help='Where to save the results', required=True, metavar='DIR') +@click.option('--gpus', help='Number of GPUs to use [default: 1]', type=int, metavar='INT') +@click.option('--snap', help='Snapshot interval [default: 50 ticks]', type=int, metavar='INT') +@click.option('--metrics', help='Comma-separated list or "none" [default: fid50k_full]', type=CommaSeparatedList()) +@click.option('--seed', help='Random seed [default: 0]', type=int, metavar='INT') +@click.option('-n', '--dry-run', help='Print training options and exit', is_flag=True) + +# Dataset. 
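+# --data accepts either a directory of images or a .zip archive; both are read through
+# training.dataset.ImageFolderDataset (see training/dataset.py), e.g. --data=~/datasets/ffhq.zip.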
+@click.option('--data', help='Training data (directory or zip)', metavar='PATH', required=True) +@click.option('--cond', help='Train conditional model based on dataset labels [default: false]', type=bool, metavar='BOOL') +@click.option('--subset', help='Train with only N images [default: all]', type=int, metavar='INT') +@click.option('--mirror', help='Enable dataset x-flips [default: false]', type=bool, metavar='BOOL') + +# Base config. +@click.option('--cfg', help='Base config [default: auto]', type=click.Choice(['auto', 'stylegan2', 'paper256', 'paper512', 'paper1024', 'cifar'])) +@click.option('--gamma', help='Override R1 gamma', type=float) +@click.option('--kimg', help='Override training duration', type=int, metavar='INT') +@click.option('--batch', help='Override batch size', type=int, metavar='INT') + +# Discriminator augmentation. +@click.option('--aug', help='Augmentation mode [default: ada]', type=click.Choice(['noaug', 'ada', 'fixed'])) +@click.option('--p', help='Augmentation probability for --aug=fixed', type=float) +@click.option('--target', help='ADA target value for --aug=ada', type=float) +@click.option('--augpipe', help='Augmentation pipeline [default: bgc]', type=click.Choice(['blit', 'geom', 'color', 'filter', 'noise', 'cutout', 'bg', 'bgc', 'bgcf', 'bgcfn', 'bgcfnc'])) + +# Transfer learning. +@click.option('--resume', help='Resume training [default: noresume]', metavar='PKL') +@click.option('--freezed', help='Freeze-D [default: 0 layers]', type=int, metavar='INT') + +# Performance options. +@click.option('--fp32', help='Disable mixed-precision training', type=bool, metavar='BOOL') +@click.option('--nhwc', help='Use NHWC memory format with FP16', type=bool, metavar='BOOL') +@click.option('--nobench', help='Disable cuDNN benchmarking', type=bool, metavar='BOOL') +@click.option('--allow-tf32', help='Allow PyTorch to use TF32 internally', type=bool, metavar='BOOL') +@click.option('--workers', help='Override number of DataLoader workers', type=int, metavar='INT') + +def main(ctx, outdir, dry_run, **config_kwargs): + """Train a GAN using the techniques described in the paper + "Training Generative Adversarial Networks with Limited Data". + + Examples: + + \b + # Train with custom dataset using 1 GPU. + python train.py --outdir=~/training-runs --data=~/mydataset.zip --gpus=1 + + \b + # Train class-conditional CIFAR-10 using 2 GPUs. + python train.py --outdir=~/training-runs --data=~/datasets/cifar10.zip \\ + --gpus=2 --cfg=cifar --cond=1 + + \b + # Transfer learn MetFaces from FFHQ using 4 GPUs. + python train.py --outdir=~/training-runs --data=~/datasets/metfaces.zip \\ + --gpus=4 --cfg=paper1024 --mirror=1 --resume=ffhq1024 --snap=10 + + \b + # Reproduce original StyleGAN2 config F. + python train.py --outdir=~/training-runs --data=~/datasets/ffhq.zip \\ + --gpus=8 --cfg=stylegan2 --mirror=1 --aug=noaug + + \b + Base configs (--cfg): + auto Automatically select reasonable defaults based on resolution + and GPU count. Good starting point for new datasets. + stylegan2 Reproduce results for StyleGAN2 config F at 1024x1024. + paper256 Reproduce results for FFHQ and LSUN Cat at 256x256. + paper512 Reproduce results for BreCaHAD and AFHQ at 512x512. + paper1024 Reproduce results for MetFaces at 1024x1024. + cifar Reproduce results for CIFAR-10 at 32x32. + + \b + Transfer learning source networks (--resume): + ffhq256 FFHQ trained at 256x256 resolution. + ffhq512 FFHQ trained at 512x512 resolution. + ffhq1024 FFHQ trained at 1024x1024 resolution. 
+ celebahq256 CelebA-HQ trained at 256x256 resolution. + lsundog256 LSUN Dog trained at 256x256 resolution. + Custom network pickle. + """ + dnnlib.util.Logger(should_flush=True) + + # Setup training options. + try: + run_desc, args = setup_training_loop_kwargs(**config_kwargs) + except UserError as err: + ctx.fail(err) + + # Pick output directory. + prev_run_dirs = [] + if os.path.isdir(outdir): + prev_run_dirs = [x for x in os.listdir(outdir) if os.path.isdir(os.path.join(outdir, x))] + prev_run_ids = [re.match(r'^\d+', x) for x in prev_run_dirs] + prev_run_ids = [int(x.group()) for x in prev_run_ids if x is not None] + cur_run_id = max(prev_run_ids, default=-1) + 1 + args.run_dir = os.path.join(outdir, f'{cur_run_id:05d}-{run_desc}') + assert not os.path.exists(args.run_dir) + + # Print options. + print() + print('Training options:') + print(json.dumps(args, indent=2)) + print() + print(f'Output directory: {args.run_dir}') + print(f'Training data: {args.training_set_kwargs.path}') + print(f'Training duration: {args.total_kimg} kimg') + print(f'Number of GPUs: {args.num_gpus}') + print(f'Number of images: {args.training_set_kwargs.max_size}') + print(f'Image resolution: {args.training_set_kwargs.resolution}') + print(f'Conditional model: {args.training_set_kwargs.use_labels}') + print(f'Dataset x-flips: {args.training_set_kwargs.xflip}') + print() + + # Dry run? + if dry_run: + print('Dry run; exiting.') + return + + # Create output directory. + print('Creating output directory...') + os.makedirs(args.run_dir) + with open(os.path.join(args.run_dir, 'training_options.json'), 'wt') as f: + json.dump(args, f, indent=2) + + # Launch processes. + print('Launching processes...') + torch.multiprocessing.set_start_method('spawn') + with tempfile.TemporaryDirectory() as temp_dir: + if args.num_gpus == 1: + subprocess_fn(rank=0, args=args, temp_dir=temp_dir) + else: + torch.multiprocessing.spawn(fn=subprocess_fn, args=(args, temp_dir), nprocs=args.num_gpus) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() # pylint: disable=no-value-for-parameter + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e1e1a5ba99e56a56ecaa14f7d4fa41777789c0cf --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +# empty diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/augment.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/augment.py new file mode 100644 index 0000000000000000000000000000000000000000..db3a668c5bfc72235611ac07a247f7dd297d831a --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/augment.py @@ -0,0 +1,431 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. 
All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +import numpy as np +import scipy.signal +import torch +from torch_utils import persistence +from torch_utils import misc +from torch_utils.ops import upfirdn2d +from torch_utils.ops import grid_sample_gradfix +from torch_utils.ops import conv2d_gradfix + +#---------------------------------------------------------------------------- +# Coefficients of various wavelet decomposition low-pass filters. + +wavelets = { + 'haar': [0.7071067811865476, 0.7071067811865476], + 'db1': [0.7071067811865476, 0.7071067811865476], + 'db2': [-0.12940952255092145, 0.22414386804185735, 0.836516303737469, 0.48296291314469025], + 'db3': [0.035226291882100656, -0.08544127388224149, -0.13501102001039084, 0.4598775021193313, 0.8068915093133388, 0.3326705529509569], + 'db4': [-0.010597401784997278, 0.032883011666982945, 0.030841381835986965, -0.18703481171888114, -0.02798376941698385, 0.6308807679295904, 0.7148465705525415, 0.23037781330885523], + 'db5': [0.003335725285001549, -0.012580751999015526, -0.006241490213011705, 0.07757149384006515, -0.03224486958502952, -0.24229488706619015, 0.13842814590110342, 0.7243085284385744, 0.6038292697974729, 0.160102397974125], + 'db6': [-0.00107730108499558, 0.004777257511010651, 0.0005538422009938016, -0.031582039318031156, 0.02752286553001629, 0.09750160558707936, -0.12976686756709563, -0.22626469396516913, 0.3152503517092432, 0.7511339080215775, 0.4946238903983854, 0.11154074335008017], + 'db7': [0.0003537138000010399, -0.0018016407039998328, 0.00042957797300470274, 0.012550998556013784, -0.01657454163101562, -0.03802993693503463, 0.0806126091510659, 0.07130921926705004, -0.22403618499416572, -0.14390600392910627, 0.4697822874053586, 0.7291320908465551, 0.39653931948230575, 0.07785205408506236], + 'db8': [-0.00011747678400228192, 0.0006754494059985568, -0.0003917403729959771, -0.00487035299301066, 0.008746094047015655, 0.013981027917015516, -0.04408825393106472, -0.01736930100202211, 0.128747426620186, 0.00047248457399797254, -0.2840155429624281, -0.015829105256023893, 0.5853546836548691, 0.6756307362980128, 0.3128715909144659, 0.05441584224308161], + 'sym2': [-0.12940952255092145, 0.22414386804185735, 0.836516303737469, 0.48296291314469025], + 'sym3': [0.035226291882100656, -0.08544127388224149, -0.13501102001039084, 0.4598775021193313, 0.8068915093133388, 0.3326705529509569], + 'sym4': [-0.07576571478927333, -0.02963552764599851, 0.49761866763201545, 0.8037387518059161, 0.29785779560527736, -0.09921954357684722, -0.012603967262037833, 0.0322231006040427], + 'sym5': [0.027333068345077982, 0.029519490925774643, -0.039134249302383094, 0.1993975339773936, 0.7234076904024206, 0.6339789634582119, 0.01660210576452232, -0.17532808990845047, -0.021101834024758855, 0.019538882735286728], + 'sym6': [0.015404109327027373, 0.0034907120842174702, -0.11799011114819057, -0.048311742585633, 0.4910559419267466, 0.787641141030194, 0.3379294217276218, -0.07263752278646252, -0.021060292512300564, 0.04472490177066578, 0.0017677118642428036, -0.007800708325034148], + 'sym7': [0.002681814568257878, -0.0010473848886829163, -0.01263630340325193, 0.03051551316596357, 0.0678926935013727, -0.049552834937127255, 
0.017441255086855827, 0.5361019170917628, 0.767764317003164, 0.2886296317515146, -0.14004724044296152, -0.10780823770381774, 0.004010244871533663, 0.010268176708511255], + 'sym8': [-0.0033824159510061256, -0.0005421323317911481, 0.03169508781149298, 0.007607487324917605, -0.1432942383508097, -0.061273359067658524, 0.4813596512583722, 0.7771857517005235, 0.3644418948353314, -0.05194583810770904, -0.027219029917056003, 0.049137179673607506, 0.003808752013890615, -0.01495225833704823, -0.0003029205147213668, 0.0018899503327594609], +} + +#---------------------------------------------------------------------------- +# Helpers for constructing transformation matrices. + +def matrix(*rows, device=None): + assert all(len(row) == len(rows[0]) for row in rows) + elems = [x for row in rows for x in row] + ref = [x for x in elems if isinstance(x, torch.Tensor)] + if len(ref) == 0: + return misc.constant(np.asarray(rows), device=device) + assert device is None or device == ref[0].device + elems = [x if isinstance(x, torch.Tensor) else misc.constant(x, shape=ref[0].shape, device=ref[0].device) for x in elems] + return torch.stack(elems, dim=-1).reshape(ref[0].shape + (len(rows), -1)) + +def translate2d(tx, ty, **kwargs): + return matrix( + [1, 0, tx], + [0, 1, ty], + [0, 0, 1], + **kwargs) + +def translate3d(tx, ty, tz, **kwargs): + return matrix( + [1, 0, 0, tx], + [0, 1, 0, ty], + [0, 0, 1, tz], + [0, 0, 0, 1], + **kwargs) + +def scale2d(sx, sy, **kwargs): + return matrix( + [sx, 0, 0], + [0, sy, 0], + [0, 0, 1], + **kwargs) + +def scale3d(sx, sy, sz, **kwargs): + return matrix( + [sx, 0, 0, 0], + [0, sy, 0, 0], + [0, 0, sz, 0], + [0, 0, 0, 1], + **kwargs) + +def rotate2d(theta, **kwargs): + return matrix( + [torch.cos(theta), torch.sin(-theta), 0], + [torch.sin(theta), torch.cos(theta), 0], + [0, 0, 1], + **kwargs) + +def rotate3d(v, theta, **kwargs): + vx = v[..., 0]; vy = v[..., 1]; vz = v[..., 2] + s = torch.sin(theta); c = torch.cos(theta); cc = 1 - c + return matrix( + [vx*vx*cc+c, vx*vy*cc-vz*s, vx*vz*cc+vy*s, 0], + [vy*vx*cc+vz*s, vy*vy*cc+c, vy*vz*cc-vx*s, 0], + [vz*vx*cc-vy*s, vz*vy*cc+vx*s, vz*vz*cc+c, 0], + [0, 0, 0, 1], + **kwargs) + +def translate2d_inv(tx, ty, **kwargs): + return translate2d(-tx, -ty, **kwargs) + +def scale2d_inv(sx, sy, **kwargs): + return scale2d(1 / sx, 1 / sy, **kwargs) + +def rotate2d_inv(theta, **kwargs): + return rotate2d(-theta, **kwargs) + +#---------------------------------------------------------------------------- +# Versatile image augmentation pipeline from the paper +# "Training Generative Adversarial Networks with Limited Data". +# +# All augmentations are disabled by default; individual augmentations can +# be enabled by setting their probability multipliers to 1. + +@persistence.persistent_class +class AugmentPipe(torch.nn.Module): + def __init__(self, + xflip=0, rotate90=0, xint=0, xint_max=0.125, + scale=0, rotate=0, aniso=0, xfrac=0, scale_std=0.2, rotate_max=1, aniso_std=0.2, xfrac_std=0.125, + brightness=0, contrast=0, lumaflip=0, hue=0, saturation=0, brightness_std=0.2, contrast_std=0.5, hue_max=1, saturation_std=1, + imgfilter=0, imgfilter_bands=[1,1,1,1], imgfilter_std=1, + noise=0, cutout=0, noise_std=0.1, cutout_size=0.5, + ): + super().__init__() + self.register_buffer('p', torch.ones([])) # Overall multiplier for augmentation probability. + + # Pixel blitting. + self.xflip = float(xflip) # Probability multiplier for x-flip. + self.rotate90 = float(rotate90) # Probability multiplier for 90 degree rotations. 
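+        # Each augmentation below fires per image with probability (multiplier * self.p), where
+        # self.p is the overall strength; with --aug=ada the training loop adjusts it toward the ADA target.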
+ self.xint = float(xint) # Probability multiplier for integer translation. + self.xint_max = float(xint_max) # Range of integer translation, relative to image dimensions. + + # General geometric transformations. + self.scale = float(scale) # Probability multiplier for isotropic scaling. + self.rotate = float(rotate) # Probability multiplier for arbitrary rotation. + self.aniso = float(aniso) # Probability multiplier for anisotropic scaling. + self.xfrac = float(xfrac) # Probability multiplier for fractional translation. + self.scale_std = float(scale_std) # Log2 standard deviation of isotropic scaling. + self.rotate_max = float(rotate_max) # Range of arbitrary rotation, 1 = full circle. + self.aniso_std = float(aniso_std) # Log2 standard deviation of anisotropic scaling. + self.xfrac_std = float(xfrac_std) # Standard deviation of frational translation, relative to image dimensions. + + # Color transformations. + self.brightness = float(brightness) # Probability multiplier for brightness. + self.contrast = float(contrast) # Probability multiplier for contrast. + self.lumaflip = float(lumaflip) # Probability multiplier for luma flip. + self.hue = float(hue) # Probability multiplier for hue rotation. + self.saturation = float(saturation) # Probability multiplier for saturation. + self.brightness_std = float(brightness_std) # Standard deviation of brightness. + self.contrast_std = float(contrast_std) # Log2 standard deviation of contrast. + self.hue_max = float(hue_max) # Range of hue rotation, 1 = full circle. + self.saturation_std = float(saturation_std) # Log2 standard deviation of saturation. + + # Image-space filtering. + self.imgfilter = float(imgfilter) # Probability multiplier for image-space filtering. + self.imgfilter_bands = list(imgfilter_bands) # Probability multipliers for individual frequency bands. + self.imgfilter_std = float(imgfilter_std) # Log2 standard deviation of image-space filter amplification. + + # Image-space corruptions. + self.noise = float(noise) # Probability multiplier for additive RGB noise. + self.cutout = float(cutout) # Probability multiplier for cutout. + self.noise_std = float(noise_std) # Standard deviation of additive RGB noise. + self.cutout_size = float(cutout_size) # Size of the cutout rectangle, relative to image dimensions. + + # Setup orthogonal lowpass filter for geometric augmentations. + self.register_buffer('Hz_geom', upfirdn2d.setup_filter(wavelets['sym6'])) + + # Construct filter bank for image-space filtering. 
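+        # The bank has one row per frequency band: each iteration upsamples the previous rows
+        # (zero-insertion + low-pass) and then adds the next high-pass response, so row i covers a
+        # progressively higher band of the spectrum.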
+ Hz_lo = np.asarray(wavelets['sym2']) # H(z) + Hz_hi = Hz_lo * ((-1) ** np.arange(Hz_lo.size)) # H(-z) + Hz_lo2 = np.convolve(Hz_lo, Hz_lo[::-1]) / 2 # H(z) * H(z^-1) / 2 + Hz_hi2 = np.convolve(Hz_hi, Hz_hi[::-1]) / 2 # H(-z) * H(-z^-1) / 2 + Hz_fbank = np.eye(4, 1) # Bandpass(H(z), b_i) + for i in range(1, Hz_fbank.shape[0]): + Hz_fbank = np.dstack([Hz_fbank, np.zeros_like(Hz_fbank)]).reshape(Hz_fbank.shape[0], -1)[:, :-1] + Hz_fbank = scipy.signal.convolve(Hz_fbank, [Hz_lo2]) + Hz_fbank[i, (Hz_fbank.shape[1] - Hz_hi2.size) // 2 : (Hz_fbank.shape[1] + Hz_hi2.size) // 2] += Hz_hi2 + self.register_buffer('Hz_fbank', torch.as_tensor(Hz_fbank, dtype=torch.float32)) + + def forward(self, images, debug_percentile=None): + assert isinstance(images, torch.Tensor) and images.ndim == 4 + batch_size, num_channels, height, width = images.shape + device = images.device + if debug_percentile is not None: + debug_percentile = torch.as_tensor(debug_percentile, dtype=torch.float32, device=device) + + # ------------------------------------- + # Select parameters for pixel blitting. + # ------------------------------------- + + # Initialize inverse homogeneous 2D transform: G_inv @ pixel_out ==> pixel_in + I_3 = torch.eye(3, device=device) + G_inv = I_3 + + # Apply x-flip with probability (xflip * strength). + if self.xflip > 0: + i = torch.floor(torch.rand([batch_size], device=device) * 2) + i = torch.where(torch.rand([batch_size], device=device) < self.xflip * self.p, i, torch.zeros_like(i)) + if debug_percentile is not None: + i = torch.full_like(i, torch.floor(debug_percentile * 2)) + G_inv = G_inv @ scale2d_inv(1 - 2 * i, 1) + + # Apply 90 degree rotations with probability (rotate90 * strength). + if self.rotate90 > 0: + i = torch.floor(torch.rand([batch_size], device=device) * 4) + i = torch.where(torch.rand([batch_size], device=device) < self.rotate90 * self.p, i, torch.zeros_like(i)) + if debug_percentile is not None: + i = torch.full_like(i, torch.floor(debug_percentile * 4)) + G_inv = G_inv @ rotate2d_inv(-np.pi / 2 * i) + + # Apply integer translation with probability (xint * strength). + if self.xint > 0: + t = (torch.rand([batch_size, 2], device=device) * 2 - 1) * self.xint_max + t = torch.where(torch.rand([batch_size, 1], device=device) < self.xint * self.p, t, torch.zeros_like(t)) + if debug_percentile is not None: + t = torch.full_like(t, (debug_percentile * 2 - 1) * self.xint_max) + G_inv = G_inv @ translate2d_inv(torch.round(t[:,0] * width), torch.round(t[:,1] * height)) + + # -------------------------------------------------------- + # Select parameters for general geometric transformations. + # -------------------------------------------------------- + + # Apply isotropic scaling with probability (scale * strength). + if self.scale > 0: + s = torch.exp2(torch.randn([batch_size], device=device) * self.scale_std) + s = torch.where(torch.rand([batch_size], device=device) < self.scale * self.p, s, torch.ones_like(s)) + if debug_percentile is not None: + s = torch.full_like(s, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.scale_std)) + G_inv = G_inv @ scale2d_inv(s, s) + + # Apply pre-rotation with probability p_rot. 
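+        # Rotation is split into a pre- and a post-step around the anisotropic scaling; each step
+        # fires independently with probability p_rot, so that
+        # P(pre or post) = 1 - (1 - p_rot)^2 = rotate * p  =>  p_rot = 1 - sqrt(1 - rotate * p).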
+ p_rot = 1 - torch.sqrt((1 - self.rotate * self.p).clamp(0, 1)) # P(pre OR post) = p + if self.rotate > 0: + theta = (torch.rand([batch_size], device=device) * 2 - 1) * np.pi * self.rotate_max + theta = torch.where(torch.rand([batch_size], device=device) < p_rot, theta, torch.zeros_like(theta)) + if debug_percentile is not None: + theta = torch.full_like(theta, (debug_percentile * 2 - 1) * np.pi * self.rotate_max) + G_inv = G_inv @ rotate2d_inv(-theta) # Before anisotropic scaling. + + # Apply anisotropic scaling with probability (aniso * strength). + if self.aniso > 0: + s = torch.exp2(torch.randn([batch_size], device=device) * self.aniso_std) + s = torch.where(torch.rand([batch_size], device=device) < self.aniso * self.p, s, torch.ones_like(s)) + if debug_percentile is not None: + s = torch.full_like(s, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.aniso_std)) + G_inv = G_inv @ scale2d_inv(s, 1 / s) + + # Apply post-rotation with probability p_rot. + if self.rotate > 0: + theta = (torch.rand([batch_size], device=device) * 2 - 1) * np.pi * self.rotate_max + theta = torch.where(torch.rand([batch_size], device=device) < p_rot, theta, torch.zeros_like(theta)) + if debug_percentile is not None: + theta = torch.zeros_like(theta) + G_inv = G_inv @ rotate2d_inv(-theta) # After anisotropic scaling. + + # Apply fractional translation with probability (xfrac * strength). + if self.xfrac > 0: + t = torch.randn([batch_size, 2], device=device) * self.xfrac_std + t = torch.where(torch.rand([batch_size, 1], device=device) < self.xfrac * self.p, t, torch.zeros_like(t)) + if debug_percentile is not None: + t = torch.full_like(t, torch.erfinv(debug_percentile * 2 - 1) * self.xfrac_std) + G_inv = G_inv @ translate2d_inv(t[:,0] * width, t[:,1] * height) + + # ---------------------------------- + # Execute geometric transformations. + # ---------------------------------- + + # Execute if the transform is not identity. + if G_inv is not I_3: + + # Calculate padding. + cx = (width - 1) / 2 + cy = (height - 1) / 2 + cp = matrix([-cx, -cy, 1], [cx, -cy, 1], [cx, cy, 1], [-cx, cy, 1], device=device) # [idx, xyz] + cp = G_inv @ cp.t() # [batch, xyz, idx] + Hz_pad = self.Hz_geom.shape[0] // 4 + margin = cp[:, :2, :].permute(1, 0, 2).flatten(1) # [xy, batch * idx] + margin = torch.cat([-margin, margin]).max(dim=1).values # [x0, y0, x1, y1] + margin = margin + misc.constant([Hz_pad * 2 - cx, Hz_pad * 2 - cy] * 2, device=device) + margin = margin.max(misc.constant([0, 0] * 2, device=device)) + margin = margin.min(misc.constant([width-1, height-1] * 2, device=device)) + mx0, my0, mx1, my1 = margin.ceil().to(torch.int32) + + # Pad image and adjust origin. + images = torch.nn.functional.pad(input=images, pad=[mx0,mx1,my0,my1], mode='reflect') + G_inv = translate2d((mx0 - mx1) / 2, (my0 - my1) / 2) @ G_inv + + # Upsample. + images = upfirdn2d.upsample2d(x=images, f=self.Hz_geom, up=2) + G_inv = scale2d(2, 2, device=device) @ G_inv @ scale2d_inv(2, 2, device=device) + G_inv = translate2d(-0.5, -0.5, device=device) @ G_inv @ translate2d_inv(-0.5, -0.5, device=device) + + # Execute transformation. + shape = [batch_size, num_channels, (height + Hz_pad * 2) * 2, (width + Hz_pad * 2) * 2] + G_inv = scale2d(2 / images.shape[3], 2 / images.shape[2], device=device) @ G_inv @ scale2d_inv(2 / shape[3], 2 / shape[2], device=device) + grid = torch.nn.functional.affine_grid(theta=G_inv[:,:2,:], size=shape, align_corners=False) + images = grid_sample_gradfix.grid_sample(images, grid) + + # Downsample and crop. 
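+            # The negative padding crops the Hz_pad border again, so after the 2x downsample the
+            # output returns to the original (height, width) of the input batch.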
+ images = upfirdn2d.downsample2d(x=images, f=self.Hz_geom, down=2, padding=-Hz_pad*2, flip_filter=True) + + # -------------------------------------------- + # Select parameters for color transformations. + # -------------------------------------------- + + # Initialize homogeneous 3D transformation matrix: C @ color_in ==> color_out + I_4 = torch.eye(4, device=device) + C = I_4 + + # Apply brightness with probability (brightness * strength). + if self.brightness > 0: + b = torch.randn([batch_size], device=device) * self.brightness_std + b = torch.where(torch.rand([batch_size], device=device) < self.brightness * self.p, b, torch.zeros_like(b)) + if debug_percentile is not None: + b = torch.full_like(b, torch.erfinv(debug_percentile * 2 - 1) * self.brightness_std) + C = translate3d(b, b, b) @ C + + # Apply contrast with probability (contrast * strength). + if self.contrast > 0: + c = torch.exp2(torch.randn([batch_size], device=device) * self.contrast_std) + c = torch.where(torch.rand([batch_size], device=device) < self.contrast * self.p, c, torch.ones_like(c)) + if debug_percentile is not None: + c = torch.full_like(c, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.contrast_std)) + C = scale3d(c, c, c) @ C + + # Apply luma flip with probability (lumaflip * strength). + v = misc.constant(np.asarray([1, 1, 1, 0]) / np.sqrt(3), device=device) # Luma axis. + if self.lumaflip > 0: + i = torch.floor(torch.rand([batch_size, 1, 1], device=device) * 2) + i = torch.where(torch.rand([batch_size, 1, 1], device=device) < self.lumaflip * self.p, i, torch.zeros_like(i)) + if debug_percentile is not None: + i = torch.full_like(i, torch.floor(debug_percentile * 2)) + C = (I_4 - 2 * v.ger(v) * i) @ C # Householder reflection. + + # Apply hue rotation with probability (hue * strength). + if self.hue > 0 and num_channels > 1: + theta = (torch.rand([batch_size], device=device) * 2 - 1) * np.pi * self.hue_max + theta = torch.where(torch.rand([batch_size], device=device) < self.hue * self.p, theta, torch.zeros_like(theta)) + if debug_percentile is not None: + theta = torch.full_like(theta, (debug_percentile * 2 - 1) * np.pi * self.hue_max) + C = rotate3d(v, theta) @ C # Rotate around v. + + # Apply saturation with probability (saturation * strength). + if self.saturation > 0 and num_channels > 1: + s = torch.exp2(torch.randn([batch_size, 1, 1], device=device) * self.saturation_std) + s = torch.where(torch.rand([batch_size, 1, 1], device=device) < self.saturation * self.p, s, torch.ones_like(s)) + if debug_percentile is not None: + s = torch.full_like(s, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.saturation_std)) + C = (v.ger(v) + (I_4 - v.ger(v)) * s) @ C + + # ------------------------------ + # Execute color transformations. + # ------------------------------ + + # Execute if the transform is not identity. + if C is not I_4: + images = images.reshape([batch_size, num_channels, height * width]) + if num_channels == 3: + images = C[:, :3, :3] @ images + C[:, :3, 3:] + elif num_channels == 1: + C = C[:, :3, :].mean(dim=1, keepdims=True) + images = images * C[:, :, :3].sum(dim=2, keepdims=True) + C[:, :, 3:] + else: + raise ValueError('Image must be RGB (3 channels) or L (1 channel)') + images = images.reshape([batch_size, num_channels, height, width]) + + # ---------------------- + # Image-space filtering. 
+ # ---------------------- + + if self.imgfilter > 0: + num_bands = self.Hz_fbank.shape[0] + assert len(self.imgfilter_bands) == num_bands + expected_power = misc.constant(np.array([10, 1, 1, 1]) / 13, device=device) # Expected power spectrum (1/f). + + # Apply amplification for each band with probability (imgfilter * strength * band_strength). + g = torch.ones([batch_size, num_bands], device=device) # Global gain vector (identity). + for i, band_strength in enumerate(self.imgfilter_bands): + t_i = torch.exp2(torch.randn([batch_size], device=device) * self.imgfilter_std) + t_i = torch.where(torch.rand([batch_size], device=device) < self.imgfilter * self.p * band_strength, t_i, torch.ones_like(t_i)) + if debug_percentile is not None: + t_i = torch.full_like(t_i, torch.exp2(torch.erfinv(debug_percentile * 2 - 1) * self.imgfilter_std)) if band_strength > 0 else torch.ones_like(t_i) + t = torch.ones([batch_size, num_bands], device=device) # Temporary gain vector. + t[:, i] = t_i # Replace i'th element. + t = t / (expected_power * t.square()).sum(dim=-1, keepdims=True).sqrt() # Normalize power. + g = g * t # Accumulate into global gain. + + # Construct combined amplification filter. + Hz_prime = g @ self.Hz_fbank # [batch, tap] + Hz_prime = Hz_prime.unsqueeze(1).repeat([1, num_channels, 1]) # [batch, channels, tap] + Hz_prime = Hz_prime.reshape([batch_size * num_channels, 1, -1]) # [batch * channels, 1, tap] + + # Apply filter. + p = self.Hz_fbank.shape[1] // 2 + images = images.reshape([1, batch_size * num_channels, height, width]) + images = torch.nn.functional.pad(input=images, pad=[p,p,p,p], mode='reflect') + images = conv2d_gradfix.conv2d(input=images, weight=Hz_prime.unsqueeze(2), groups=batch_size*num_channels) + images = conv2d_gradfix.conv2d(input=images, weight=Hz_prime.unsqueeze(3), groups=batch_size*num_channels) + images = images.reshape([batch_size, num_channels, height, width]) + + # ------------------------ + # Image-space corruptions. + # ------------------------ + + # Apply additive RGB noise with probability (noise * strength). + if self.noise > 0: + sigma = torch.randn([batch_size, 1, 1, 1], device=device).abs() * self.noise_std + sigma = torch.where(torch.rand([batch_size, 1, 1, 1], device=device) < self.noise * self.p, sigma, torch.zeros_like(sigma)) + if debug_percentile is not None: + sigma = torch.full_like(sigma, torch.erfinv(debug_percentile) * self.noise_std) + images = images + torch.randn([batch_size, num_channels, height, width], device=device) * sigma + + # Apply cutout with probability (cutout * strength). 
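+        # Cutout zeroes a single axis-aligned rectangle per image: the center is uniform in [0, 1]^2
+        # and the side length is cutout_size (0.5 by default) relative to the image width/height.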
+ if self.cutout > 0: + size = torch.full([batch_size, 2, 1, 1, 1], self.cutout_size, device=device) + size = torch.where(torch.rand([batch_size, 1, 1, 1, 1], device=device) < self.cutout * self.p, size, torch.zeros_like(size)) + center = torch.rand([batch_size, 2, 1, 1, 1], device=device) + if debug_percentile is not None: + size = torch.full_like(size, self.cutout_size) + center = torch.full_like(center, debug_percentile) + coord_x = torch.arange(width, device=device).reshape([1, 1, 1, -1]) + coord_y = torch.arange(height, device=device).reshape([1, 1, -1, 1]) + mask_x = (((coord_x + 0.5) / width - center[:, 0]).abs() >= size[:, 0] / 2) + mask_y = (((coord_y + 0.5) / height - center[:, 1]).abs() >= size[:, 1] / 2) + mask = torch.logical_or(mask_x, mask_y).to(torch.float32) + images = images * mask + + return images + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/dataset.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..ba31ba549e78fd2618e1faa40c3957be94b60b00 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/dataset.py @@ -0,0 +1,236 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +import os +import numpy as np +import zipfile +import PIL.Image +import json +import torch +import dnnlib + +try: + import pyspng +except ImportError: + pyspng = None + +#---------------------------------------------------------------------------- + +class Dataset(torch.utils.data.Dataset): + def __init__(self, + name, # Name of the dataset. + raw_shape, # Shape of the raw image data (NCHW). + max_size = None, # Artificially limit the size of the dataset. None = no limit. Applied before xflip. + use_labels = False, # Enable conditioning labels? False = label dimension is zero. + xflip = False, # Artificially double the size of the dataset via x-flips. Applied after max_size. + random_seed = 0, # Random seed to use when applying max_size. + ): + self._name = name + self._raw_shape = list(raw_shape) + self._use_labels = use_labels + self._raw_labels = None + self._label_shape = None + + # Apply max_size. + self._raw_idx = np.arange(self._raw_shape[0], dtype=np.int64) + if (max_size is not None) and (self._raw_idx.size > max_size): + np.random.RandomState(random_seed).shuffle(self._raw_idx) + self._raw_idx = np.sort(self._raw_idx[:max_size]) + + # Apply xflip. 
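+        # With xflip enabled the index list is duplicated and the second copy is marked for a
+        # horizontal flip, which __getitem__ applies on the fly via image[:, :, ::-1].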
+ self._xflip = np.zeros(self._raw_idx.size, dtype=np.uint8) + if xflip: + self._raw_idx = np.tile(self._raw_idx, 2) + self._xflip = np.concatenate([self._xflip, np.ones_like(self._xflip)]) + + def _get_raw_labels(self): + if self._raw_labels is None: + self._raw_labels = self._load_raw_labels() if self._use_labels else None + if self._raw_labels is None: + self._raw_labels = np.zeros([self._raw_shape[0], 0], dtype=np.float32) + assert isinstance(self._raw_labels, np.ndarray) + assert self._raw_labels.shape[0] == self._raw_shape[0] + assert self._raw_labels.dtype in [np.float32, np.int64] + if self._raw_labels.dtype == np.int64: + assert self._raw_labels.ndim == 1 + assert np.all(self._raw_labels >= 0) + return self._raw_labels + + def close(self): # to be overridden by subclass + pass + + def _load_raw_image(self, raw_idx): # to be overridden by subclass + raise NotImplementedError + + def _load_raw_labels(self): # to be overridden by subclass + raise NotImplementedError + + def __getstate__(self): + return dict(self.__dict__, _raw_labels=None) + + def __del__(self): + try: + self.close() + except: + pass + + def __len__(self): + return self._raw_idx.size + + def __getitem__(self, idx): + image = self._load_raw_image(self._raw_idx[idx]) + assert isinstance(image, np.ndarray) + assert list(image.shape) == self.image_shape + assert image.dtype == np.uint8 + if self._xflip[idx]: + assert image.ndim == 3 # CHW + image = image[:, :, ::-1] + return image.copy(), self.get_label(idx) + + def get_label(self, idx): + label = self._get_raw_labels()[self._raw_idx[idx]] + if label.dtype == np.int64: + onehot = np.zeros(self.label_shape, dtype=np.float32) + onehot[label] = 1 + label = onehot + return label.copy() + + def get_details(self, idx): + d = dnnlib.EasyDict() + d.raw_idx = int(self._raw_idx[idx]) + d.xflip = (int(self._xflip[idx]) != 0) + d.raw_label = self._get_raw_labels()[d.raw_idx].copy() + return d + + @property + def name(self): + return self._name + + @property + def image_shape(self): + return list(self._raw_shape[1:]) + + @property + def num_channels(self): + assert len(self.image_shape) == 3 # CHW + return self.image_shape[0] + + @property + def resolution(self): + assert len(self.image_shape) == 3 # CHW + assert self.image_shape[1] == self.image_shape[2] + return self.image_shape[1] + + @property + def label_shape(self): + if self._label_shape is None: + raw_labels = self._get_raw_labels() + if raw_labels.dtype == np.int64: + self._label_shape = [int(np.max(raw_labels)) + 1] + else: + self._label_shape = raw_labels.shape[1:] + return list(self._label_shape) + + @property + def label_dim(self): + assert len(self.label_shape) == 1 + return self.label_shape[0] + + @property + def has_labels(self): + return any(x != 0 for x in self.label_shape) + + @property + def has_onehot_labels(self): + return self._get_raw_labels().dtype == np.int64 + +#---------------------------------------------------------------------------- + +class ImageFolderDataset(Dataset): + def __init__(self, + path, # Path to directory or zip. + resolution = None, # Ensure specific resolution, None = highest available. + **super_kwargs, # Additional arguments for the Dataset base class. 
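+        # (e.g. max_size, use_labels, xflip, random_seed -- forwarded to Dataset.__init__ above)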
+ ): + self._path = path + self._zipfile = None + + if os.path.isdir(self._path): + self._type = 'dir' + self._all_fnames = {os.path.relpath(os.path.join(root, fname), start=self._path) for root, _dirs, files in os.walk(self._path) for fname in files} + elif self._file_ext(self._path) == '.zip': + self._type = 'zip' + self._all_fnames = set(self._get_zipfile().namelist()) + else: + raise IOError('Path must point to a directory or zip') + + PIL.Image.init() + self._image_fnames = sorted(fname for fname in self._all_fnames if self._file_ext(fname) in PIL.Image.EXTENSION) + if len(self._image_fnames) == 0: + raise IOError('No image files found in the specified path') + + name = os.path.splitext(os.path.basename(self._path))[0] + raw_shape = [len(self._image_fnames)] + list(self._load_raw_image(0).shape) + if resolution is not None and (raw_shape[2] != resolution or raw_shape[3] != resolution): + raise IOError('Image files do not match the specified resolution') + super().__init__(name=name, raw_shape=raw_shape, **super_kwargs) + + @staticmethod + def _file_ext(fname): + return os.path.splitext(fname)[1].lower() + + def _get_zipfile(self): + assert self._type == 'zip' + if self._zipfile is None: + self._zipfile = zipfile.ZipFile(self._path) + return self._zipfile + + def _open_file(self, fname): + if self._type == 'dir': + return open(os.path.join(self._path, fname), 'rb') + if self._type == 'zip': + return self._get_zipfile().open(fname, 'r') + return None + + def close(self): + try: + if self._zipfile is not None: + self._zipfile.close() + finally: + self._zipfile = None + + def __getstate__(self): + return dict(super().__getstate__(), _zipfile=None) + + def _load_raw_image(self, raw_idx): + fname = self._image_fnames[raw_idx] + with self._open_file(fname) as f: + if pyspng is not None and self._file_ext(fname) == '.png': + image = pyspng.load(f.read()) + else: + image = np.array(PIL.Image.open(f)) + if image.ndim == 2: + image = image[:, :, np.newaxis] # HW => HWC + image = image.transpose(2, 0, 1) # HWC => CHW + return image + + def _load_raw_labels(self): + fname = 'dataset.json' + if fname not in self._all_fnames: + return None + with self._open_file(fname) as f: + labels = json.load(f)['labels'] + if labels is None: + return None + labels = dict(labels) + labels = [labels[fname.replace('\\', '/')] for fname in self._image_fnames] + labels = np.array(labels) + labels = labels.astype({1: np.int64, 2: np.float32}[labels.ndim]) + return labels + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/loss.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..366ab1cf750c373a0a40b4613fb9b15d4bbe2dca --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/loss.py @@ -0,0 +1,133 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
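+
+# StyleGAN2Loss below implements the non-saturating logistic GAN losses together with R1
+# regularization for D and path length regularization for G, as configured from train.py.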
+ +import numpy as np +import torch +from torch_utils import training_stats +from torch_utils import misc +from torch_utils.ops import conv2d_gradfix + +#---------------------------------------------------------------------------- + +class Loss: + def accumulate_gradients(self, phase, real_img, real_c, gen_z, gen_c, sync, gain): # to be overridden by subclass + raise NotImplementedError() + +#---------------------------------------------------------------------------- + +class StyleGAN2Loss(Loss): + def __init__(self, device, G_mapping, G_synthesis, D, augment_pipe=None, style_mixing_prob=0.9, r1_gamma=10, pl_batch_shrink=2, pl_decay=0.01, pl_weight=2): + super().__init__() + self.device = device + self.G_mapping = G_mapping + self.G_synthesis = G_synthesis + self.D = D + self.augment_pipe = augment_pipe + self.style_mixing_prob = style_mixing_prob + self.r1_gamma = r1_gamma + self.pl_batch_shrink = pl_batch_shrink + self.pl_decay = pl_decay + self.pl_weight = pl_weight + self.pl_mean = torch.zeros([], device=device) + + def run_G(self, z, c, sync): + with misc.ddp_sync(self.G_mapping, sync): + ws = self.G_mapping(z, c) + if self.style_mixing_prob > 0: + with torch.autograd.profiler.record_function('style_mixing'): + cutoff = torch.empty([], dtype=torch.int64, device=ws.device).random_(1, ws.shape[1]) + cutoff = torch.where(torch.rand([], device=ws.device) < self.style_mixing_prob, cutoff, torch.full_like(cutoff, ws.shape[1])) + ws[:, cutoff:] = self.G_mapping(torch.randn_like(z), c, skip_w_avg_update=True)[:, cutoff:] + with misc.ddp_sync(self.G_synthesis, sync): + img = self.G_synthesis(ws) + return img, ws + + def run_D(self, img, c, sync): + if self.augment_pipe is not None: + img = self.augment_pipe(img) + with misc.ddp_sync(self.D, sync): + logits = self.D(img, c) + return logits + + def accumulate_gradients(self, phase, real_img, real_c, gen_z, gen_c, sync, gain): + assert phase in ['Gmain', 'Greg', 'Gboth', 'Dmain', 'Dreg', 'Dboth'] + do_Gmain = (phase in ['Gmain', 'Gboth']) + do_Dmain = (phase in ['Dmain', 'Dboth']) + do_Gpl = (phase in ['Greg', 'Gboth']) and (self.pl_weight != 0) + do_Dr1 = (phase in ['Dreg', 'Dboth']) and (self.r1_gamma != 0) + + # Gmain: Maximize logits for generated images. + if do_Gmain: + with torch.autograd.profiler.record_function('Gmain_forward'): + gen_img, _gen_ws = self.run_G(gen_z, gen_c, sync=(sync and not do_Gpl)) # May get synced by Gpl. + gen_logits = self.run_D(gen_img, gen_c, sync=False) + training_stats.report('Loss/scores/fake', gen_logits) + training_stats.report('Loss/signs/fake', gen_logits.sign()) + loss_Gmain = torch.nn.functional.softplus(-gen_logits) # -log(sigmoid(gen_logits)) + training_stats.report('Loss/G/loss', loss_Gmain) + with torch.autograd.profiler.record_function('Gmain_backward'): + loss_Gmain.mean().mul(gain).backward() + + # Gpl: Apply path length regularization. 
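+        # Path length regularization: project the generated image onto random noise (scaled by
+        # 1/sqrt(H*W)), backprop that scalar to the intermediate latents ws, and penalize the squared
+        # deviation of the resulting gradient norm from its running average (pl_mean).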
+ if do_Gpl: + with torch.autograd.profiler.record_function('Gpl_forward'): + batch_size = gen_z.shape[0] // self.pl_batch_shrink + gen_img, gen_ws = self.run_G(gen_z[:batch_size], gen_c[:batch_size], sync=sync) + pl_noise = torch.randn_like(gen_img) / np.sqrt(gen_img.shape[2] * gen_img.shape[3]) + with torch.autograd.profiler.record_function('pl_grads'), conv2d_gradfix.no_weight_gradients(): + pl_grads = torch.autograd.grad(outputs=[(gen_img * pl_noise).sum()], inputs=[gen_ws], create_graph=True, only_inputs=True)[0] + pl_lengths = pl_grads.square().sum(2).mean(1).sqrt() + pl_mean = self.pl_mean.lerp(pl_lengths.mean(), self.pl_decay) + self.pl_mean.copy_(pl_mean.detach()) + pl_penalty = (pl_lengths - pl_mean).square() + training_stats.report('Loss/pl_penalty', pl_penalty) + loss_Gpl = pl_penalty * self.pl_weight + training_stats.report('Loss/G/reg', loss_Gpl) + with torch.autograd.profiler.record_function('Gpl_backward'): + (gen_img[:, 0, 0, 0] * 0 + loss_Gpl).mean().mul(gain).backward() + + # Dmain: Minimize logits for generated images. + loss_Dgen = 0 + if do_Dmain: + with torch.autograd.profiler.record_function('Dgen_forward'): + gen_img, _gen_ws = self.run_G(gen_z, gen_c, sync=False) + gen_logits = self.run_D(gen_img, gen_c, sync=False) # Gets synced by loss_Dreal. + training_stats.report('Loss/scores/fake', gen_logits) + training_stats.report('Loss/signs/fake', gen_logits.sign()) + loss_Dgen = torch.nn.functional.softplus(gen_logits) # -log(1 - sigmoid(gen_logits)) + with torch.autograd.profiler.record_function('Dgen_backward'): + loss_Dgen.mean().mul(gain).backward() + + # Dmain: Maximize logits for real images. + # Dr1: Apply R1 regularization. + if do_Dmain or do_Dr1: + name = 'Dreal_Dr1' if do_Dmain and do_Dr1 else 'Dreal' if do_Dmain else 'Dr1' + with torch.autograd.profiler.record_function(name + '_forward'): + real_img_tmp = real_img.detach().requires_grad_(do_Dr1) + real_logits = self.run_D(real_img_tmp, real_c, sync=sync) + training_stats.report('Loss/scores/real', real_logits) + training_stats.report('Loss/signs/real', real_logits.sign()) + + loss_Dreal = 0 + if do_Dmain: + loss_Dreal = torch.nn.functional.softplus(-real_logits) # -log(sigmoid(real_logits)) + training_stats.report('Loss/D/loss', loss_Dgen + loss_Dreal) + + loss_Dr1 = 0 + if do_Dr1: + with torch.autograd.profiler.record_function('r1_grads'), conv2d_gradfix.no_weight_gradients(): + r1_grads = torch.autograd.grad(outputs=[real_logits.sum()], inputs=[real_img_tmp], create_graph=True, only_inputs=True)[0] + r1_penalty = r1_grads.square().sum([1,2,3]) + loss_Dr1 = r1_penalty * (self.r1_gamma / 2) + training_stats.report('Loss/r1_penalty', r1_penalty) + training_stats.report('Loss/D/reg', loss_Dr1) + + with torch.autograd.profiler.record_function(name + '_backward'): + (real_logits * 0 + loss_Dreal + loss_Dr1).mean().mul(gain).backward() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/networks.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/networks.py new file mode 100644 index 0000000000000000000000000000000000000000..1153b3170a1a85a4dfc1cb33c456f999eda2bb54 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/networks.py @@ -0,0 +1,729 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
+# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +import numpy as np +import torch +from torch_utils import misc +from torch_utils import persistence +from torch_utils.ops import conv2d_resample +from torch_utils.ops import upfirdn2d +from torch_utils.ops import bias_act +from torch_utils.ops import fma + +#---------------------------------------------------------------------------- + +@misc.profiled_function +def normalize_2nd_moment(x, dim=1, eps=1e-8): + return x * (x.square().mean(dim=dim, keepdim=True) + eps).rsqrt() + +#---------------------------------------------------------------------------- + +@misc.profiled_function +def modulated_conv2d( + x, # Input tensor of shape [batch_size, in_channels, in_height, in_width]. + weight, # Weight tensor of shape [out_channels, in_channels, kernel_height, kernel_width]. + styles, # Modulation coefficients of shape [batch_size, in_channels]. + noise = None, # Optional noise tensor to add to the output activations. + up = 1, # Integer upsampling factor. + down = 1, # Integer downsampling factor. + padding = 0, # Padding with respect to the upsampled image. + resample_filter = None, # Low-pass filter to apply when resampling activations. Must be prepared beforehand by calling upfirdn2d.setup_filter(). + demodulate = True, # Apply weight demodulation? + flip_weight = True, # False = convolution, True = correlation (matches torch.nn.functional.conv2d). + fused_modconv = True, # Perform modulation, convolution, and demodulation as a single fused operation? +): + batch_size = x.shape[0] + out_channels, in_channels, kh, kw = weight.shape + misc.assert_shape(weight, [out_channels, in_channels, kh, kw]) # [OIkk] + misc.assert_shape(x, [batch_size, in_channels, None, None]) # [NIHW] + misc.assert_shape(styles, [batch_size, in_channels]) # [NI] + + # Pre-normalize inputs to avoid FP16 overflow. + if x.dtype == torch.float16 and demodulate: + weight = weight * (1 / np.sqrt(in_channels * kh * kw) / weight.norm(float('inf'), dim=[1,2,3], keepdim=True)) # max_Ikk + styles = styles / styles.norm(float('inf'), dim=1, keepdim=True) # max_I + + # Calculate per-sample weights and demodulation coefficients. + w = None + dcoefs = None + if demodulate or fused_modconv: + w = weight.unsqueeze(0) # [NOIkk] + w = w * styles.reshape(batch_size, 1, -1, 1, 1) # [NOIkk] + if demodulate: + dcoefs = (w.square().sum(dim=[2,3,4]) + 1e-8).rsqrt() # [NO] + if demodulate and fused_modconv: + w = w * dcoefs.reshape(batch_size, -1, 1, 1, 1) # [NOIkk] + + # Execute by scaling the activations before and after the convolution. + if not fused_modconv: + x = x * styles.to(x.dtype).reshape(batch_size, -1, 1, 1) + x = conv2d_resample.conv2d_resample(x=x, w=weight.to(x.dtype), f=resample_filter, up=up, down=down, padding=padding, flip_weight=flip_weight) + if demodulate and noise is not None: + x = fma.fma(x, dcoefs.to(x.dtype).reshape(batch_size, -1, 1, 1), noise.to(x.dtype)) + elif demodulate: + x = x * dcoefs.to(x.dtype).reshape(batch_size, -1, 1, 1) + elif noise is not None: + x = x.add_(noise.to(x.dtype)) + return x + + # Execute as one fused op using grouped convolution. 
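+ # Per-sample modulated (and demodulated) weights are stacked along the output-channel axis and the
+ # batch is folded into the channel axis, so a single grouped convolution with groups=batch_size
+ # applies a different kernel to every sample in one call.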
+ with misc.suppress_tracer_warnings(): # this value will be treated as a constant + batch_size = int(batch_size) + misc.assert_shape(x, [batch_size, in_channels, None, None]) + x = x.reshape(1, -1, *x.shape[2:]) + w = w.reshape(-1, in_channels, kh, kw) + x = conv2d_resample.conv2d_resample(x=x, w=w.to(x.dtype), f=resample_filter, up=up, down=down, padding=padding, groups=batch_size, flip_weight=flip_weight) + x = x.reshape(batch_size, -1, *x.shape[2:]) + if noise is not None: + x = x.add_(noise) + return x + +#---------------------------------------------------------------------------- + +@persistence.persistent_class +class FullyConnectedLayer(torch.nn.Module): + def __init__(self, + in_features, # Number of input features. + out_features, # Number of output features. + bias = True, # Apply additive bias before the activation function? + activation = 'linear', # Activation function: 'relu', 'lrelu', etc. + lr_multiplier = 1, # Learning rate multiplier. + bias_init = 0, # Initial value for the additive bias. + ): + super().__init__() + self.activation = activation + self.weight = torch.nn.Parameter(torch.randn([out_features, in_features]) / lr_multiplier) + self.bias = torch.nn.Parameter(torch.full([out_features], np.float32(bias_init))) if bias else None + self.weight_gain = lr_multiplier / np.sqrt(in_features) + self.bias_gain = lr_multiplier + + def forward(self, x): + w = self.weight.to(x.dtype) * self.weight_gain + b = self.bias + if b is not None: + b = b.to(x.dtype) + if self.bias_gain != 1: + b = b * self.bias_gain + + if self.activation == 'linear' and b is not None: + x = torch.addmm(b.unsqueeze(0), x, w.t()) + else: + x = x.matmul(w.t()) + x = bias_act.bias_act(x, b, act=self.activation) + return x + +#---------------------------------------------------------------------------- + +@persistence.persistent_class +class Conv2dLayer(torch.nn.Module): + def __init__(self, + in_channels, # Number of input channels. + out_channels, # Number of output channels. + kernel_size, # Width and height of the convolution kernel. + bias = True, # Apply additive bias before the activation function? + activation = 'linear', # Activation function: 'relu', 'lrelu', etc. + up = 1, # Integer upsampling factor. + down = 1, # Integer downsampling factor. + resample_filter = [1,3,3,1], # Low-pass filter to apply when resampling activations. + conv_clamp = None, # Clamp the output to +-X, None = disable clamping. + channels_last = False, # Expect the input to have memory_format=channels_last? + trainable = True, # Update the weights of this layer during training? 
+ ): + super().__init__() + self.activation = activation + self.up = up + self.down = down + self.conv_clamp = conv_clamp + self.register_buffer('resample_filter', upfirdn2d.setup_filter(resample_filter)) + self.padding = kernel_size // 2 + self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size ** 2)) + self.act_gain = bias_act.activation_funcs[activation].def_gain + + memory_format = torch.channels_last if channels_last else torch.contiguous_format + weight = torch.randn([out_channels, in_channels, kernel_size, kernel_size]).to(memory_format=memory_format) + bias = torch.zeros([out_channels]) if bias else None + if trainable: + self.weight = torch.nn.Parameter(weight) + self.bias = torch.nn.Parameter(bias) if bias is not None else None + else: + self.register_buffer('weight', weight) + if bias is not None: + self.register_buffer('bias', bias) + else: + self.bias = None + + def forward(self, x, gain=1): + w = self.weight * self.weight_gain + b = self.bias.to(x.dtype) if self.bias is not None else None + flip_weight = (self.up == 1) # slightly faster + x = conv2d_resample.conv2d_resample(x=x, w=w.to(x.dtype), f=self.resample_filter, up=self.up, down=self.down, padding=self.padding, flip_weight=flip_weight) + + act_gain = self.act_gain * gain + act_clamp = self.conv_clamp * gain if self.conv_clamp is not None else None + x = bias_act.bias_act(x, b, act=self.activation, gain=act_gain, clamp=act_clamp) + return x + +#---------------------------------------------------------------------------- + +@persistence.persistent_class +class MappingNetwork(torch.nn.Module): + def __init__(self, + z_dim, # Input latent (Z) dimensionality, 0 = no latent. + c_dim, # Conditioning label (C) dimensionality, 0 = no label. + w_dim, # Intermediate latent (W) dimensionality. + num_ws, # Number of intermediate latents to output, None = do not broadcast. + num_layers = 8, # Number of mapping layers. + embed_features = None, # Label embedding dimensionality, None = same as w_dim. + layer_features = None, # Number of intermediate features in the mapping layers, None = same as w_dim. + activation = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + lr_multiplier = 0.01, # Learning rate multiplier for the mapping layers. + w_avg_beta = 0.995, # Decay for tracking the moving average of W during training, None = do not track. + ): + super().__init__() + self.z_dim = z_dim + self.c_dim = c_dim + self.w_dim = w_dim + self.num_ws = num_ws + self.num_layers = num_layers + self.w_avg_beta = w_avg_beta + + if embed_features is None: + embed_features = w_dim + if c_dim == 0: + embed_features = 0 + if layer_features is None: + layer_features = w_dim + features_list = [z_dim + embed_features] + [layer_features] * (num_layers - 1) + [w_dim] + + if c_dim > 0: + self.embed = FullyConnectedLayer(c_dim, embed_features) + for idx in range(num_layers): + in_features = features_list[idx] + out_features = features_list[idx + 1] + layer = FullyConnectedLayer(in_features, out_features, activation=activation, lr_multiplier=lr_multiplier) + setattr(self, f'fc{idx}', layer) + + if num_ws is not None and w_avg_beta is not None: + self.register_buffer('w_avg', torch.zeros([w_dim])) + + def forward(self, z, c, truncation_psi=1, truncation_cutoff=None, skip_w_avg_update=False): + # Embed, normalize, and concat inputs. 
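+ # The latent z is normalized to unit second moment; when a conditioning label is present, its learned
+ # embedding is normalized the same way and concatenated to z before the fully-connected mapping layers.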
+ x = None + with torch.autograd.profiler.record_function('input'): + if self.z_dim > 0: + misc.assert_shape(z, [None, self.z_dim]) + x = normalize_2nd_moment(z.to(torch.float32)) + if self.c_dim > 0: + misc.assert_shape(c, [None, self.c_dim]) + y = normalize_2nd_moment(self.embed(c.to(torch.float32))) + x = torch.cat([x, y], dim=1) if x is not None else y + + # Main layers. + for idx in range(self.num_layers): + layer = getattr(self, f'fc{idx}') + x = layer(x) + + # Update moving average of W. + if self.w_avg_beta is not None and self.training and not skip_w_avg_update: + with torch.autograd.profiler.record_function('update_w_avg'): + self.w_avg.copy_(x.detach().mean(dim=0).lerp(self.w_avg, self.w_avg_beta)) + + # Broadcast. + if self.num_ws is not None: + with torch.autograd.profiler.record_function('broadcast'): + x = x.unsqueeze(1).repeat([1, self.num_ws, 1]) + + # Apply truncation. + if truncation_psi != 1: + with torch.autograd.profiler.record_function('truncate'): + assert self.w_avg_beta is not None + if self.num_ws is None or truncation_cutoff is None: + x = self.w_avg.lerp(x, truncation_psi) + else: + x[:, :truncation_cutoff] = self.w_avg.lerp(x[:, :truncation_cutoff], truncation_psi) + return x + +#---------------------------------------------------------------------------- + +@persistence.persistent_class +class SynthesisLayer(torch.nn.Module): + def __init__(self, + in_channels, # Number of input channels. + out_channels, # Number of output channels. + w_dim, # Intermediate latent (W) dimensionality. + resolution, # Resolution of this layer. + kernel_size = 3, # Convolution kernel size. + up = 1, # Integer upsampling factor. + use_noise = True, # Enable noise input? + activation = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + resample_filter = [1,3,3,1], # Low-pass filter to apply when resampling activations. + conv_clamp = None, # Clamp the output of convolution layers to +-X, None = disable clamping. + channels_last = False, # Use channels_last format for the weights? 
+ ): + super().__init__() + self.resolution = resolution + self.up = up + self.use_noise = use_noise + self.activation = activation + self.conv_clamp = conv_clamp + self.register_buffer('resample_filter', upfirdn2d.setup_filter(resample_filter)) + self.padding = kernel_size // 2 + self.act_gain = bias_act.activation_funcs[activation].def_gain + + self.affine = FullyConnectedLayer(w_dim, in_channels, bias_init=1) + memory_format = torch.channels_last if channels_last else torch.contiguous_format + self.weight = torch.nn.Parameter(torch.randn([out_channels, in_channels, kernel_size, kernel_size]).to(memory_format=memory_format)) + if use_noise: + self.register_buffer('noise_const', torch.randn([resolution, resolution])) + self.noise_strength = torch.nn.Parameter(torch.zeros([])) + self.bias = torch.nn.Parameter(torch.zeros([out_channels])) + + def forward(self, x, w, noise_mode='random', fused_modconv=True, gain=1): + assert noise_mode in ['random', 'const', 'none'] + in_resolution = self.resolution // self.up + misc.assert_shape(x, [None, self.weight.shape[1], in_resolution, in_resolution]) + styles = self.affine(w) + + noise = None + if self.use_noise and noise_mode == 'random': + noise = torch.randn([x.shape[0], 1, self.resolution, self.resolution], device=x.device) * self.noise_strength + if self.use_noise and noise_mode == 'const': + noise = self.noise_const * self.noise_strength + + flip_weight = (self.up == 1) # slightly faster + x = modulated_conv2d(x=x, weight=self.weight, styles=styles, noise=noise, up=self.up, + padding=self.padding, resample_filter=self.resample_filter, flip_weight=flip_weight, fused_modconv=fused_modconv) + + act_gain = self.act_gain * gain + act_clamp = self.conv_clamp * gain if self.conv_clamp is not None else None + x = bias_act.bias_act(x, self.bias.to(x.dtype), act=self.activation, gain=act_gain, clamp=act_clamp) + return x + +#---------------------------------------------------------------------------- + +@persistence.persistent_class +class ToRGBLayer(torch.nn.Module): + def __init__(self, in_channels, out_channels, w_dim, kernel_size=1, conv_clamp=None, channels_last=False): + super().__init__() + self.conv_clamp = conv_clamp + self.affine = FullyConnectedLayer(w_dim, in_channels, bias_init=1) + memory_format = torch.channels_last if channels_last else torch.contiguous_format + self.weight = torch.nn.Parameter(torch.randn([out_channels, in_channels, kernel_size, kernel_size]).to(memory_format=memory_format)) + self.bias = torch.nn.Parameter(torch.zeros([out_channels])) + self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size ** 2)) + + def forward(self, x, w, fused_modconv=True): + styles = self.affine(w) * self.weight_gain + x = modulated_conv2d(x=x, weight=self.weight, styles=styles, demodulate=False, fused_modconv=fused_modconv) + x = bias_act.bias_act(x, self.bias.to(x.dtype), clamp=self.conv_clamp) + return x + +#---------------------------------------------------------------------------- + +@persistence.persistent_class +class SynthesisBlock(torch.nn.Module): + def __init__(self, + in_channels, # Number of input channels, 0 = first block. + out_channels, # Number of output channels. + w_dim, # Intermediate latent (W) dimensionality. + resolution, # Resolution of this block. + img_channels, # Number of output color channels. + is_last, # Is this the last block? + architecture = 'skip', # Architecture: 'orig', 'skip', 'resnet'. + resample_filter = [1,3,3,1], # Low-pass filter to apply when resampling activations. 
+ conv_clamp = None, # Clamp the output of convolution layers to +-X, None = disable clamping. + use_fp16 = False, # Use FP16 for this block? + fp16_channels_last = False, # Use channels-last memory format with FP16? + **layer_kwargs, # Arguments for SynthesisLayer. + ): + assert architecture in ['orig', 'skip', 'resnet'] + super().__init__() + self.in_channels = in_channels + self.w_dim = w_dim + self.resolution = resolution + self.img_channels = img_channels + self.is_last = is_last + self.architecture = architecture + self.use_fp16 = use_fp16 + self.channels_last = (use_fp16 and fp16_channels_last) + self.register_buffer('resample_filter', upfirdn2d.setup_filter(resample_filter)) + self.num_conv = 0 + self.num_torgb = 0 + + if in_channels == 0: + self.const = torch.nn.Parameter(torch.randn([out_channels, resolution, resolution])) + + if in_channels != 0: + self.conv0 = SynthesisLayer(in_channels, out_channels, w_dim=w_dim, resolution=resolution, up=2, + resample_filter=resample_filter, conv_clamp=conv_clamp, channels_last=self.channels_last, **layer_kwargs) + self.num_conv += 1 + + self.conv1 = SynthesisLayer(out_channels, out_channels, w_dim=w_dim, resolution=resolution, + conv_clamp=conv_clamp, channels_last=self.channels_last, **layer_kwargs) + self.num_conv += 1 + + if is_last or architecture == 'skip': + self.torgb = ToRGBLayer(out_channels, img_channels, w_dim=w_dim, + conv_clamp=conv_clamp, channels_last=self.channels_last) + self.num_torgb += 1 + + if in_channels != 0 and architecture == 'resnet': + self.skip = Conv2dLayer(in_channels, out_channels, kernel_size=1, bias=False, up=2, + resample_filter=resample_filter, channels_last=self.channels_last) + + def forward(self, x, img, ws, force_fp32=False, fused_modconv=None, **layer_kwargs): + misc.assert_shape(ws, [None, self.num_conv + self.num_torgb, self.w_dim]) + w_iter = iter(ws.unbind(dim=1)) + dtype = torch.float16 if self.use_fp16 and not force_fp32 else torch.float32 + memory_format = torch.channels_last if self.channels_last and not force_fp32 else torch.contiguous_format + if fused_modconv is None: + with misc.suppress_tracer_warnings(): # this value will be treated as a constant + fused_modconv = (not self.training) and (dtype == torch.float32 or int(x.shape[0]) == 1) + + # Input. + if self.in_channels == 0: + x = self.const.to(dtype=dtype, memory_format=memory_format) + x = x.unsqueeze(0).repeat([ws.shape[0], 1, 1, 1]) + else: + misc.assert_shape(x, [None, self.in_channels, self.resolution // 2, self.resolution // 2]) + x = x.to(dtype=dtype, memory_format=memory_format) + + # Main layers. + if self.in_channels == 0: + x = self.conv1(x, next(w_iter), fused_modconv=fused_modconv, **layer_kwargs) + elif self.architecture == 'resnet': + y = self.skip(x, gain=np.sqrt(0.5)) + x = self.conv0(x, next(w_iter), fused_modconv=fused_modconv, **layer_kwargs) + x = self.conv1(x, next(w_iter), fused_modconv=fused_modconv, gain=np.sqrt(0.5), **layer_kwargs) + x = y.add_(x) + else: + x = self.conv0(x, next(w_iter), fused_modconv=fused_modconv, **layer_kwargs) + x = self.conv1(x, next(w_iter), fused_modconv=fused_modconv, **layer_kwargs) + + # ToRGB. 
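+ # With the 'skip' architecture each resolution emits an RGB image that is upsampled and added to the
+ # running output, so the final image is the sum of the per-resolution contributions.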
+ if img is not None: + misc.assert_shape(img, [None, self.img_channels, self.resolution // 2, self.resolution // 2]) + img = upfirdn2d.upsample2d(img, self.resample_filter) + if self.is_last or self.architecture == 'skip': + y = self.torgb(x, next(w_iter), fused_modconv=fused_modconv) + y = y.to(dtype=torch.float32, memory_format=torch.contiguous_format) + img = img.add_(y) if img is not None else y + + assert x.dtype == dtype + assert img is None or img.dtype == torch.float32 + return x, img + +#---------------------------------------------------------------------------- + +@persistence.persistent_class +class SynthesisNetwork(torch.nn.Module): + def __init__(self, + w_dim, # Intermediate latent (W) dimensionality. + img_resolution, # Output image resolution. + img_channels, # Number of color channels. + channel_base = 32768, # Overall multiplier for the number of channels. + channel_max = 512, # Maximum number of channels in any layer. + num_fp16_res = 0, # Use FP16 for the N highest resolutions. + **block_kwargs, # Arguments for SynthesisBlock. + ): + assert img_resolution >= 4 and img_resolution & (img_resolution - 1) == 0 + super().__init__() + self.w_dim = w_dim + self.img_resolution = img_resolution + self.img_resolution_log2 = int(np.log2(img_resolution)) + self.img_channels = img_channels + self.block_resolutions = [2 ** i for i in range(2, self.img_resolution_log2 + 1)] + channels_dict = {res: min(channel_base // res, channel_max) for res in self.block_resolutions} + fp16_resolution = max(2 ** (self.img_resolution_log2 + 1 - num_fp16_res), 8) + + self.num_ws = 0 + for res in self.block_resolutions: + in_channels = channels_dict[res // 2] if res > 4 else 0 + out_channels = channels_dict[res] + use_fp16 = (res >= fp16_resolution) + is_last = (res == self.img_resolution) + block = SynthesisBlock(in_channels, out_channels, w_dim=w_dim, resolution=res, + img_channels=img_channels, is_last=is_last, use_fp16=use_fp16, **block_kwargs) + self.num_ws += block.num_conv + if is_last: + self.num_ws += block.num_torgb + setattr(self, f'b{res}', block) + + def forward(self, ws, **block_kwargs): + block_ws = [] + with torch.autograd.profiler.record_function('split_ws'): + misc.assert_shape(ws, [None, self.num_ws, self.w_dim]) + ws = ws.to(torch.float32) + w_idx = 0 + for res in self.block_resolutions: + block = getattr(self, f'b{res}') + block_ws.append(ws.narrow(1, w_idx, block.num_conv + block.num_torgb)) + w_idx += block.num_conv + + x = img = None + for res, cur_ws in zip(self.block_resolutions, block_ws): + block = getattr(self, f'b{res}') + x, img = block(x, img, cur_ws, **block_kwargs) + return img + +#---------------------------------------------------------------------------- + +@persistence.persistent_class +class Generator(torch.nn.Module): + def __init__(self, + z_dim, # Input latent (Z) dimensionality. + c_dim, # Conditioning label (C) dimensionality. + w_dim, # Intermediate latent (W) dimensionality. + img_resolution, # Output resolution. + img_channels, # Number of output color channels. + mapping_kwargs = {}, # Arguments for MappingNetwork. + synthesis_kwargs = {}, # Arguments for SynthesisNetwork. 
+ ): + super().__init__() + self.z_dim = z_dim + self.c_dim = c_dim + self.w_dim = w_dim + self.img_resolution = img_resolution + self.img_channels = img_channels + self.synthesis = SynthesisNetwork(w_dim=w_dim, img_resolution=img_resolution, img_channels=img_channels, **synthesis_kwargs) + self.num_ws = self.synthesis.num_ws + self.mapping = MappingNetwork(z_dim=z_dim, c_dim=c_dim, w_dim=w_dim, num_ws=self.num_ws, **mapping_kwargs) + + def forward(self, z, c, truncation_psi=1, truncation_cutoff=None, **synthesis_kwargs): + ws = self.mapping(z, c, truncation_psi=truncation_psi, truncation_cutoff=truncation_cutoff) + img = self.synthesis(ws, **synthesis_kwargs) + return img + +#---------------------------------------------------------------------------- + +@persistence.persistent_class +class DiscriminatorBlock(torch.nn.Module): + def __init__(self, + in_channels, # Number of input channels, 0 = first block. + tmp_channels, # Number of intermediate channels. + out_channels, # Number of output channels. + resolution, # Resolution of this block. + img_channels, # Number of input color channels. + first_layer_idx, # Index of the first layer. + architecture = 'resnet', # Architecture: 'orig', 'skip', 'resnet'. + activation = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + resample_filter = [1,3,3,1], # Low-pass filter to apply when resampling activations. + conv_clamp = None, # Clamp the output of convolution layers to +-X, None = disable clamping. + use_fp16 = False, # Use FP16 for this block? + fp16_channels_last = False, # Use channels-last memory format with FP16? + freeze_layers = 0, # Freeze-D: Number of layers to freeze. + ): + assert in_channels in [0, tmp_channels] + assert architecture in ['orig', 'skip', 'resnet'] + super().__init__() + self.in_channels = in_channels + self.resolution = resolution + self.img_channels = img_channels + self.first_layer_idx = first_layer_idx + self.architecture = architecture + self.use_fp16 = use_fp16 + self.channels_last = (use_fp16 and fp16_channels_last) + self.register_buffer('resample_filter', upfirdn2d.setup_filter(resample_filter)) + + self.num_layers = 0 + def trainable_gen(): + while True: + layer_idx = self.first_layer_idx + self.num_layers + trainable = (layer_idx >= freeze_layers) + self.num_layers += 1 + yield trainable + trainable_iter = trainable_gen() + + if in_channels == 0 or architecture == 'skip': + self.fromrgb = Conv2dLayer(img_channels, tmp_channels, kernel_size=1, activation=activation, + trainable=next(trainable_iter), conv_clamp=conv_clamp, channels_last=self.channels_last) + + self.conv0 = Conv2dLayer(tmp_channels, tmp_channels, kernel_size=3, activation=activation, + trainable=next(trainable_iter), conv_clamp=conv_clamp, channels_last=self.channels_last) + + self.conv1 = Conv2dLayer(tmp_channels, out_channels, kernel_size=3, activation=activation, down=2, + trainable=next(trainable_iter), resample_filter=resample_filter, conv_clamp=conv_clamp, channels_last=self.channels_last) + + if architecture == 'resnet': + self.skip = Conv2dLayer(tmp_channels, out_channels, kernel_size=1, bias=False, down=2, + trainable=next(trainable_iter), resample_filter=resample_filter, channels_last=self.channels_last) + + def forward(self, x, img, force_fp32=False): + dtype = torch.float16 if self.use_fp16 and not force_fp32 else torch.float32 + memory_format = torch.channels_last if self.channels_last and not force_fp32 else torch.contiguous_format + + # Input. 
+ if x is not None: + misc.assert_shape(x, [None, self.in_channels, self.resolution, self.resolution]) + x = x.to(dtype=dtype, memory_format=memory_format) + + # FromRGB. + if self.in_channels == 0 or self.architecture == 'skip': + misc.assert_shape(img, [None, self.img_channels, self.resolution, self.resolution]) + img = img.to(dtype=dtype, memory_format=memory_format) + y = self.fromrgb(img) + x = x + y if x is not None else y + img = upfirdn2d.downsample2d(img, self.resample_filter) if self.architecture == 'skip' else None + + # Main layers. + if self.architecture == 'resnet': + y = self.skip(x, gain=np.sqrt(0.5)) + x = self.conv0(x) + x = self.conv1(x, gain=np.sqrt(0.5)) + x = y.add_(x) + else: + x = self.conv0(x) + x = self.conv1(x) + + assert x.dtype == dtype + return x, img + +#---------------------------------------------------------------------------- + +@persistence.persistent_class +class MinibatchStdLayer(torch.nn.Module): + def __init__(self, group_size, num_channels=1): + super().__init__() + self.group_size = group_size + self.num_channels = num_channels + + def forward(self, x): + N, C, H, W = x.shape + with misc.suppress_tracer_warnings(): # as_tensor results are registered as constants + G = torch.min(torch.as_tensor(self.group_size), torch.as_tensor(N)) if self.group_size is not None else N + F = self.num_channels + c = C // F + + y = x.reshape(G, -1, F, c, H, W) # [GnFcHW] Split minibatch N into n groups of size G, and channels C into F groups of size c. + y = y - y.mean(dim=0) # [GnFcHW] Subtract mean over group. + y = y.square().mean(dim=0) # [nFcHW] Calc variance over group. + y = (y + 1e-8).sqrt() # [nFcHW] Calc stddev over group. + y = y.mean(dim=[2,3,4]) # [nF] Take average over channels and pixels. + y = y.reshape(-1, F, 1, 1) # [nF11] Add missing dimensions. + y = y.repeat(G, 1, H, W) # [NFHW] Replicate over group and pixels. + x = torch.cat([x, y], dim=1) # [NCHW] Append to input as new channels. + return x + +#---------------------------------------------------------------------------- + +@persistence.persistent_class +class DiscriminatorEpilogue(torch.nn.Module): + def __init__(self, + in_channels, # Number of input channels. + cmap_dim, # Dimensionality of mapped conditioning label, 0 = no label. + resolution, # Resolution of this block. + img_channels, # Number of input color channels. + architecture = 'resnet', # Architecture: 'orig', 'skip', 'resnet'. + mbstd_group_size = 4, # Group size for the minibatch standard deviation layer, None = entire minibatch. + mbstd_num_channels = 1, # Number of features for the minibatch standard deviation layer, 0 = disable. + activation = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + conv_clamp = None, # Clamp the output of convolution layers to +-X, None = disable clamping. 
+ ): + assert architecture in ['orig', 'skip', 'resnet'] + super().__init__() + self.in_channels = in_channels + self.cmap_dim = cmap_dim + self.resolution = resolution + self.img_channels = img_channels + self.architecture = architecture + + if architecture == 'skip': + self.fromrgb = Conv2dLayer(img_channels, in_channels, kernel_size=1, activation=activation) + self.mbstd = MinibatchStdLayer(group_size=mbstd_group_size, num_channels=mbstd_num_channels) if mbstd_num_channels > 0 else None + self.conv = Conv2dLayer(in_channels + mbstd_num_channels, in_channels, kernel_size=3, activation=activation, conv_clamp=conv_clamp) + self.fc = FullyConnectedLayer(in_channels * (resolution ** 2), in_channels, activation=activation) + self.out = FullyConnectedLayer(in_channels, 1 if cmap_dim == 0 else cmap_dim) + + def forward(self, x, img, cmap, force_fp32=False): + misc.assert_shape(x, [None, self.in_channels, self.resolution, self.resolution]) # [NCHW] + _ = force_fp32 # unused + dtype = torch.float32 + memory_format = torch.contiguous_format + + # FromRGB. + x = x.to(dtype=dtype, memory_format=memory_format) + if self.architecture == 'skip': + misc.assert_shape(img, [None, self.img_channels, self.resolution, self.resolution]) + img = img.to(dtype=dtype, memory_format=memory_format) + x = x + self.fromrgb(img) + + # Main layers. + if self.mbstd is not None: + x = self.mbstd(x) + x = self.conv(x) + x = self.fc(x.flatten(1)) + x = self.out(x) + + # Conditioning. + if self.cmap_dim > 0: + misc.assert_shape(cmap, [None, self.cmap_dim]) + x = (x * cmap).sum(dim=1, keepdim=True) * (1 / np.sqrt(self.cmap_dim)) + + assert x.dtype == dtype + return x + +#---------------------------------------------------------------------------- + +@persistence.persistent_class +class Discriminator(torch.nn.Module): + def __init__(self, + c_dim, # Conditioning label (C) dimensionality. + img_resolution, # Input resolution. + img_channels, # Number of input color channels. + architecture = 'resnet', # Architecture: 'orig', 'skip', 'resnet'. + channel_base = 32768, # Overall multiplier for the number of channels. + channel_max = 512, # Maximum number of channels in any layer. + num_fp16_res = 0, # Use FP16 for the N highest resolutions. + conv_clamp = None, # Clamp the output of convolution layers to +-X, None = disable clamping. + cmap_dim = None, # Dimensionality of mapped conditioning label, None = default. + block_kwargs = {}, # Arguments for DiscriminatorBlock. + mapping_kwargs = {}, # Arguments for MappingNetwork. + epilogue_kwargs = {}, # Arguments for DiscriminatorEpilogue. 
+ ): + super().__init__() + self.c_dim = c_dim + self.img_resolution = img_resolution + self.img_resolution_log2 = int(np.log2(img_resolution)) + self.img_channels = img_channels + self.block_resolutions = [2 ** i for i in range(self.img_resolution_log2, 2, -1)] + channels_dict = {res: min(channel_base // res, channel_max) for res in self.block_resolutions + [4]} + fp16_resolution = max(2 ** (self.img_resolution_log2 + 1 - num_fp16_res), 8) + + if cmap_dim is None: + cmap_dim = channels_dict[4] + if c_dim == 0: + cmap_dim = 0 + + common_kwargs = dict(img_channels=img_channels, architecture=architecture, conv_clamp=conv_clamp) + cur_layer_idx = 0 + for res in self.block_resolutions: + in_channels = channels_dict[res] if res < img_resolution else 0 + tmp_channels = channels_dict[res] + out_channels = channels_dict[res // 2] + use_fp16 = (res >= fp16_resolution) + block = DiscriminatorBlock(in_channels, tmp_channels, out_channels, resolution=res, + first_layer_idx=cur_layer_idx, use_fp16=use_fp16, **block_kwargs, **common_kwargs) + setattr(self, f'b{res}', block) + cur_layer_idx += block.num_layers + if c_dim > 0: + self.mapping = MappingNetwork(z_dim=0, c_dim=c_dim, w_dim=cmap_dim, num_ws=None, w_avg_beta=None, **mapping_kwargs) + self.b4 = DiscriminatorEpilogue(channels_dict[4], cmap_dim=cmap_dim, resolution=4, **epilogue_kwargs, **common_kwargs) + + def forward(self, img, c, **block_kwargs): + x = None + for res in self.block_resolutions: + block = getattr(self, f'b{res}') + x, img = block(x, img, **block_kwargs) + + cmap = None + if self.c_dim > 0: + cmap = self.mapping(None, c) + x = self.b4(x, img, cmap) + return x + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/training_loop.py b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/training_loop.py new file mode 100644 index 0000000000000000000000000000000000000000..994797b9db56792993522ab37b40a5072f95de6a --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_pth_official/training/training_loop.py @@ -0,0 +1,421 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +import os +import time +import copy +import json +import pickle +import psutil +import PIL.Image +import numpy as np +import torch +import dnnlib +from torch_utils import misc +from torch_utils import training_stats +from torch_utils.ops import conv2d_gradfix +from torch_utils.ops import grid_sample_gradfix + +import legacy +from metrics import metric_main + +#---------------------------------------------------------------------------- + +def setup_snapshot_image_grid(training_set, random_seed=0): + rnd = np.random.RandomState(random_seed) + gw = np.clip(7680 // training_set.image_shape[2], 7, 32) + gh = np.clip(4320 // training_set.image_shape[1], 4, 32) + + # No labels => show random subset of training samples. + if not training_set.has_labels: + all_indices = list(range(len(training_set))) + rnd.shuffle(all_indices) + grid_indices = [all_indices[i % len(all_indices)] for i in range(gw * gh)] + + else: + # Group training samples by label. 
+ label_groups = dict() # label => [idx, ...] + for idx in range(len(training_set)): + label = tuple(training_set.get_details(idx).raw_label.flat[::-1]) + if label not in label_groups: + label_groups[label] = [] + label_groups[label].append(idx) + + # Reorder. + label_order = sorted(label_groups.keys()) + for label in label_order: + rnd.shuffle(label_groups[label]) + + # Organize into grid. + grid_indices = [] + for y in range(gh): + label = label_order[y % len(label_order)] + indices = label_groups[label] + grid_indices += [indices[x % len(indices)] for x in range(gw)] + label_groups[label] = [indices[(i + gw) % len(indices)] for i in range(len(indices))] + + # Load data. + images, labels = zip(*[training_set[i] for i in grid_indices]) + return (gw, gh), np.stack(images), np.stack(labels) + +#---------------------------------------------------------------------------- + +def save_image_grid(img, fname, drange, grid_size): + lo, hi = drange + img = np.asarray(img, dtype=np.float32) + img = (img - lo) * (255 / (hi - lo)) + img = np.rint(img).clip(0, 255).astype(np.uint8) + + gw, gh = grid_size + _N, C, H, W = img.shape + img = img.reshape(gh, gw, C, H, W) + img = img.transpose(0, 3, 1, 4, 2) + img = img.reshape(gh * H, gw * W, C) + + assert C in [1, 3] + if C == 1: + PIL.Image.fromarray(img[:, :, 0], 'L').save(fname) + if C == 3: + PIL.Image.fromarray(img, 'RGB').save(fname) + +#---------------------------------------------------------------------------- + +def training_loop( + run_dir = '.', # Output directory. + training_set_kwargs = {}, # Options for training set. + data_loader_kwargs = {}, # Options for torch.utils.data.DataLoader. + G_kwargs = {}, # Options for generator network. + D_kwargs = {}, # Options for discriminator network. + G_opt_kwargs = {}, # Options for generator optimizer. + D_opt_kwargs = {}, # Options for discriminator optimizer. + augment_kwargs = None, # Options for augmentation pipeline. None = disable. + loss_kwargs = {}, # Options for loss function. + metrics = [], # Metrics to evaluate during training. + random_seed = 0, # Global random seed. + num_gpus = 1, # Number of GPUs participating in the training. + rank = 0, # Rank of the current process in [0, num_gpus[. + batch_size = 4, # Total batch size for one training iteration. Can be larger than batch_gpu * num_gpus. + batch_gpu = 4, # Number of samples processed at a time by one GPU. + ema_kimg = 10, # Half-life of the exponential moving average (EMA) of generator weights. + ema_rampup = None, # EMA ramp-up coefficient. + G_reg_interval = 4, # How often to perform regularization for G? None = disable lazy regularization. + D_reg_interval = 16, # How often to perform regularization for D? None = disable lazy regularization. + augment_p = 0, # Initial value of augmentation probability. + ada_target = None, # ADA target value. None = fixed p. + ada_interval = 4, # How often to perform ADA adjustment? + ada_kimg = 500, # ADA adjustment speed, measured in how many kimg it takes for p to increase/decrease by one unit. + total_kimg = 25000, # Total length of the training, measured in thousands of real images. + kimg_per_tick = 4, # Progress snapshot interval. + image_snapshot_ticks = 50, # How often to save image snapshots? None = disable. + network_snapshot_ticks = 50, # How often to save network snapshots? None = disable. + resume_pkl = None, # Network pickle to resume training from. + cudnn_benchmark = True, # Enable torch.backends.cudnn.benchmark? 
+ allow_tf32 = False, # Enable torch.backends.cuda.matmul.allow_tf32 and torch.backends.cudnn.allow_tf32? + abort_fn = None, # Callback function for determining whether to abort training. Must return consistent results across ranks. + progress_fn = None, # Callback function for updating training progress. Called for all ranks. +): + # Initialize. + start_time = time.time() + device = torch.device('cuda', rank) + np.random.seed(random_seed * num_gpus + rank) + torch.manual_seed(random_seed * num_gpus + rank) + torch.backends.cudnn.benchmark = cudnn_benchmark # Improves training speed. + torch.backends.cuda.matmul.allow_tf32 = allow_tf32 # Allow PyTorch to internally use tf32 for matmul + torch.backends.cudnn.allow_tf32 = allow_tf32 # Allow PyTorch to internally use tf32 for convolutions + conv2d_gradfix.enabled = True # Improves training speed. + grid_sample_gradfix.enabled = True # Avoids errors with the augmentation pipe. + + # Load training set. + if rank == 0: + print('Loading training set...') + training_set = dnnlib.util.construct_class_by_name(**training_set_kwargs) # subclass of training.dataset.Dataset + training_set_sampler = misc.InfiniteSampler(dataset=training_set, rank=rank, num_replicas=num_gpus, seed=random_seed) + training_set_iterator = iter(torch.utils.data.DataLoader(dataset=training_set, sampler=training_set_sampler, batch_size=batch_size//num_gpus, **data_loader_kwargs)) + if rank == 0: + print() + print('Num images: ', len(training_set)) + print('Image shape:', training_set.image_shape) + print('Label shape:', training_set.label_shape) + print() + + # Construct networks. + if rank == 0: + print('Constructing networks...') + common_kwargs = dict(c_dim=training_set.label_dim, img_resolution=training_set.resolution, img_channels=training_set.num_channels) + G = dnnlib.util.construct_class_by_name(**G_kwargs, **common_kwargs).train().requires_grad_(False).to(device) # subclass of torch.nn.Module + D = dnnlib.util.construct_class_by_name(**D_kwargs, **common_kwargs).train().requires_grad_(False).to(device) # subclass of torch.nn.Module + G_ema = copy.deepcopy(G).eval() + + # Resume from existing pickle. + if (resume_pkl is not None) and (rank == 0): + print(f'Resuming from "{resume_pkl}"') + with dnnlib.util.open_url(resume_pkl) as f: + resume_data = legacy.load_network_pkl(f) + for name, module in [('G', G), ('D', D), ('G_ema', G_ema)]: + misc.copy_params_and_buffers(resume_data[name], module, require_all=False) + + # Print network summary tables. + if rank == 0: + z = torch.empty([batch_gpu, G.z_dim], device=device) + c = torch.empty([batch_gpu, G.c_dim], device=device) + img = misc.print_module_summary(G, [z, c]) + misc.print_module_summary(D, [img, c]) + + # Setup augmentation. + if rank == 0: + print('Setting up augmentation...') + augment_pipe = None + ada_stats = None + if (augment_kwargs is not None) and (augment_p > 0 or ada_target is not None): + augment_pipe = dnnlib.util.construct_class_by_name(**augment_kwargs).train().requires_grad_(False).to(device) # subclass of torch.nn.Module + augment_pipe.p.copy_(torch.as_tensor(augment_p)) + if ada_target is not None: + ada_stats = training_stats.Collector(regex='Loss/signs/real') + + # Distribute across GPUs. 
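+ # Modules with trainable parameters are wrapped in DistributedDataParallel with broadcast_buffers=False;
+ # gradient synchronization is deferred and triggered explicitly via misc.ddp_sync() on the last
+ # gradient-accumulation round of each phase.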
+ if rank == 0: + print(f'Distributing across {num_gpus} GPUs...') + ddp_modules = dict() + for name, module in [('G_mapping', G.mapping), ('G_synthesis', G.synthesis), ('D', D), (None, G_ema), ('augment_pipe', augment_pipe)]: + if (num_gpus > 1) and (module is not None) and len(list(module.parameters())) != 0: + module.requires_grad_(True) + module = torch.nn.parallel.DistributedDataParallel(module, device_ids=[device], broadcast_buffers=False) + module.requires_grad_(False) + if name is not None: + ddp_modules[name] = module + + # Setup training phases. + if rank == 0: + print('Setting up training phases...') + loss = dnnlib.util.construct_class_by_name(device=device, **ddp_modules, **loss_kwargs) # subclass of training.loss.Loss + phases = [] + for name, module, opt_kwargs, reg_interval in [('G', G, G_opt_kwargs, G_reg_interval), ('D', D, D_opt_kwargs, D_reg_interval)]: + if reg_interval is None: + opt = dnnlib.util.construct_class_by_name(params=module.parameters(), **opt_kwargs) # subclass of torch.optim.Optimizer + phases += [dnnlib.EasyDict(name=name+'both', module=module, opt=opt, interval=1)] + else: # Lazy regularization. + mb_ratio = reg_interval / (reg_interval + 1) + opt_kwargs = dnnlib.EasyDict(opt_kwargs) + opt_kwargs.lr = opt_kwargs.lr * mb_ratio + opt_kwargs.betas = [beta ** mb_ratio for beta in opt_kwargs.betas] + opt = dnnlib.util.construct_class_by_name(module.parameters(), **opt_kwargs) # subclass of torch.optim.Optimizer + phases += [dnnlib.EasyDict(name=name+'main', module=module, opt=opt, interval=1)] + phases += [dnnlib.EasyDict(name=name+'reg', module=module, opt=opt, interval=reg_interval)] + for phase in phases: + phase.start_event = None + phase.end_event = None + if rank == 0: + phase.start_event = torch.cuda.Event(enable_timing=True) + phase.end_event = torch.cuda.Event(enable_timing=True) + + # Export sample images. + grid_size = None + grid_z = None + grid_c = None + if rank == 0: + print('Exporting sample images...') + grid_size, images, labels = setup_snapshot_image_grid(training_set=training_set) + save_image_grid(images, os.path.join(run_dir, 'reals.png'), drange=[0,255], grid_size=grid_size) + grid_z = torch.randn([labels.shape[0], G.z_dim], device=device).split(batch_gpu) + grid_c = torch.from_numpy(labels).to(device).split(batch_gpu) + images = torch.cat([G_ema(z=z, c=c, noise_mode='const').cpu() for z, c in zip(grid_z, grid_c)]).numpy() + save_image_grid(images, os.path.join(run_dir, 'fakes_init.png'), drange=[-1,1], grid_size=grid_size) + + # Initialize logs. + if rank == 0: + print('Initializing logs...') + stats_collector = training_stats.Collector(regex='.*') + stats_metrics = dict() + stats_jsonl = None + stats_tfevents = None + if rank == 0: + stats_jsonl = open(os.path.join(run_dir, 'stats.jsonl'), 'wt') + try: + import torch.utils.tensorboard as tensorboard + stats_tfevents = tensorboard.SummaryWriter(run_dir) + except ImportError as err: + print('Skipping tfevents export:', err) + + # Train. + if rank == 0: + print(f'Training for {total_kimg} kimg...') + print() + cur_nimg = 0 + cur_tick = 0 + tick_start_nimg = cur_nimg + tick_start_time = time.time() + maintenance_time = tick_start_time - start_time + batch_idx = 0 + if progress_fn is not None: + progress_fn(0, total_kimg) + while True: + + # Fetch training data. 
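+ # Real images arrive as uint8 in [0, 255] and are mapped to [-1, 1] before being split into per-GPU
+ # microbatches; fresh latents and randomly sampled dataset labels are drawn for every training phase.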
+ with torch.autograd.profiler.record_function('data_fetch'): + phase_real_img, phase_real_c = next(training_set_iterator) + phase_real_img = (phase_real_img.to(device).to(torch.float32) / 127.5 - 1).split(batch_gpu) + phase_real_c = phase_real_c.to(device).split(batch_gpu) + all_gen_z = torch.randn([len(phases) * batch_size, G.z_dim], device=device) + all_gen_z = [phase_gen_z.split(batch_gpu) for phase_gen_z in all_gen_z.split(batch_size)] + all_gen_c = [training_set.get_label(np.random.randint(len(training_set))) for _ in range(len(phases) * batch_size)] + all_gen_c = torch.from_numpy(np.stack(all_gen_c)).pin_memory().to(device) + all_gen_c = [phase_gen_c.split(batch_gpu) for phase_gen_c in all_gen_c.split(batch_size)] + + # Execute training phases. + for phase, phase_gen_z, phase_gen_c in zip(phases, all_gen_z, all_gen_c): + if batch_idx % phase.interval != 0: + continue + + # Initialize gradient accumulation. + if phase.start_event is not None: + phase.start_event.record(torch.cuda.current_stream(device)) + phase.opt.zero_grad(set_to_none=True) + phase.module.requires_grad_(True) + + # Accumulate gradients over multiple rounds. + for round_idx, (real_img, real_c, gen_z, gen_c) in enumerate(zip(phase_real_img, phase_real_c, phase_gen_z, phase_gen_c)): + sync = (round_idx == batch_size // (batch_gpu * num_gpus) - 1) + gain = phase.interval + loss.accumulate_gradients(phase=phase.name, real_img=real_img, real_c=real_c, gen_z=gen_z, gen_c=gen_c, sync=sync, gain=gain) + + # Update weights. + phase.module.requires_grad_(False) + with torch.autograd.profiler.record_function(phase.name + '_opt'): + for param in phase.module.parameters(): + if param.grad is not None: + misc.nan_to_num(param.grad, nan=0, posinf=1e5, neginf=-1e5, out=param.grad) + phase.opt.step() + if phase.end_event is not None: + phase.end_event.record(torch.cuda.current_stream(device)) + + # Update G_ema. + with torch.autograd.profiler.record_function('Gema'): + ema_nimg = ema_kimg * 1000 + if ema_rampup is not None: + ema_nimg = min(ema_nimg, cur_nimg * ema_rampup) + ema_beta = 0.5 ** (batch_size / max(ema_nimg, 1e-8)) + for p_ema, p in zip(G_ema.parameters(), G.parameters()): + p_ema.copy_(p.lerp(p_ema, ema_beta)) + for b_ema, b in zip(G_ema.buffers(), G.buffers()): + b_ema.copy_(b) + + # Update state. + cur_nimg += batch_size + batch_idx += 1 + + # Execute ADA heuristic. + if (ada_stats is not None) and (batch_idx % ada_interval == 0): + ada_stats.update() + adjust = np.sign(ada_stats['Loss/signs/real'] - ada_target) * (batch_size * ada_interval) / (ada_kimg * 1000) + augment_pipe.p.copy_((augment_pipe.p + adjust).max(misc.constant(0, device=device))) + + # Perform maintenance tasks once per tick. + done = (cur_nimg >= total_kimg * 1000) + if (not done) and (cur_tick != 0) and (cur_nimg < tick_start_nimg + kimg_per_tick * 1000): + continue + + # Print status line, accumulating the same information in stats_collector. 
+ tick_end_time = time.time() + fields = [] + fields += [f"tick {training_stats.report0('Progress/tick', cur_tick):<5d}"] + fields += [f"kimg {training_stats.report0('Progress/kimg', cur_nimg / 1e3):<8.1f}"] + fields += [f"time {dnnlib.util.format_time(training_stats.report0('Timing/total_sec', tick_end_time - start_time)):<12s}"] + fields += [f"sec/tick {training_stats.report0('Timing/sec_per_tick', tick_end_time - tick_start_time):<7.1f}"] + fields += [f"sec/kimg {training_stats.report0('Timing/sec_per_kimg', (tick_end_time - tick_start_time) / (cur_nimg - tick_start_nimg) * 1e3):<7.2f}"] + fields += [f"maintenance {training_stats.report0('Timing/maintenance_sec', maintenance_time):<6.1f}"] + fields += [f"cpumem {training_stats.report0('Resources/cpu_mem_gb', psutil.Process(os.getpid()).memory_info().rss / 2**30):<6.2f}"] + fields += [f"gpumem {training_stats.report0('Resources/peak_gpu_mem_gb', torch.cuda.max_memory_allocated(device) / 2**30):<6.2f}"] + torch.cuda.reset_peak_memory_stats() + fields += [f"augment {training_stats.report0('Progress/augment', float(augment_pipe.p.cpu()) if augment_pipe is not None else 0):.3f}"] + training_stats.report0('Timing/total_hours', (tick_end_time - start_time) / (60 * 60)) + training_stats.report0('Timing/total_days', (tick_end_time - start_time) / (24 * 60 * 60)) + if rank == 0: + print(' '.join(fields)) + + # Check for abort. + if (not done) and (abort_fn is not None) and abort_fn(): + done = True + if rank == 0: + print() + print('Aborting...') + + # Save image snapshot. + if (rank == 0) and (image_snapshot_ticks is not None) and (done or cur_tick % image_snapshot_ticks == 0): + images = torch.cat([G_ema(z=z, c=c, noise_mode='const').cpu() for z, c in zip(grid_z, grid_c)]).numpy() + save_image_grid(images, os.path.join(run_dir, f'fakes{cur_nimg//1000:06d}.png'), drange=[-1,1], grid_size=grid_size) + + # Save network snapshot. + snapshot_pkl = None + snapshot_data = None + if (network_snapshot_ticks is not None) and (done or cur_tick % network_snapshot_ticks == 0): + snapshot_data = dict(training_set_kwargs=dict(training_set_kwargs)) + for name, module in [('G', G), ('D', D), ('G_ema', G_ema), ('augment_pipe', augment_pipe)]: + if module is not None: + if num_gpus > 1: + misc.check_ddp_consistency(module, ignore_regex=r'.*\.w_avg') + module = copy.deepcopy(module).eval().requires_grad_(False).cpu() + snapshot_data[name] = module + del module # conserve memory + snapshot_pkl = os.path.join(run_dir, f'network-snapshot-{cur_nimg//1000:06d}.pkl') + if rank == 0: + with open(snapshot_pkl, 'wb') as f: + pickle.dump(snapshot_data, f) + + # Evaluate metrics. + if (snapshot_data is not None) and (len(metrics) > 0): + if rank == 0: + print('Evaluating metrics...') + for metric in metrics: + result_dict = metric_main.calc_metric(metric=metric, G=snapshot_data['G_ema'], + dataset_kwargs=training_set_kwargs, num_gpus=num_gpus, rank=rank, device=device) + if rank == 0: + metric_main.report_metric(result_dict, run_dir=run_dir, snapshot_pkl=snapshot_pkl) + stats_metrics.update(result_dict.results) + del snapshot_data # conserve memory + + # Collect statistics. + for phase in phases: + value = [] + if (phase.start_event is not None) and (phase.end_event is not None): + phase.end_event.synchronize() + value = phase.start_event.elapsed_time(phase.end_event) + training_stats.report0('Timing/' + phase.name, value) + stats_collector.update() + stats_dict = stats_collector.as_dict() + + # Update logs. 
+ timestamp = time.time() + if stats_jsonl is not None: + fields = dict(stats_dict, timestamp=timestamp) + stats_jsonl.write(json.dumps(fields) + '\n') + stats_jsonl.flush() + if stats_tfevents is not None: + global_step = int(cur_nimg / 1e3) + walltime = timestamp - start_time + for name, value in stats_dict.items(): + stats_tfevents.add_scalar(name, value.mean, global_step=global_step, walltime=walltime) + for name, value in stats_metrics.items(): + stats_tfevents.add_scalar(f'Metrics/{name}', value, global_step=global_step, walltime=walltime) + stats_tfevents.flush() + if progress_fn is not None: + progress_fn(cur_nimg // 1000, total_kimg) + + # Update state. + cur_tick += 1 + tick_start_nimg = cur_nimg + tick_start_time = time.time() + maintenance_time = tick_start_time - tick_end_time + if done: + break + + # Done. + if rank == 0: + print() + print('Exiting...') + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_converter.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..3e0ffb5fc4de34d0301049d85f302499a8559ec3 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_converter.py @@ -0,0 +1,266 @@ +# python3.7 +"""Converts StyleGAN2-ADA model weights from TensorFlow to PyTorch. + +The models can be trained through OR released by the repository: + +https://github.com/NVlabs/stylegan2-ada +""" + +import os +import sys +import pickle +import warnings +warnings.filterwarnings('ignore', category=FutureWarning) + +# pylint: disable=wrong-import-position +from tqdm import tqdm +import numpy as np +import tensorflow as tf +import torch +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +from models import build_model +from utils.visualizer import HtmlPageVisualizer +from utils.visualizer import postprocess_image +# pylint: enable=wrong-import-position + +__all__ = ['convert_stylegan2ada_tf_weight'] + +GAN_TPYE = 'stylegan2' +OFFICIAL_CODE_DIR = 'stylegan2ada_tf_official' +BASE_DIR = os.path.dirname(os.path.relpath(__file__)) +CODE_PATH = os.path.join(BASE_DIR, OFFICIAL_CODE_DIR) + +TRUNC_PSI = 0.5 +TRUNC_LAYERS = 18 +RANDOMIZE_NOISE = False + + +def convert_stylegan2ada_tf_weight(tf_weight_path, + pth_weight_path, + test_num=10, + save_test_image=False, + verbose=False): + """Converts the pre-trained StyleGAN2-ADA weights. + + Args: + tf_weight_path: Path to the TensorFlow model to load weights from. + pth_weight_path: Path to the PyTorch model to save converted weights. + test_num: Number of samples used to test the conversion. (default: 10) + save_test_image: Whether to save the test images. (default: False) + verbose: Whether to print verbose log message. 
(default: False) + """ + sess = tf.compat.v1.InteractiveSession() + + print(f'========================================') + print(f'Loading TensorFlow weights from `{tf_weight_path}` ...') + sys.path.insert(0, CODE_PATH) + with open(tf_weight_path, 'rb') as f: + G, D, Gs = pickle.load(f) + sys.path.pop(0) + print(f'Successfully loaded!') + print(f'--------------------') + + z_space_dim = G.input_shapes[0][1] + label_size = G.input_shapes[1][1] + w_space_dim = G.components.mapping.output_shape[2] + image_channels = G.output_shape[1] + resolution = G.output_shape[2] + repeat_w = True + + print(f'Converting TensorFlow weights (G) to PyTorch version ...') + G_vars = dict(G.__getstate__()['variables']) + G_vars.update(dict(G.components.mapping.__getstate__()['variables'])) + G_vars.update(dict(G.components.synthesis.__getstate__()['variables'])) + G_pth = build_model(gan_type=GAN_TPYE, + module='generator', + resolution=resolution, + z_space_dim=z_space_dim, + w_space_dim=w_space_dim, + label_size=label_size, + repeat_w=repeat_w, + image_channels=image_channels) + G_state_dict = G_pth.state_dict() + for pth_var_name, tf_var_name in G_pth.pth_to_tf_var_mapping.items(): + assert tf_var_name in G_vars + assert pth_var_name in G_state_dict + if verbose: + print(f' Converting `{tf_var_name}` to `{pth_var_name}`.') + var = torch.from_numpy(np.array(G_vars[tf_var_name])) + if 'weight' in tf_var_name: + if 'Dense' in tf_var_name: + var = var.permute(1, 0) + elif 'mod_weight' in tf_var_name: + var = var.permute(1, 0) + elif 'LabelEmbed' in tf_var_name: + var = var.permute(1, 0) + else: + var = var.permute(3, 2, 0, 1) + G_state_dict[pth_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Converting TensorFlow weights (Gs) to PyTorch version ...') + Gs_vars = dict(Gs.__getstate__()['variables']) + Gs_vars.update(dict(Gs.components.mapping.__getstate__()['variables'])) + Gs_vars.update(dict(Gs.components.synthesis.__getstate__()['variables'])) + Gs_pth = build_model(gan_type=GAN_TPYE, + module='generator', + resolution=resolution, + z_space_dim=z_space_dim, + w_space_dim=w_space_dim, + label_size=label_size, + repeat_w=True, + image_channels=image_channels) + Gs_state_dict = Gs_pth.state_dict() + for pth_var_name, tf_var_name in Gs_pth.pth_to_tf_var_mapping.items(): + assert tf_var_name in Gs_vars + assert pth_var_name in Gs_state_dict + if verbose: + print(f' Converting `{tf_var_name}` to `{pth_var_name}`.') + var = torch.from_numpy(np.array(Gs_vars[tf_var_name])) + if 'weight' in tf_var_name: + if 'Dense' in tf_var_name: + var = var.permute(1, 0) + elif 'mod_weight' in tf_var_name: + var = var.permute(1, 0) + elif 'LabelConcat' in tf_var_name: + pass + else: + var = var.permute(3, 2, 0, 1) + Gs_state_dict[pth_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Converting TensorFlow weights (D) to PyTorch version ...') + D_vars = dict(D.__getstate__()['variables']) + D_pth = build_model(gan_type=GAN_TPYE, + module='discriminator', + resolution=resolution, + label_size=label_size, + image_channels=image_channels) + D_state_dict = D_pth.state_dict() + for pth_var_name, tf_var_name in D_pth.pth_to_tf_var_mapping.items(): + assert tf_var_name in D_vars + assert pth_var_name in D_state_dict + if verbose: + print(f' Converting `{tf_var_name}` to `{pth_var_name}`.') + var = torch.from_numpy(np.array(D_vars[tf_var_name])) + if 'weight' in tf_var_name: + if 'Dense' in tf_var_name: + var = var.permute(1, 0) + elif 'LabelEmbed' 
in tf_var_name: + var = var.permute(1, 0) + elif 'Mapping' in tf_var_name: + var = var.permute(1, 0) + elif 'Output' in tf_var_name: + var = var.permute(1, 0) + else: + var = var.permute(3, 2, 0, 1) + D_state_dict[pth_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Saving pth weights to `{pth_weight_path}` ...') + state_dict = { + 'generator': G_state_dict, + 'discriminator': D_state_dict, + 'generator_smooth': Gs_state_dict, + } + torch.save(state_dict, pth_weight_path) + print(f'Successfully saved!') + print(f'--------------------') + + # Start testing if needed. + if test_num <= 0 or not tf.test.is_built_with_cuda(): + warnings.warn(f'Skip testing the converted weights!') + sess.close() + return + + if save_test_image: + html = HtmlPageVisualizer(num_rows=test_num, num_cols=3) + html.set_headers(['Index', 'Before Conversion', 'After Conversion']) + for i in range(test_num): + html.set_cell(i, 0, text=f'{i}') + + print(f'Testing conversion results ...') + G_pth.load_state_dict(G_state_dict) + D_pth.load_state_dict(D_state_dict) + Gs_pth.load_state_dict(Gs_state_dict) + G_pth.eval().cuda() + D_pth.eval().cuda() + Gs_pth.eval().cuda() + + gs_distance = 0.0 + dg_distance = 0.0 + for i in tqdm(range(test_num)): + # Test Gs(z). + code = np.random.randn(1, z_space_dim) + pth_code = torch.from_numpy(code).type(torch.FloatTensor).cuda() + if label_size: + label_id = np.random.randint(label_size) + label = np.zeros((1, label_size), np.float32) + label[0, label_id] = 1.0 + pth_label = torch.from_numpy(label).type(torch.FloatTensor).cuda() + else: + label_id = 0 + label = None + pth_label = None + tf_output = Gs.run(code, + label, + truncation_psi=TRUNC_PSI, + truncation_cutoff=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE) + pth_output = Gs_pth(pth_code, + label=pth_label, + trunc_psi=TRUNC_PSI, + trunc_layers=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE)['image'] + pth_output = pth_output.detach().cpu().numpy() + distance = np.average(np.abs(tf_output - pth_output)) + if verbose: + print(f' Test {i:03d}: Gs distance {distance:.6e}.') + gs_distance += distance + + if save_test_image: + html.set_cell(i, 1, image=postprocess_image(tf_output)[0]) + html.set_cell(i, 2, image=postprocess_image(pth_output)[0]) + + # Test D(G(z)). 
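+ # The same random latent (and one-hot label, if any) is pushed through both the TensorFlow and the
+ # converted PyTorch generator/discriminator stacks, and the mean absolute difference between the two
+ # discriminator outputs is accumulated.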
+ code = np.random.randn(1, z_space_dim) + pth_code = torch.from_numpy(code).type(torch.FloatTensor).cuda() + if label_size: + label_id = np.random.randint(label_size) + label = np.zeros((1, label_size), np.float32) + label[0, label_id] = 1.0 + pth_label = torch.from_numpy(label).type(torch.FloatTensor).cuda() + else: + label_id = 0 + label = None + pth_label = None + tf_image = G.run(code, + label, + truncation_psi=TRUNC_PSI, + truncation_cutoff=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE) + tf_output = D.run(tf_image, label) + pth_image = G_pth(pth_code, + label=pth_label, + trunc_psi=TRUNC_PSI, + trunc_layers=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE)['image'] + pth_output = D_pth(pth_image, pth_label) + pth_output = pth_output.detach().cpu().numpy() + distance = np.average(np.abs(tf_output - pth_output)) + if verbose: + print(f' Test {i:03d}: D(G) distance {distance:.6e}.') + dg_distance += distance + + print(f'Average Gs distance is {gs_distance / test_num:.6e}.') + print(f'Average D(G) distance is {dg_distance / test_num:.6e}.') + print(f'========================================') + + if save_test_image: + html.save(f'{pth_weight_path}.conversion_test.html') + + sess.close() diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/Dockerfile b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..51817e50bdc0edb9af549bf4be45ff953d639690 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/Dockerfile @@ -0,0 +1,18 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +ARG BASE_IMAGE=nvcr.io/nvidia/tensorflow:20.10-tf1-py3 +FROM $BASE_IMAGE + +RUN pip install scipy==1.3.3 +RUN pip install requests==2.22.0 +RUN pip install Pillow==6.2.1 +RUN pip install h5py==2.9.0 +RUN pip install imageio==2.9.0 +RUN pip install imageio-ffmpeg==0.4.2 +RUN pip install tqdm==4.49.0 diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/README.md b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/README.md new file mode 100644 index 0000000000000000000000000000000000000000..db4d107a6f7130e2557e6d823dbf1bb423c3e7d6 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/README.md @@ -0,0 +1,386 @@ +## StyleGAN2 with adaptive discriminator augmentation (ADA)
— Official TensorFlow implementation + +![Teaser image](./docs/stylegan2-ada-teaser-1024x252.png) + +**Training Generative Adversarial Networks with Limited Data**
+Tero Karras, Miika Aittala, Janne Hellsten, Samuli Laine, Jaakko Lehtinen, Timo Aila
+https://arxiv.org/abs/2006.06676
+ +Abstract: *Training generative adversarial networks (GAN) using too little data typically leads to discriminator overfitting, causing training to diverge. We propose an adaptive discriminator augmentation mechanism that significantly stabilizes training in limited data regimes. The approach does not require changes to loss functions or network architectures, and is applicable both when training from scratch and when fine-tuning an existing GAN on another dataset. We demonstrate, on several datasets, that good results are now possible using only a few thousand training images, often matching StyleGAN2 results with an order of magnitude fewer images. We expect this to open up new application domains for GANs. We also find that the widely used CIFAR-10 is, in fact, a limited data benchmark, and improve the record FID from 5.59 to 2.42.* + +For business inquiries, please contact [researchinquiries@nvidia.com](mailto:researchinquiries@nvidia.com)
+For press and other inquiries, please contact Hector Marinez at [hmarinez@nvidia.com](mailto:hmarinez@nvidia.com)
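+
+The abstract gives only a high-level view of the adaptive augmentation mechanism. As a rough, illustrative sketch (not code from this repository), the adaptation loop measures the overfitting heuristic r_t = E[sign(D(x_real))] over recent minibatches and nudges the augmentation probability p towards a fixed target; the function and parameter names below (`update_ada_p`, `speed`) are hypothetical:
+
+```.python
+import numpy as np
+
+def update_ada_p(p, d_real_logits, target=0.6, speed=1e-3):
+    # r_t -> 1 means the discriminator confidently separates real images
+    # (i.e. it is starting to overfit); r_t -> 0 means it is not.
+    r_t = np.mean(np.sign(d_real_logits))
+    # Increase p when the heuristic exceeds the target, decrease it otherwise.
+    p += speed * np.sign(r_t - target)
+    return float(np.clip(p, 0.0, 1.0))
+
+# Toy usage: p grows while the discriminator looks overconfident on real data.
+p = 0.0
+for _ in range(1000):
+    d_real_logits = np.random.randn(64) + 2.0  # stand-in for D outputs on reals
+    p = update_ada_p(p, d_real_logits)
+print(f'p = {p:.2f}')
+```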
+ +## Looking for the PyTorch version? + +The [Official PyTorch version](https://github.com/NVlabs/stylegan2-ada-pytorch) is now available and supersedes the TensorFlow version. See the full list of versions [here](https://nvlabs.github.io/stylegan2/versions.html). + +## What's new + +This repository supersedes the original [StyleGAN2](https://github.com/NVlabs/stylegan2) with the following new features: + +* **ADA**: Significantly better results for datasets with less than ~30k training images. State-of-the-art results for CIFAR-10. +* **Mixed-precision support**: ~1.6x faster training, ~1.3x faster inference, ~1.5x lower GPU memory consumption. +* **Better hyperparameter defaults**: Reasonable out-of-the-box results for different dataset resolutions and GPU counts. +* **Clean codebase**: Extensive refactoring and simplification. The code should be generally easier to work with. +* **Command line tools**: Easily reproduce training runs from the paper, generate projection videos for arbitrary images, etc. +* **Network import**: Full support for network pickles produced by [StyleGAN](https://github.com/NVlabs/stylegan) and [StyleGAN2](https://github.com/NVlabs/stylegan2). Faster loading times. +* **Augmentation pipeline**: Self-contained, reusable GPU implementation of extensive high-quality image augmentations. +* **Bugfixes** + +## External data repository + +| Path | Description +| :--- | :---------- +| [stylegan2-ada](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/) | Main directory hosted on Amazon S3 +|   ├  [ada-paper.pdf](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/ada-paper.pdf) | Paper PDF +|   ├  [images](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/images/) | Curated example images produced using the pre-trained models +|   ├  [videos](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/videos/) | Curated example interpolation videos +|   └  [pretrained](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/) | Pre-trained models +|     ├  metfaces.pkl | MetFaces at 1024x1024, transfer learning from FFHQ using ADA +|     ├  brecahad.pkl | BreCaHAD at 512x512, trained from scratch using ADA +|     ├  afhqcat.pkl | AFHQ Cat at 512x512, trained from scratch using ADA +|     ├  afhqdog.pkl | AFHQ Dog at 512x512, trained from scratch using ADA +|     ├  afhqwild.pkl | AFHQ Wild at 512x512, trained from scratch using ADA +|     ├  cifar10.pkl | Class-conditional CIFAR-10 at 32x32 +|     ├  ffhq.pkl | FFHQ at 1024x1024, trained using original StyleGAN2 +|     ├  [paper-fig7c-training-set-sweeps](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/paper-fig7c-training-set-sweeps/) | All models used in Fig.7c (baseline, ADA, bCR) +|     ├  [paper-fig8a-comparison-methods](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/paper-fig8a-comparison-methods/) | All models used in Fig.8a (comparison methods) +|     ├  [paper-fig8b-discriminator-capacity](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/paper-fig8b-discriminator-capacity/) | All models used in Fig.8b (discriminator capacity) +|     ├  [paper-fig11a-small-datasets](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/paper-fig11a-small-datasets/) | All models used in Fig.11a (small datasets, transfer learning) +|     ├  [paper-fig11b-cifar10](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/paper-fig11b-cifar10/) | All models used in Fig.11b (CIFAR-10) +|     ├  [transfer-learning-source-nets](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/transfer-learning-source-nets/) | 
Models used as starting point for transfer learning +|     └  [metrics](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/) | Feature detectors used by the quality metrics + +## Requirements + +* Linux and Windows are supported, but we recommend Linux for performance and compatibility reasons. +* 64-bit Python 3.6 or 3.7. We recommend Anaconda3 with numpy 1.14.3 or newer. +* We recommend TensorFlow 1.14, which we used for all experiments in the paper, but TensorFlow 1.15 is also supported on Linux. TensorFlow 2.x is not supported. +* On Windows you need to use TensorFlow 1.14, as the standard 1.15 installation does not include necessary C++ headers. +* 1–8 high-end NVIDIA GPUs with at least 12 GB of GPU memory, NVIDIA drivers, CUDA 10.0 toolkit and cuDNN 7.5. +* Docker users: use the [provided Dockerfile](./Dockerfile) to build an image with the required library dependencies. + +The generator and discriminator networks rely heavily on custom TensorFlow ops that are compiled on the fly using NVCC. On Windows, the compilation requires Microsoft Visual Studio to be in `PATH`. We recommend installing [Visual Studio Community Edition](https://visualstudio.microsoft.com/vs/) and adding it into `PATH` using `"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"`. + +## Getting started + +Pre-trained networks are stored as `*.pkl` files that can be referenced using local filenames or URLs: + +```.bash +# Generate curated MetFaces images without truncation (Fig.10 left) +python generate.py --outdir=out --trunc=1 --seeds=85,265,297,849 \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl + +# Generate uncurated MetFaces images with truncation (Fig.12 upper left) +python generate.py --outdir=out --trunc=0.7 --seeds=600-605 \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl + +# Generate class conditional CIFAR-10 images (Fig.17 left, Car) +python generate.py --outdir=out --trunc=1 --seeds=0-35 --class=1 \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/cifar10.pkl +``` + +Outputs from the above commands are placed under `out/*.png`. You can change the location with `--outdir`. Temporary cache files, such as CUDA build results and downloaded network pickles, will be saved under `$HOME/.cache/dnnlib`. This can be overridden using the `DNNLIB_CACHE_DIR` environment variable. + +**Docker**: You can run the above curated image example using Docker as follows: + +```.bash +docker build --tag stylegan2ada:latest . +docker run --gpus all -it --rm -v `pwd`:/scratch --user $(id -u):$(id -g) stylegan2ada:latest bash -c \ + "(cd /scratch && DNNLIB_CACHE_DIR=/scratch/.cache python3 generate.py --trunc=1 --seeds=85,265,297,849 \ + --outdir=out --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl)" +``` + +**Note**: The above defaults to a container base image that requires NVIDIA driver release `r455.23` or later. To build an image for older drivers and GPUs, run: + +```.bash +docker build --build-arg BASE_IMAGE=tensorflow/tensorflow:1.14.0-gpu-py3 --tag stylegan2ada:latest . 
+``` + +## Projecting images to latent space + +To find the matching latent vector for a given image file, run: + +```.bash +python projector.py --outdir=out --target=targetimg.png \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl +``` + +For optimal results, the target image should be cropped and aligned similar to the original FFHQ dataset. The above command saves the projection target `out/target.png`, result `out/proj.png`, latent vector `out/dlatents.npz`, and progression video `out/proj.mp4`. You can render the resulting latent vector by specifying `--dlatents` for `python generate.py`: + +```.bash +python generate.py --outdir=out --dlatents=out/dlatents.npz \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl +``` + +## Preparing datasets + +Datasets are stored as multi-resolution TFRecords, i.e., the same format used by [StyleGAN](https://github.com/NVlabs/stylegan) and [StyleGAN2](https://github.com/NVlabs/stylegan2). Each dataset consists of multiple `*.tfrecords` files stored under a common directory, e.g., `~/datasets/ffhq/ffhq-r*.tfrecords` + +**MetFaces**: Download the [MetFaces dataset](https://github.com/NVlabs/metfaces-dataset) and convert to TFRecords: + +```.bash +python dataset_tool.py create_from_images ~/datasets/metfaces ~/downloads/metfaces/images +python dataset_tool.py display ~/datasets/metfaces +``` + +**BreCaHAD**: Download the [BreCaHAD dataset](https://figshare.com/articles/BreCaHAD_A_Dataset_for_Breast_Cancer_Histopathological_Annotation_and_Diagnosis/7379186). Generate 512x512 resolution crops and convert to TFRecords: + +```.bash +python dataset_tool.py extract_brecahad_crops --cropsize=512 \ + --output_dir=/tmp/brecahad-crops --brecahad_dir=~/downloads/brecahad/images + +python dataset_tool.py create_from_images ~/datasets/brecahad /tmp/brecahad-crops +python dataset_tool.py display ~/datasets/brecahad +``` + +**AFHQ**: Download the [AFHQ dataset](https://github.com/clovaai/stargan-v2/blob/master/README.md#animal-faces-hq-dataset-afhq) and convert to TFRecords: + +```.bash +python dataset_tool.py create_from_images ~/datasets/afhqcat ~/downloads/afhq/train/cat +python dataset_tool.py create_from_images ~/datasets/afhqdog ~/downloads/afhq/train/dog +python dataset_tool.py create_from_images ~/datasets/afhqwild ~/downloads/afhq/train/wild +python dataset_tool.py display ~/datasets/afhqcat +``` + +**CIFAR-10**: Download the [CIFAR-10 python version](https://www.cs.toronto.edu/~kriz/cifar.html). 
Convert to two separate TFRecords for unconditional and class-conditional training: + +```.bash +python dataset_tool.py create_cifar10 --ignore_labels=1 \ + ~/datasets/cifar10u ~/downloads/cifar-10-batches-py + +python dataset_tool.py create_cifar10 --ignore_labels=0 \ + ~/datasets/cifar10c ~/downloads/cifar-10-batches-py + +python dataset_tool.py display ~/datasets/cifar10c +``` + +**FFHQ**: Download the [Flickr-Faces-HQ](https://github.com/NVlabs/ffhq-dataset) dataset as TFRecords: + +```.bash +pushd ~ +git clone https://github.com/NVlabs/ffhq-dataset.git +cd ffhq-dataset +python download_ffhq.py --tfrecords +popd +python dataset_tool.py display ~/ffhq-dataset/tfrecords/ffhq +``` + +**LSUN**: Download the desired LSUN categories in LMDB format from the [LSUN project page](https://www.yf.io/p/lsun) and convert to TFRecords: + +```.bash +python dataset_tool.py create_lsun --resolution=256 --max_images=200000 \ + ~/datasets/lsuncat200k ~/downloads/lsun/cat_lmdb + +python dataset_tool.py display ~/datasets/lsuncat200k +``` + +**Custom**: Custom datasets can be created by placing all images under a single directory. The images must be square-shaped and they must all have the same power-of-two dimensions. To convert the images to multi-resolution TFRecords, run: + +```.bash +python dataset_tool.py create_from_images ~/datasets/custom ~/custom-images +python dataset_tool.py display ~/datasets/custom +``` + +## Training new networks + +In its most basic form, training new networks boils down to: + +```.bash +python train.py --outdir=~/training-runs --gpus=1 --data=~/datasets/custom --dry-run +python train.py --outdir=~/training-runs --gpus=1 --data=~/datasets/custom +``` + +The first command is optional; it will validate the arguments, print out the resulting training configuration, and exit. The second command will kick off the actual training. + +In this example, the results will be saved to a newly created directory `~/training-runs/-custom-auto1` (controlled by `--outdir`). The training will export network pickles (`network-snapshot-.pkl`) and example images (`fakes.png`) at regular intervals (controlled by `--snap`). For each pickle, it will also evaluate FID by default (controlled by `--metrics`) and log the resulting scores in `metric-fid50k_full.txt`. + +The name of the output directory (e.g., `00000-custom-auto1`) reflects the hyperparameter configuration that was used. In this case, `custom` indicates the training set (`--data`) and `auto1` indicates the *base configuration* that was used to select the hyperparameters (`--cfg`): + +| Base config | Description +| :---------- | :---------- +| `auto` (default) | Automatically select reasonable defaults based on resolution and GPU count. Serves as a good starting point for new datasets, but does not necessarily lead to optimal results. +| `stylegan2` | Reproduce results for StyleGAN2 config F at 1024x1024 using 1, 2, 4, or 8 GPUs. +| `paper256` | Reproduce results for FFHQ and LSUN Cat at 256x256 using 1, 2, 4, or 8 GPUs. +| `paper512` | Reproduce results for BreCaHAD and AFHQ at 512x512 using 1, 2, 4, or 8 GPUs. +| `paper1024` | Reproduce results for MetFaces at 1024x1024 using 1, 2, 4, or 8 GPUs. +| `cifar` | Reproduce results for CIFAR-10 (tuned configuration) using 1 or 2 GPUs. +| `cifarbaseline` | Reproduce results for CIFAR-10 (baseline configuration) using 1 or 2 GPUs. + +The training configuration can be further customized with additional arguments. Common examples: + +* `--aug=noaug` disables ADA (default: enabled). 
+* `--mirror=1` amplifies the dataset with x-flips. Often beneficial, even with ADA. +* `--resume=ffhq1024 --snap=10` performs transfer learning from FFHQ trained at 1024x1024. +* `--resume=~/training-runs//network-snapshot-.pkl` resumes where a previous training run left off. +* `--gamma=10` overrides R1 gamma. We strongly recommend trying out at least a few different values for each new dataset. + +Augmentation fine-tuning: + +* `--aug=ada --target=0.7` adjusts ADA target value (default: 0.6). +* `--aug=adarv` selects the alternative ADA heuristic (requires a separate validation set). +* `--augpipe=blit` limits the augmentation pipeline to pixel blitting only. +* `--augpipe=bgcfnc` enables all available augmentations (blit, geom, color, filter, noise, cutout). +* `--cmethod=bcr` enables bCR with small integer translations. + +Please refer to [`python train.py --help`](./docs/train-help.txt) for the full list. + +## Expected training time + +The total training time depends heavily on the resolution, number of GPUs, desired quality, dataset, and hyperparameters. In general, the training time can be expected to scale linearly with respect to the resolution and inversely proportional with respect to the number of GPUs. Small datasets tend to reach their lowest achievable FID faster than larger ones, but the convergence is somewhat less predictable. Transfer learning tends to converge significantly faster than training from scratch. + +To give a rough idea of typical training times, the following figure shows several examples of FID as a function of wallclock time. Each curve corresponds to training a given dataset from scratch using `--cfg=auto` with a given number of NVIDIA Tesla V100 GPUs: + +![Training curves](./docs/stylegan2-ada-training-curves.png) + +Please note that `--cfg=auto` only serves as a reasonable first guess for the hyperparameters — it does not necessarily lead to optimal results for a given dataset. For example, `--cfg=stylegan2` yields considerably better FID for FFHQ-140k at 1024x1024 than illustrated above. We recommend trying out at least a few different values of `--gamma` for each new dataset. + +## Preparing training set sweeps + +In the paper, we perform several experiments using artificially limited/amplified versions of the training data, such as `ffhq30k`, `ffhq140k`, and `lsuncat30k`. These are constructed by first unpacking the original dataset into a temporary directory with `python dataset_tool.py unpack` and then repackaging the appropriate versions into TFRecords with `python dataset_tool.py pack`. In the following examples, the temporary directories are created under `/tmp` and can be safely deleted afterwards. + +```.bash +# Unpack FFHQ images at 256x256 resolution. +python dataset_tool.py unpack --resolution=256 \ + --tfrecord_dir=~/ffhq-dataset/tfrecords/ffhq --output_dir=/tmp/ffhq-unpacked + +# Create subset with 30k images. +python dataset_tool.py pack --num_train=30000 --num_validation=10000 --seed=123 \ + --tfrecord_dir=~/datasets/ffhq30k --unpacked_dir=/tmp/ffhq-unpacked + +# Create amplified version with 140k images. +python dataset_tool.py pack --num_train=70000 --num_validation=0 --mirror=1 --seed=123 \ + --tfrecord_dir=~/datasets/ffhq140k --unpacked_dir=/tmp/ffhq-unpacked + +# Unpack LSUN Cat images at 256x256 resolution. +python dataset_tool.py unpack --resolution=256 \ + --tfrecord_dir=~/datasets/lsuncat200k --output_dir=/tmp/lsuncat200k-unpacked + +# Create subset with 30k images. 
+python dataset_tool.py pack --num_train=30000 --num_validation=10000 --seed=123 \ + --tfrecord_dir=~/datasets/lsuncat30k --unpacked_dir=/tmp/lsuncat200k-unpacked +``` + +Please note that when training with artifically limited/amplified datasets, the quality metrics (e.g., `fid50k_full`) should still be evaluated against the corresponding original datasets. This can be done by specifying a separate metric dataset for `train.py` and `calc_metrics.py` using the `--metricdata` argument. For example: + +```.bash +python train.py [OTHER_OPTIONS] --data=~/datasets/ffhq30k --metricdata=~/ffhq-dataset/tfrecords/ffhq +``` + +## Reproducing training runs from the paper + +The pre-trained network pickles ([`stylegan2-ada/pretrained/paper-fig*`](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/)) reflect the training configuration the same way as the output directory names, making it straightforward to reproduce a given training run from the paper. For example: + +```.bash +# 1. AFHQ Dog +# paper-fig11a-small-datasets/afhqdog-mirror-paper512-ada.pkl +python train.py --outdir=~/training-runs --gpus=8 --data=~/datasets/afhqdog \ + --mirror=1 --cfg=paper512 --aug=ada + +# 2. Class-conditional CIFAR-10 +# pretrained/paper-fig11b-cifar10/cifar10c-cifar-ada-best-fid.pkl +python train.py --outdir=~/training-runs --gpus=2 --data=~/datasets/cifar10c \ + --cfg=cifar --aug=ada + +# 3. MetFaces with transfer learning from FFHQ +# paper-fig11a-small-datasets/metfaces-mirror-paper1024-ada-resumeffhq1024.pkl +python train.py --outdir=~/training-runs --gpus=8 --data=~/datasets/metfaces \ + --mirror=1 --cfg=paper1024 --aug=ada --resume=ffhq1024 --snap=10 + +# 4. 10k subset of FFHQ with ADA and bCR +# paper-fig7c-training-set-sweeps/ffhq10k-paper256-ada-bcr.pkl +python train.py --outdir=~/training-runs --gpus=8 --data=~/datasets/ffhq10k \ + --cfg=paper256 --aug=ada --cmethod=bcr --metricdata=~/ffhq-dataset/tfrecords/ffhq + +# 5. StyleGAN2 config F +# transfer-learning-source-nets/ffhq-res1024-mirror-stylegan2-noaug.pkl +python train.py --outdir=~/training-runs --gpus=8 --data=~/ffhq-dataset/tfrecords/ffhq \ + --res=1024 --mirror=1 --cfg=stylegan2 --aug=noaug --metrics=fid50k +``` + +**Notes**: +* You can use fewer GPUs than shown in the above examples. This will only increase the training time — it will not affect the quality of the results. +* Example 3 specifies `--snap=10` to export network pickles more frequently than usual. This is recommended, because transfer learning tends to yield very fast convergence. +* Example 4 specifies `--metricdata` to evaluate quality metrics against the original FFHQ dataset, not the artificially limited 10k subset used for training. +* Example 5 specifies `--metrics=fid50k` to evaluate FID the same way as in the StyleGAN2 paper (see below). + +## Quality metrics + +By default, `train.py` will automatically compute FID for each network pickle. We strongly recommend inspecting `metric-fid50k_full.txt` at regular intervals to monitor the training progress. When desired, the automatic computation can be disabled with `--metrics none` to speed up the training. + +Additional quality metrics can also be computed after the training: + +```.bash +# Previous training run: look up options automatically, save result to text file. +python calc_metrics.py --metrics=pr50k3_full \ + --network=~/training-runs/00000-ffhq10k-res64-auto1/network-snapshot-000000.pkl + +# Pretrained network pickle: specify dataset explicitly, print result to stdout. 
+python calc_metrics.py --metrics=fid50k_full --metricdata=~/datasets/ffhq --mirror=1 \ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl +``` + +The first example will automatically find `training_options.json` stored alongside the network pickle and perform the same operation as if `--metrics pr50k3_full` had been specified during training. The second example will download a pre-trained network pickle, in which case the values of `--mirror` and `--metricdata` have to be specified explicitly. + +Note that many of the metrics have a significant one-off cost (up to an hour or more) when they are calculated for the first time using a given dataset. Also note that the evaluation is done using a different random seed each time, so the results will vary if the same metric is computed multiple times. + +We employ the following metrics in the ADA paper. The expected execution times correspond to using one Tesla V100 GPU at 1024x1024 and 256x256 resolution: + +| Metric | 1024x1024 | 256x256 | Description | +| :----- | :-------: | :-----: | :---------- | +| `fid50k_full` | 15 min | 5 min | Fréchet inception distance[1] against the full dataset. +| `kid50k_full` | 15 min | 5 min | Kernel inception distance[2] against the full dataset. +| `pr50k3_full` | 20 min | 10 min | Precision and recall[3] againt the full dataset. +| `is50k` | 25 min | 5 min | Inception score[4] for CIFAR-10. + +In addition, all metrics that were used in the [StyleGAN](https://github.com/NVlabs/stylegan) and [StyleGAN2](https://github.com/NVlabs/stylegan2) papers are also supported for backwards compatibility: + +| Legacy: StyleGAN2 | 1024x1024 | Description | +| :---------------- | :-------: | :---------- | +| `fid50k` | 15 min | Fréchet inception distance against 50k real images. +| `kid50k` | 15 min | Kernel inception distance against 50k real images. +| `pr50k3` | 20 min | Precision and recall against 50k real images. +| `ppl2_wend` | 40 min | Perceptual path length[5] in W at path endpoints against full image. + +| Legacy: StyleGAN | 1024x1024 | Description | +| :--------------- | :-------: | :---------- | +| `ppl_zfull` | 40 min | Perceptual path length in Z for full paths against cropped image. +| `ppl_wfull` | 40 min | Perceptual path length in W for full paths against cropped image. +| `ppl_zend` | 40 min | Perceptual path length in Z at path endpoints against cropped image. +| `ppl_wend` | 40 min | Perceptual path length in W at path endpoints against cropped image. +| `ls` | 10 hrs | Linear separability[5] with respect to CelebA attributes. + +References: +1. [GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium](https://arxiv.org/abs/1706.08500), Heusel et al. 2017 +2. [Demystifying MMD GANs](https://arxiv.org/abs/1801.01401), Bińkowski et al. 2018 +3. [Improved Precision and Recall Metric for Assessing Generative Models](https://arxiv.org/abs/1904.06991), Kynkäänniemi et al. 2019 +4. [Improved Techniques for Training GANs](https://arxiv.org/abs/1606.03498), Salimans et al. 2016 +5. [A Style-Based Generator Architecture for Generative Adversarial Networks](https://arxiv.org/abs/1812.04948), Karras et al. 2018 + +## License + +Copyright © 2020, NVIDIA Corporation. All rights reserved. + +This work is made available under the [Nvidia Source Code License](https://nvlabs.github.io/stylegan2-ada/license.html). 
+ +## Citation + +``` +@inproceedings{Karras2020ada, + title = {Training Generative Adversarial Networks with Limited Data}, + author = {Tero Karras and Miika Aittala and Janne Hellsten and Samuli Laine and Jaakko Lehtinen and Timo Aila}, + booktitle = {Proc. NeurIPS}, + year = {2020} +} +``` + +## Development + +This is a research reference implementation and is treated as a +one-time code drop. As such, we do not accept outside code +contributions in the form of pull requests. + +## Acknowledgements + +We thank David Luebke for helpful comments; Tero Kuosmanen and Sabu Nadarajan for their support with compute infrastructure; and Edgar Schönfeld for guidance on setting up unconditional BigGAN. diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/calc_metrics.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/calc_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..78b1e440d20b94a2e19b7498d94d5f6ddad4e69c --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/calc_metrics.py @@ -0,0 +1,163 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Calculate quality metrics for previous training run or pretrained network pickle.""" + +import os +import argparse +import json +import pickle +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_defaults + +#---------------------------------------------------------------------------- + +class UserError(Exception): + pass + +#---------------------------------------------------------------------------- + +def calc_metrics(network_pkl, metric_names, metricdata, mirror, gpus): + tflib.init_tf() + + # Initialize metrics. + metrics = [] + for name in metric_names: + if name not in metric_defaults.metric_defaults: + raise UserError('\n'.join(['--metrics can only contain the following values:', 'none'] + list(metric_defaults.metric_defaults.keys()))) + metrics.append(dnnlib.util.construct_class_by_name(**metric_defaults.metric_defaults[name])) + + # Load network. + if not dnnlib.util.is_url(network_pkl, allow_file_urls=True) and not os.path.isfile(network_pkl): + raise UserError('--network must point to a file or URL') + print(f'Loading network from "{network_pkl}"...') + with dnnlib.util.open_url(network_pkl) as f: + _G, _D, Gs = pickle.load(f) + Gs.print_layers() + + # Look up training options. + run_dir = None + training_options = None + if os.path.isfile(network_pkl): + potential_run_dir = os.path.dirname(network_pkl) + potential_json_file = os.path.join(potential_run_dir, 'training_options.json') + if os.path.isfile(potential_json_file): + print(f'Looking up training options from "{potential_json_file}"...') + run_dir = potential_run_dir + with open(potential_json_file, 'rt') as f: + training_options = json.load(f, object_pairs_hook=dnnlib.EasyDict) + if training_options is None: + print('Could not look up training options; will rely on --metricdata and --mirror') + + # Choose dataset options. 
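+    # Precedence: start from the metric dataset arguments recorded in
+    # training_options.json (when found above), fill in resolution and label
+    # size from the loaded generator, then let explicit --metricdata and
+    # --mirror override; a dataset path must come from one of these sources.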
+ dataset_options = dnnlib.EasyDict() + if training_options is not None: + dataset_options.update(training_options.metric_dataset_args) + dataset_options.resolution = Gs.output_shapes[0][-1] + dataset_options.max_label_size = Gs.input_shapes[1][-1] + if metricdata is not None: + if not os.path.isdir(metricdata): + raise UserError('--metricdata must point to a directory containing *.tfrecords') + dataset_options.path = metricdata + if mirror is not None: + dataset_options.mirror_augment = mirror + if 'path' not in dataset_options: + raise UserError('--metricdata must be specified explicitly') + + # Print dataset options. + print() + print('Dataset options:') + print(json.dumps(dataset_options, indent=2)) + + # Evaluate metrics. + for metric in metrics: + print() + print(f'Evaluating {metric.name}...') + metric.configure(dataset_args=dataset_options, run_dir=run_dir) + metric.run(network_pkl=network_pkl, num_gpus=gpus) + +#---------------------------------------------------------------------------- + +def _str_to_bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + if v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + raise argparse.ArgumentTypeError('Boolean value expected.') + +def _parse_comma_sep(s): + if s is None or s.lower() == 'none' or s == '': + return [] + return s.split(',') + +#---------------------------------------------------------------------------- + +_cmdline_help_epilog = '''examples: + + # Previous training run: look up options automatically, save result to text file. + python %(prog)s --metrics=pr50k3_full \\ + --network=~/training-runs/00000-ffhq10k-res64-auto1/network-snapshot-000000.pkl + + # Pretrained network pickle: specify dataset explicitly, print result to stdout. + python %(prog)s --metrics=fid50k_full --metricdata=~/datasets/ffhq --mirror=1 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl + +available metrics: + + ADA paper: + fid50k_full Frechet inception distance against the full dataset. + kid50k_full Kernel inception distance against the full dataset. + pr50k3_full Precision and recall againt the full dataset. + is50k Inception score for CIFAR-10. + + Legacy: StyleGAN2 + fid50k Frechet inception distance against 50k real images. + kid50k Kernel inception distance against 50k real images. + pr50k3 Precision and recall against 50k real images. + ppl2_wend Perceptual path length in W at path endpoints against full image. + + Legacy: StyleGAN + ppl_zfull Perceptual path length in Z for full paths against cropped image. + ppl_wfull Perceptual path length in W for full paths against cropped image. + ppl_zend Perceptual path length in Z at path endpoints against cropped image. + ppl_wend Perceptual path length in W at path endpoints against cropped image. + ls Linear separability with respect to CelebA attributes. 
+''' + +#---------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description='Calculate quality metrics for previous training run or pretrained network pickle.', + epilog=_cmdline_help_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument('--network', help='Network pickle filename or URL', dest='network_pkl', metavar='PATH') + parser.add_argument('--metrics', help='Comma-separated list or "none" (default: %(default)s)', dest='metric_names', type=_parse_comma_sep, default='fid50k_full', metavar='LIST') + parser.add_argument('--metricdata', help='Dataset to evaluate metrics against (default: look up from training options)', metavar='PATH') + parser.add_argument('--mirror', help='Whether the dataset was augmented with x-flips during training (default: look up from training options)', type=_str_to_bool, metavar='BOOL') + parser.add_argument('--gpus', help='Number of GPUs to use (default: %(default)s)', type=int, default=1, metavar='INT') + + args = parser.parse_args() + try: + calc_metrics(**vars(args)) + except UserError as err: + print(f'Error: {err}') + exit(1) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dataset_tool.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dataset_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..298f42536c636c09784a2cf2b5d58ff9c57e2bc1 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dataset_tool.py @@ -0,0 +1,995 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ +"""Tool for creating multi-resolution TFRecords datasets.""" + +import os +import sys +import glob +import argparse +import threading +import six.moves.queue as Queue +import traceback +import numpy as np +import tensorflow as tf +import PIL.Image +import dnnlib.tflib as tflib +import scipy +import scipy.ndimage +import scipy.misc +import datetime +from tqdm import tqdm + +from training import dataset + +#---------------------------------------------------------------------------- + +def error(msg): + print('Error: ' + msg) + exit(1) + +#---------------------------------------------------------------------------- + +class TFRecordExporter: + def __init__(self, tfrecord_dir, expected_images, print_progress=True, progress_interval=10, tfr_prefix=None): + self.tfrecord_dir = tfrecord_dir + if tfr_prefix is None: + self.tfr_prefix = os.path.join(self.tfrecord_dir, os.path.basename(self.tfrecord_dir)) + else: + self.tfr_prefix = os.path.join(self.tfrecord_dir, tfr_prefix) + self.expected_images = expected_images + self.cur_images = 0 + self.shape = None + self.resolution_log2 = None + self.tfr_writers = [] + self.print_progress = print_progress + self.progress_interval = progress_interval + + if self.print_progress: + name = '' if tfr_prefix is None else f' ({tfr_prefix})' + print(f'Creating dataset "{tfrecord_dir}"{name}') + if not os.path.isdir(self.tfrecord_dir): + os.makedirs(self.tfrecord_dir) + assert os.path.isdir(self.tfrecord_dir) + + def close(self): + if self.print_progress: + print('%-40s\r' % 'Flushing data...', end='', flush=True) + for tfr_writer in self.tfr_writers: + tfr_writer.close() + self.tfr_writers = [] + if self.print_progress: + print('%-40s\r' % '', end='', flush=True) + print('Added %d images.' % self.cur_images) + + def choose_shuffled_order(self): # Note: Images and labels must be added in shuffled order. 
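+        # The fixed RNG seed keeps the shuffled export order reproducible
+        # across runs of the dataset tool.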
+ order = np.arange(self.expected_images) + np.random.RandomState(123).shuffle(order) + return order + + def add_image(self, img): + if self.print_progress and self.cur_images % self.progress_interval == 0: + print('%d / %d\r' % (self.cur_images, self.expected_images), end='', flush=True) + if self.shape is None: + self.shape = img.shape + self.resolution_log2 = int(np.log2(self.shape[1])) + assert self.shape[0] in [1, 3] + assert self.shape[1] == self.shape[2] + assert self.shape[1] == 2**self.resolution_log2 + tfr_opt = tf.io.TFRecordOptions(tf.compat.v1.io.TFRecordCompressionType.NONE) + for lod in range(self.resolution_log2 - 1): + tfr_file = self.tfr_prefix + '-r%02d.tfrecords' % (self.resolution_log2 - lod) + self.tfr_writers.append(tf.io.TFRecordWriter(tfr_file, tfr_opt)) + assert img.shape == self.shape + for lod, tfr_writer in enumerate(self.tfr_writers): + if lod: + img = img.astype(np.float32) + img = (img[:, 0::2, 0::2] + img[:, 0::2, 1::2] + img[:, 1::2, 0::2] + img[:, 1::2, 1::2]) * 0.25 + quant = np.rint(img).clip(0, 255).astype(np.uint8) + ex = tf.train.Example(features=tf.train.Features(feature={ + 'shape': tf.train.Feature(int64_list=tf.train.Int64List(value=quant.shape)), + 'data': tf.train.Feature(bytes_list=tf.train.BytesList(value=[quant.tostring()]))})) + tfr_writer.write(ex.SerializeToString()) + self.cur_images += 1 + + def add_labels(self, labels): + if self.print_progress: + print('%-40s\r' % 'Saving labels...', end='', flush=True) + assert labels.shape[0] == self.cur_images + with open(self.tfr_prefix + '-rxx.labels', 'wb') as f: + np.save(f, labels.astype(np.float32)) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + +# ---------------------------------------------------------------------------- + +class HDF5Exporter: + def __init__(self, h5_filename, resolution, channels, compress=False, expected_images=0, print_progress=True, progress_interval=10): + rlog2 = int(np.floor(np.log2(resolution))) + assert resolution == 2 ** rlog2 + + self.h5_filename = h5_filename + self.resolution = resolution + self.channels = channels + self.expected_images = expected_images + self.cur_images = 0 + self.h5_file = None + self.h5_lods = [] + self.buffers = [] + self.buffer_sizes = [] + self.print_progress = print_progress + self.progress_interval = progress_interval + + if self.print_progress: + print('Creating dataset "%s"' % h5_filename) + import h5py # conda install h5py + self.h5_file = h5py.File(h5_filename, 'w') + for lod in range(rlog2, -1, -1): + r = 2 ** lod + c = channels + bytes_per_item = c * (r ** 2) + chunk_size = int(np.ceil(128.0 / bytes_per_item)) + buffer_size = int(np.ceil(512.0 * np.exp2(20) / bytes_per_item)) + compression = 'gzip' if compress else None + compression_opts = 4 if compress else None + lod = self.h5_file.create_dataset( + 'data%dx%d' % (r, r), shape=(0, c, r, r), dtype=np.uint8, + maxshape=(None, c, r, r), chunks=(chunk_size, c, r, r), + compression=compression, compression_opts=compression_opts) + self.h5_lods.append(lod) + self.buffers.append(np.zeros((buffer_size, c, r, r), dtype=np.uint8)) + self.buffer_sizes.append(0) + + def close(self): + if self.print_progress: + print('%-40s\r' % 'Flushing data...', end='', flush=True) + for lod in range(len(self.h5_lods)): + self._flush_lod(lod) + self.h5_file.close() + self.h5_file = None + self.h5_lods = None + if self.print_progress: + print('%-40s\r' % '', end='', flush=True) + print('Added %d images.' 
% self.cur_images) + + def add_image(self, img): + self.add_images(np.stack([img])) + + def add_images(self, img): + assert img.ndim == 4 and img.shape[1] == self.channels and img.shape[2] == img.shape[3] + assert img.shape[2] >= self.resolution and img.shape[2] == 2 ** int(np.floor(np.log2(img.shape[2]))) + if self.print_progress and (self.cur_images - 1) % self.progress_interval >= self.progress_interval - img.shape[0]: + print('%d / %d\r' % (self.cur_images, self.expected_images), end='', flush=True) + + for lod in range(len(self.h5_lods)): + while img.shape[2] > self.resolution // (2 ** lod): + img = img.astype(np.float32) + img = (img[:, :, 0::2, 0::2] + img[:, :, 0::2, 1::2] + img[:, :, 1::2, 0::2] + img[:, :, 1::2, 1::2]) * 0.25 + quant = np.uint8(np.clip(np.round(img), 0, 255)) + ofs = 0 + while ofs < quant.shape[0]: + num = min(quant.shape[0] - ofs, self.buffers[lod].shape[0] - self.buffer_sizes[lod]) + self.buffers[lod][self.buffer_sizes[lod]: self.buffer_sizes[lod] + num] = quant[ofs: ofs + num] + self.buffer_sizes[lod] += num + if self.buffer_sizes[lod] == self.buffers[lod].shape[0]: + self._flush_lod(lod) + ofs += num + self.cur_images += img.shape[0] + + def add_labels(self, labels): + if self.print_progress: + print('%-40s\r' % 'Saving labels...', end='', flush=True) + assert labels.shape[0] == self.cur_images + with open(os.path.splitext(self.h5_filename)[0] + '-labels.npy', 'wb') as f: + np.save(f, labels.astype(np.float32)) + + def _flush_lod(self, lod): + num = self.buffer_sizes[lod] + if num > 0: + self.h5_lods[lod].resize(self.h5_lods[lod].shape[0] + num, axis=0) + self.h5_lods[lod][-num:] = self.buffers[lod][:num] + self.buffer_sizes[lod] = 0 + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + +#---------------------------------------------------------------------------- + +class ExceptionInfo(object): + def __init__(self): + self.value = sys.exc_info()[1] + self.traceback = traceback.format_exc() + +#---------------------------------------------------------------------------- + +class WorkerThread(threading.Thread): + def __init__(self, task_queue): + threading.Thread.__init__(self) + self.task_queue = task_queue + + def run(self): + while True: + func, args, result_queue = self.task_queue.get() + if func is None: + break + try: + result = func(*args) + except: + result = ExceptionInfo() + result_queue.put((result, args)) + +#---------------------------------------------------------------------------- + +class ThreadPool(object): + def __init__(self, num_threads): + assert num_threads >= 1 + self.task_queue = Queue.Queue() + self.result_queues = dict() + self.num_threads = num_threads + for _idx in range(self.num_threads): + thread = WorkerThread(self.task_queue) + thread.daemon = True + thread.start() + + def add_task(self, func, args=()): + assert hasattr(func, '__call__') # must be a function + if func not in self.result_queues: + self.result_queues[func] = Queue.Queue() + self.task_queue.put((func, args, self.result_queues[func])) + + def get_result(self, func): # returns (result, args) + result, args = self.result_queues[func].get() + if isinstance(result, ExceptionInfo): + print('\n\nWorker thread caught an exception:\n' + result.traceback) + raise result.value + return result, args + + def finish(self): + for _idx in range(self.num_threads): + self.task_queue.put((None, (), None)) + + def __enter__(self): # for 'with' statement + return self + + def __exit__(self, *excinfo): + self.finish() + + def 
process_items_concurrently(self, item_iterator, process_func=lambda x: x, pre_func=lambda x: x, post_func=lambda x: x, max_items_in_flight=None): + if max_items_in_flight is None: max_items_in_flight = self.num_threads * 4 + assert max_items_in_flight >= 1 + results = [] + retire_idx = [0] + + def task_func(prepared, _idx): + return process_func(prepared) + + def retire_result(): + processed, (_prepared, idx) = self.get_result(task_func) + results[idx] = processed + while retire_idx[0] < len(results) and results[retire_idx[0]] is not None: + yield post_func(results[retire_idx[0]]) + results[retire_idx[0]] = None + retire_idx[0] += 1 + + for idx, item in enumerate(item_iterator): + prepared = pre_func(item) + results.append(None) + self.add_task(func=task_func, args=(prepared, idx)) + while retire_idx[0] < idx - max_items_in_flight + 2: + for res in retire_result(): yield res + while retire_idx[0] < len(results): + for res in retire_result(): yield res + +#---------------------------------------------------------------------------- + +def info(tfrecord_dir): + print() + print('%-20s%s' % ('Dataset name:', os.path.basename(tfrecord_dir))) + + bytes_total = 0 + bytes_max = 0 + num_files = 0 + for f in sorted(glob.glob(os.path.join(tfrecord_dir, '*'))): + if os.path.isfile(f): + fs = os.stat(f).st_size + bytes_total += fs + bytes_max = max(bytes_max, fs) + num_files += 1 + print('%-20s%.2f' % ('Total size GB:', bytes_total / (1 << 30))) + print('%-20s%.2f' % ('Largest file GB:', bytes_max / (1 << 30))) + print('%-20s%d' % ('Num files:', num_files)) + + tflib.init_tf() + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle=False) + tflib.init_uninitialized_vars() + + print('%-20s%d' % ('Image width:', dset.shape[2])) + print('%-20s%d' % ('Image height:', dset.shape[1])) + print('%-20s%d' % ('Image channels:', dset.shape[0])) + print('%-20s%s' % ('Image datatype:', dset.dtype)) + print('%-20s%d' % ('Label size:', dset.label_size)) + print('%-20s%s' % ('Label datatype:', dset.label_dtype)) + + num_images = 0 + label_min = np.finfo(np.float64).max + label_max = np.finfo(np.float64).min + label_norm = 0 + lod = max(dset.resolution_log2 - 2, 0) + while True: + print('\r%-20s%d' % ('Num images:', num_images), end='', flush=True) + _images, labels = dset.get_minibatch_np(10000, lod=lod) # not accurate + if labels is None: + break + num_images += labels.shape[0] + if dset.label_size: + label_min = min(label_min, np.min(labels)) + label_max = max(label_max, np.max(labels)) + label_norm += np.sum(np.sqrt(np.sum(np.square(labels), axis=1))) + + print('\r%-20s%d' % ('Num images:', num_images)) + print('%-20s%s' % ('Label range:', '%g -- %g' % (label_min, label_max) if num_images and dset.label_size else 'n/a')) + print('%-20s%s' % ('Label L2 norm:', '%g' % (label_norm / num_images) if num_images and dset.label_size else 'n/a')) + print() + +#---------------------------------------------------------------------------- + +def display(tfrecord_dir): + print('Loading dataset "%s"' % tfrecord_dir) + tflib.init_tf() + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle=False) + tflib.init_uninitialized_vars() + import cv2 # pip install opencv-python + + idx = 0 + while True: + images, labels = dset.get_minibatch_np(1) + if images is None: + break + if idx == 0: + print('Displaying images') + cv2.namedWindow('dataset_tool') + print('Press SPACE or ENTER to advance, ESC to exit') + print('\nidx = %-8d\nlabel = %s' % (idx, 
labels[0].tolist())) + cv2.imshow('dataset_tool', images[0].transpose(1, 2, 0)[:, :, ::-1]) # CHW => HWC, RGB => BGR + idx += 1 + if cv2.waitKey() == 27: + break + print('\nDisplayed %d images.' % idx) + +#---------------------------------------------------------------------------- + +def extract(tfrecord_dir, output_dir): + print('Loading dataset "%s"' % tfrecord_dir) + tflib.init_tf() + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size=0, repeat=False, shuffle=False) + tflib.init_uninitialized_vars() + + print('Extracting images to "%s"' % output_dir) + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + idx = 0 + while True: + if idx % 10 == 0: + print('%d\r' % idx, end='', flush=True) + images, _labels = dset.get_minibatch_np(1) + if images is None: + break + if images.shape[1] == 1: + img = PIL.Image.fromarray(images[0][0], 'L') + else: + img = PIL.Image.fromarray(images[0].transpose(1, 2, 0), 'RGB') + img.save(os.path.join(output_dir, 'img%08d.png' % idx)) + idx += 1 + print('Extracted %d images.' % idx) + +#---------------------------------------------------------------------------- + +def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels): + max_label_size = 0 if ignore_labels else 'full' + print('Loading dataset "%s"' % tfrecord_dir_a) + tflib.init_tf() + dset_a = dataset.TFRecordDataset(tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle=False) + print('Loading dataset "%s"' % tfrecord_dir_b) + dset_b = dataset.TFRecordDataset(tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle=False) + tflib.init_uninitialized_vars() + + print('Comparing datasets') + idx = 0 + identical_images = 0 + identical_labels = 0 + while True: + if idx % 100 == 0: + print('%d\r' % idx, end='', flush=True) + images_a, labels_a = dset_a.get_minibatch_np(1) + images_b, labels_b = dset_b.get_minibatch_np(1) + if images_a is None or images_b is None: + if images_a is not None or images_b is not None: + print('Datasets contain different number of images') + break + if images_a.shape == images_b.shape and np.all(images_a == images_b): + identical_images += 1 + else: + print('Image %d is different' % idx) + if labels_a.shape == labels_b.shape and np.all(labels_a == labels_b): + identical_labels += 1 + else: + print('Label %d is different' % idx) + idx += 1 + print('Identical images: %d / %d' % (identical_images, idx)) + if not ignore_labels: + print('Identical labels: %d / %d' % (identical_labels, idx)) + +#---------------------------------------------------------------------------- + +def create_mnist(tfrecord_dir, mnist_dir): + print('Loading MNIST from "%s"' % mnist_dir) + import gzip + with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: + images = np.frombuffer(file.read(), np.uint8, offset=16) + with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file: + labels = np.frombuffer(file.read(), np.uint8, offset=8) + images = images.reshape(-1, 1, 28, 28) + images = np.pad(images, [(0,0), (0,0), (2,2), (2,2)], 'constant', constant_values=0) + assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (60000,) and labels.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() 
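+        # Images are written in shuffled order; the one-hot labels are
+        # reordered with the same permutation so they stay aligned.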
+ for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123): + print('Loading MNIST from "%s"' % mnist_dir) + import gzip + with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: + images = np.frombuffer(file.read(), np.uint8, offset=16) + images = images.reshape(-1, 28, 28) + images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0) + assert images.shape == (60000, 32, 32) and images.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + + with TFRecordExporter(tfrecord_dir, num_images) as tfr: + rnd = np.random.RandomState(random_seed) + for _idx in range(num_images): + tfr.add_image(images[rnd.randint(images.shape[0], size=3)]) + +#---------------------------------------------------------------------------- + +def create_cifar10(tfrecord_dir, cifar10_dir, ignore_labels): + print('Loading CIFAR-10 from "%s"' % cifar10_dir) + import pickle + images = [] + labels = [] + for batch in range(1, 6): + with open(os.path.join(cifar10_dir, 'data_batch_%d' % batch), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images.append(data['data'].reshape(-1, 3, 32, 32)) + labels.append(data['labels']) + images = np.concatenate(images) + labels = np.concatenate(labels) + assert ignore_labels in [0, 1] + assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (50000,) and labels.dtype in [np.int32, np.int64] + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + if not ignore_labels: + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_cifar100(tfrecord_dir, cifar100_dir): + print('Loading CIFAR-100 from "%s"' % cifar100_dir) + import pickle + with open(os.path.join(cifar100_dir, 'train'), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images = data['data'].reshape(-1, 3, 32, 32) + labels = np.array(data['fine_labels']) + assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (50000,) and labels.dtype == np.int32 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 99 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_svhn(tfrecord_dir, svhn_dir): + print('Loading SVHN from "%s"' % svhn_dir) + import pickle + images = [] + labels = [] + for batch in range(1, 4): + with open(os.path.join(svhn_dir, 'train_%d.pkl' % batch), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images.append(data[0]) + labels.append(data[1]) + images = np.concatenate(images) + labels = np.concatenate(labels) + 
assert images.shape == (73257, 3, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (73257,) and labels.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_lsun(tfrecord_dir, lmdb_dir, resolution=256, max_images=None): + print('Loading LSUN dataset from "%s"' % lmdb_dir) + import lmdb # pip install lmdb # pylint: disable=import-error + import cv2 # pip install opencv-python + import io + with lmdb.open(lmdb_dir, readonly=True, lock=False).begin(write=False) as txn: + total_images = txn.stat()['entries'] + if max_images is None: + max_images = total_images + with TFRecordExporter(tfrecord_dir, max_images) as tfr: + for _idx, (_key, value) in enumerate(txn.cursor()): + try: + try: + img = cv2.imdecode(np.fromstring(value, dtype=np.uint8), 1) + if img is None: + raise IOError('cv2.imdecode failed') + img = img[:, :, ::-1] # BGR => RGB + except IOError: + img = np.asarray(PIL.Image.open(io.BytesIO(value))) + crop = np.min(img.shape[:2]) + img = img[(img.shape[0] - crop) // 2 : (img.shape[0] + crop) // 2, (img.shape[1] - crop) // 2 : (img.shape[1] + crop) // 2] + img = PIL.Image.fromarray(img, 'RGB') + img = img.resize((resolution, resolution), PIL.Image.ANTIALIAS) + img = np.asarray(img) + img = img.transpose([2, 0, 1]) # HWC => CHW + tfr.add_image(img) + except: + print(sys.exc_info()[1]) + if tfr.cur_images == max_images: + break + +#---------------------------------------------------------------------------- + +def create_lsun_wide(tfrecord_dir, lmdb_dir, width=512, height=384, max_images=None): + assert width == 2 ** int(np.round(np.log2(width))) + assert height <= width + print('Loading LSUN dataset from "%s"' % lmdb_dir) + import lmdb # pip install lmdb # pylint: disable=import-error + import cv2 # pip install opencv-python + import io + with lmdb.open(lmdb_dir, readonly=True).begin(write=False) as txn: + total_images = txn.stat()['entries'] + if max_images is None: + max_images = total_images + with TFRecordExporter(tfrecord_dir, max_images, print_progress=False) as tfr: + for idx, (_key, value) in enumerate(txn.cursor()): + try: + try: + img = cv2.imdecode(np.fromstring(value, dtype=np.uint8), 1) + if img is None: + raise IOError('cv2.imdecode failed') + img = img[:, :, ::-1] # BGR => RGB + except IOError: + img = np.asarray(PIL.Image.open(io.BytesIO(value))) + + ch = int(np.round(width * img.shape[0] / img.shape[1])) + if img.shape[1] < width or ch < height: + continue + + img = img[(img.shape[0] - ch) // 2 : (img.shape[0] + ch) // 2] + img = PIL.Image.fromarray(img, 'RGB') + img = img.resize((width, height), PIL.Image.ANTIALIAS) + img = np.asarray(img) + img = img.transpose([2, 0, 1]) # HWC => CHW + + canvas = np.zeros([3, width, width], dtype=np.uint8) + canvas[:, (width - height) // 2 : (width + height) // 2] = img + tfr.add_image(canvas) + print('\r%d / %d => %d ' % (idx + 1, total_images, tfr.cur_images), end='') + + except: + print(sys.exc_info()[1]) + if tfr.cur_images == max_images: + break + print() + +#---------------------------------------------------------------------------- + 
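+# Crop each aligned 218x178 CelebA image to a 128x128 window centred on
+# (cx, cy), convert HWC => CHW, and export the result as TFRecords.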
+def create_celeba(tfrecord_dir, celeba_dir, cx=89, cy=121): + print('Loading CelebA from "%s"' % celeba_dir) + glob_pattern = os.path.join(celeba_dir, 'img_align_celeba_png', '*.png') + image_filenames = sorted(glob.glob(glob_pattern)) + expected_images = 202599 + if len(image_filenames) != expected_images: + error('Expected to find %d images' % expected_images) + + with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) + assert img.shape == (218, 178, 3) + img = img[cy - 64 : cy + 64, cx - 64 : cx + 64] + img = img.transpose(2, 0, 1) # HWC => CHW + tfr.add_image(img) + +#---------------------------------------------------------------------------- + +def create_from_images(tfrecord_dir, image_dir, shuffle): + print('Loading images from "%s"' % image_dir) + image_filenames = sorted(glob.glob(os.path.join(image_dir, '*'))) + if len(image_filenames) == 0: + error('No input images found') + + img = np.asarray(PIL.Image.open(image_filenames[0])) + resolution = img.shape[0] + channels = img.shape[2] if img.ndim == 3 else 1 + if img.shape[1] != resolution: + error('Input images must have the same width and height') + if resolution != 2 ** int(np.floor(np.log2(resolution))): + error('Input image resolution must be a power-of-two') + if channels not in [1, 3]: + error('Input images must be stored as RGB or grayscale') + + with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: + order = tfr.choose_shuffled_order() if shuffle else np.arange(len(image_filenames)) + for idx in range(order.size): + img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) + if channels == 1: + img = img[np.newaxis, :, :] # HW => CHW + else: + img = img.transpose([2, 0, 1]) # HWC => CHW + tfr.add_image(img) + +#---------------------------------------------------------------------------- + +def create_from_hdf5(tfrecord_dir, hdf5_filename, shuffle): + print('Loading HDF5 archive from "%s"' % hdf5_filename) + import h5py # conda install h5py + with h5py.File(hdf5_filename, 'r') as hdf5_file: + hdf5_data = max([value for key, value in hdf5_file.items() if key.startswith('data')], key=lambda lod: lod.shape[3]) + with TFRecordExporter(tfrecord_dir, hdf5_data.shape[0]) as tfr: + order = tfr.choose_shuffled_order() if shuffle else np.arange(hdf5_data.shape[0]) + for idx in range(order.size): + tfr.add_image(hdf5_data[order[idx]]) + npy_filename = os.path.splitext(hdf5_filename)[0] + '-labels.npy' + if os.path.isfile(npy_filename): + tfr.add_labels(np.load(npy_filename)[order]) + +#---------------------------------------------------------------------------- + +def convert_to_hdf5(hdf5_filename, tfrecord_dir, compress): + print('Loading dataset "%s"' % tfrecord_dir) + tflib.init_tf() + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle=False) + tflib.init_uninitialized_vars() + with HDF5Exporter(hdf5_filename, resolution=dset.shape[1], channels=dset.shape[0], compress=compress) as h5: + all_labels = [] + while True: + images, labels = dset.get_minibatch_np(1) + if images is None: + break + h5.add_images(images) + all_labels.append(labels) + all_labels = np.concatenate(all_labels) + if all_labels.size: + h5.add_labels(all_labels) + +#---------------------------------------------------------------------------- + +def hdf5_from_images(hdf5_filename, image_dir, compress): + print('Loading images from "%s"' % image_dir) + image_filenames 
= sorted(glob.glob(os.path.join(image_dir, '*'))) + if len(image_filenames) == 0: + error('No input images found') + + img = np.asarray(PIL.Image.open(image_filenames[0])) + resolution = img.shape[0] + channels = img.shape[2] if img.ndim == 3 else 1 + if img.shape[1] != resolution: + error('Input images must have the same width and height') + if resolution != 2 ** int(np.floor(np.log2(resolution))): + error('Input image resolution must be a power-of-two') + if channels not in [1, 3]: + error('Input images must be stored as RGB or grayscale') + + with HDF5Exporter(hdf5_filename, resolution=resolution, channels=channels, compress=compress, expected_images=len(image_filenames)) as h5: + for image_filename in image_filenames: + img = np.asarray(PIL.Image.open(image_filename)) + if channels == 1: + img = img[np.newaxis, :, :] # HW => CHW + else: + img = img.transpose([2, 0, 1]) # HWC => CHW + h5.add_image(img) + +#---------------------------------------------------------------------------- + +def make_png_path(outdir, idx): + idx_str = f'{idx:08d}' + return f'{os.path.join(outdir, idx_str[:5])}/img{idx_str}.png' + +def unpack(tfrecord_dir, output_dir, resolution=None): + print('Loading dataset "%s"' % tfrecord_dir) + tflib.init_tf() + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle=False) + tflib.init_uninitialized_vars() + + print('Extracting images to "%s"' % output_dir) + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + idx = 0 + labels = [] + while True: + if idx % 10 == 0: + print('%d\r' % idx, end='', flush=True) + images, lbls = dset.get_minibatch_np(1) + if images is None: + break + if images.shape[1] == 1: + img = PIL.Image.fromarray(images[0][0], 'L') + else: + img = PIL.Image.fromarray(images[0].transpose(1, 2, 0), 'RGB') + if resolution is not None: + img = img.resize((resolution, resolution), PIL.Image.ANTIALIAS) + assert lbls.shape[0] == 1 + labels.append(lbls[0]) + png_fname = make_png_path(output_dir, idx) + os.makedirs(os.path.dirname(png_fname), exist_ok=True) + img.save(png_fname) + idx += 1 + np.savez(f'{output_dir}/pack_extras.npz', labels=np.array(labels, dtype=np.uint8), num_images=idx) + print('Extracted %d images.' % idx) + +#---------------------------------------------------------------------------- + +def pack(unpacked_dir, tfrecord_dir, num_train=None, num_validation=None, mirror=0, seed=None): + + def export_samples(source_idx, tfr_prefix): + if source_idx.shape[0] == 0: return + if source_idx.shape != (source_idx.shape[0], 2): + assert len(source_idx.shape) == 1 + source_idx = np.stack([np.zeros(source_idx.shape[0], dtype=np.uint8), source_idx], axis=-1) + with TFRecordExporter(tfrecord_dir, len(source_idx), tfr_prefix=tfr_prefix) as tfr: + for mirror, idx in source_idx: + img = np.asarray(PIL.Image.open(make_png_path(unpacked_dir, idx))) + img = img.transpose([2, 0, 1]) # HWC => CHW + if mirror != 0: + img = img[:, :, ::-1] + tfr.add_image(img) + tfr.add_labels(labels_onehot[source_idx[:,1]]) + + print(f'Loading an unpacked dataset from "{unpacked_dir}"') + + meta = np.load(f'{unpacked_dir}/pack_extras.npz') + num_images = int(meta['num_images']) + labels_onehot = meta['labels'] + assert (labels_onehot.shape[0] == num_images) and (len(labels_onehot.shape) == 2) + + order = np.arange(num_images) + if seed is not None: + np.random.RandomState(seed).shuffle(order) + + # Size the training and validation sets based on command line args. 
+ # + # If the training set size is not specified on the command line, use all + # except what's set aside for the validation set. + n_train = num_train if num_train is not None else num_images + n_valid = num_validation + if num_train is None: + n_train -= n_valid + assert n_train > 0 + assert (n_train + n_valid) <= num_images + + train_idx = order[0:n_train] + valid_idx = order[n_train:n_train+n_valid] + + if mirror != 0: + n = train_idx.shape[0] + train_idx = np.concatenate([ + np.stack([np.zeros(n, dtype=np.uint8), train_idx], axis=-1), + np.stack([np.ones(n, dtype=np.uint8), train_idx], axis=-1) + ]) + if seed is not None: + np.random.RandomState(seed).shuffle(train_idx) + + tfr = os.path.basename(tfrecord_dir) + export_samples(train_idx, tfr_prefix=tfr) + export_samples(valid_idx, tfr_prefix=f'validation-{tfr}') + +#---------------------------------------------------------------------------- + +def extract_brecahad_crops(brecahad_dir, output_dir, cropsize=256): + params = { + 256: { 'overlap': 0.0 }, + 512: { 'overlap': 0.5 } + } + if cropsize not in params: + print('--cropsize must be one of:', ', '.join(str(x) for x in params.keys())) + sys.exit(1) + + os.makedirs(output_dir, exist_ok=True) + + incr = int(cropsize*(1-params[cropsize]['overlap'])) + out_idx = 0 + for fname in tqdm(sorted(glob.glob(os.path.join(brecahad_dir, '*.tif')))): + src = PIL.Image.open(fname) + w, h = src.size + for x in range(0, w-cropsize+1, incr): + for y in range(0, h-cropsize+1, incr): + cropimg = src.crop((x, y, x+cropsize, y+cropsize)) + cropimg.save(os.path.join(output_dir, f'{out_idx:04d}.png')) + out_idx += 1 + print(f'Extracted {out_idx} image crops.') + +#---------------------------------------------------------------------------- + +def execute_cmdline(argv): + prog = argv[0] + parser = argparse.ArgumentParser( + prog = prog, + description = 'Tool for creating multi-resolution TFRecords datasets for StyleGAN and ProGAN.', + epilog = 'Type "%s -h" for more information.' 
% prog) + + subparsers = parser.add_subparsers(dest='command') + subparsers.required = True + def add_command(cmd, desc, example=None): + epilog = 'Example: %s %s' % (prog, example) if example is not None else None + return subparsers.add_parser(cmd, description=desc, help=desc, epilog=epilog) + + p = add_command( 'info', 'Display general info about dataset.', + 'info datasets/mnist') + p.add_argument( 'tfrecord_dir', help='Directory containing dataset') + + p = add_command( 'display', 'Display images in dataset.', + 'display datasets/mnist') + p.add_argument( 'tfrecord_dir', help='Directory containing dataset') + + p = add_command( 'extract', 'Extract images from dataset.', + 'extract datasets/mnist mnist-images') + p.add_argument( 'tfrecord_dir', help='Directory containing dataset') + p.add_argument( 'output_dir', help='Directory to extract the images into') + + p = add_command( 'compare', 'Compare two datasets.', + 'compare datasets/mydataset datasets/mnist') + p.add_argument( 'tfrecord_dir_a', help='Directory containing first dataset') + p.add_argument( 'tfrecord_dir_b', help='Directory containing second dataset') + p.add_argument( '--ignore_labels', help='Ignore labels (default: 0)', type=int, default=0) + + p = add_command( 'create_mnist', 'Create dataset for MNIST.', + 'create_mnist datasets/mnist ~/downloads/mnist') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'mnist_dir', help='Directory containing MNIST') + + p = add_command( 'create_mnistrgb', 'Create dataset for MNIST-RGB.', + 'create_mnistrgb datasets/mnistrgb ~/downloads/mnist') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'mnist_dir', help='Directory containing MNIST') + p.add_argument( '--num_images', help='Number of composite images to create (default: 1000000)', type=int, default=1000000) + p.add_argument( '--random_seed', help='Random seed (default: 123)', type=int, default=123) + + p = add_command( 'create_cifar10', 'Create dataset for CIFAR-10.', + 'create_cifar10 datasets/cifar10 ~/downloads/cifar10') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'cifar10_dir', help='Directory containing CIFAR-10') + p.add_argument( '--ignore_labels', help='Ignore labels (default: 0)', type=int, default=0) + + p = add_command( 'create_cifar100', 'Create dataset for CIFAR-100.', + 'create_cifar100 datasets/cifar100 ~/downloads/cifar100') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'cifar100_dir', help='Directory containing CIFAR-100') + + p = add_command( 'create_svhn', 'Create dataset for SVHN.', + 'create_svhn datasets/svhn ~/downloads/svhn') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'svhn_dir', help='Directory containing SVHN') + + p = add_command( 'create_lsun', 'Create dataset for single LSUN category.', + 'create_lsun datasets/lsun-car-100k ~/downloads/lsun/car_lmdb --resolution 256 --max_images 100000') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'lmdb_dir', help='Directory containing LMDB database') + p.add_argument( '--resolution', help='Output resolution (default: 256)', type=int, default=256) + p.add_argument( '--max_images', help='Maximum number of images (default: none)', type=int, default=None) + + p = add_command( 'create_lsun_wide', 'Create LSUN dataset with non-square aspect ratio.', + 'create_lsun_wide 
datasets/lsun-car-512x384 ~/downloads/lsun/car_lmdb --width 512 --height 384') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'lmdb_dir', help='Directory containing LMDB database') + p.add_argument( '--width', help='Output width (default: 512)', type=int, default=512) + p.add_argument( '--height', help='Output height (default: 384)', type=int, default=384) + p.add_argument( '--max_images', help='Maximum number of images (default: none)', type=int, default=None) + + p = add_command( 'create_celeba', 'Create dataset for CelebA.', + 'create_celeba datasets/celeba ~/downloads/celeba') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'celeba_dir', help='Directory containing CelebA') + p.add_argument( '--cx', help='Center X coordinate (default: 89)', type=int, default=89) + p.add_argument( '--cy', help='Center Y coordinate (default: 121)', type=int, default=121) + + p = add_command( 'create_from_images', 'Create dataset from a directory full of images.', + 'create_from_images datasets/mydataset myimagedir') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'image_dir', help='Directory containing the images') + p.add_argument( '--shuffle', help='Randomize image order (default: 1)', type=int, default=1) + + p = add_command( 'create_from_hdf5', 'Create dataset from legacy HDF5 archive.', + 'create_from_hdf5 datasets/celebahq ~/downloads/celeba-hq-1024x1024.h5') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'hdf5_filename', help='HDF5 archive containing the images') + p.add_argument( '--shuffle', help='Randomize image order (default: 1)', type=int, default=1) + + p = add_command( 'convert_to_hdf5', 'Convert dataset to legacy HDF5 archive.', + 'convert_to_hdf5 datasets/celebahq.h5 datasets/celebahq') + p.add_argument( 'hdf5_filename', help='HDF5 archive to be created') + p.add_argument( 'tfrecord_dir', help='Dataset directory to load the images from') + p.add_argument( '--compress', help='Compress the data (default: 0)', type=int, default=0) + + p = add_command( 'hdf5_from_images', 'Create HDF5 archive from a directory of images.', + 'hdf5_from_images datasets/mydataset.h5 myimagedir') + p.add_argument( 'hdf5_filename', help='HDF5 archive to be created') + p.add_argument( 'image_dir', help='Directory containing the images') + p.add_argument( '--compress', help='Compress the data (default: 0)', type=int, default=0) + + p = add_command( 'unpack', 'Unpack a TFRecords dataset to labels and images for later repackaging with `pack`.') + p.add_argument( '--tfrecord_dir', help='Directory containing the source dataset in TFRecords format', required=True) + p.add_argument( '--output_dir', help='Output directory where to extract the dataset as PNG files', required=True) + p.add_argument( '--resolution', help='Resize images to (resolution,resolution) (default: None = no resizing)', type=int, default=None) + + p = add_command( 'pack', 'Repackage an unpacked dataset into TFRecords.') + p.add_argument( '--unpacked_dir', help='Source directory containing an unpacked tfrecords dataset') + p.add_argument( '--tfrecord_dir', help='New dataset directory to be created') + p.add_argument( '--num_train', help='Number of images to pick for the training set (default: None = all)', type=int, default=None) + p.add_argument( '--num_validation', help='Number of images to pick for the validation set (default: 0 = no images)', type=int, 
default=0) + p.add_argument( '--mirror', help='Number of images to pick for the training set (default: 0 = no mirroring)', type=int, default=0) + p.add_argument( '--seed', help='Shuffle random seed. (default: None = do not shuffle)', type=int, default=None) + + p = add_command( 'extract_brecahad_crops', 'Extract crops from the original BreCaHAD images') + p.add_argument( '--brecahad_dir', help='Source directory for BreCaHAD images. Should contain .tif files.', required=True) + p.add_argument( '--output_dir', help='Output directory for image crops. Will contain .png files', required=True) + p.add_argument( '--cropsize', help='Crop size (resolution,resolution)', type=int, default=256) + + args = parser.parse_args(argv[1:] if len(argv) > 1 else ['-h']) + func = globals()[args.command] + del args.command + func(**vars(args)) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + execute_cmdline(sys.argv) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a159d3b5dc534d6f9b18dd58f9b9830b85879abb --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +from .util import EasyDict, make_cache_dir_path diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ac2a2d93e49cd91624bce1e9e31571a9afc97aff --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +from . import autosummary +from . import network +from . import optimizer +from . import tfutil +from . import custom_ops + +from .tfutil import * +from .network import Network + +from .optimizer import Optimizer + +from .custom_ops import get_plugin diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/autosummary.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/autosummary.py new file mode 100644 index 0000000000000000000000000000000000000000..08ca1ead8f926e853033fb8d7e7f1e97fb7c06fe --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/autosummary.py @@ -0,0 +1,193 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
+# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Helper for adding automatically tracked values to Tensorboard. + +Autosummary creates an identity op that internally keeps track of the input +values and automatically shows up in TensorBoard. The reported value +represents an average over input components. The average is accumulated +constantly over time and flushed when save_summaries() is called. + +Notes: +- The output tensor must be used as an input for something else in the + graph. Otherwise, the autosummary op will not get executed, and the average + value will not get accumulated. +- It is perfectly fine to include autosummaries with the same name in + several places throughout the graph, even if they are executed concurrently. +- It is ok to also pass in a python scalar or numpy array. In this case, it + is added to the average immediately. +""" + +from collections import OrderedDict +import numpy as np +import tensorflow as tf +from tensorboard import summary as summary_lib +from tensorboard.plugins.custom_scalar import layout_pb2 + +from . import tfutil +from .tfutil import TfExpression +from .tfutil import TfExpressionEx + +# Enable "Custom scalars" tab in TensorBoard for advanced formatting. +# Disabled by default to reduce tfevents file size. +enable_custom_scalars = False + +_dtype = tf.float64 +_vars = OrderedDict() # name => [var, ...] +_immediate = OrderedDict() # name => update_op, update_value +_finalized = False +_merge_op = None + + +def _create_var(name: str, value_expr: TfExpression) -> TfExpression: + """Internal helper for creating autosummary accumulators.""" + assert not _finalized + name_id = name.replace("/", "_") + v = tf.cast(value_expr, _dtype) + + if v.shape.is_fully_defined(): + size = np.prod(v.shape.as_list()) + size_expr = tf.constant(size, dtype=_dtype) + else: + size = None + size_expr = tf.reduce_prod(tf.cast(tf.shape(v), _dtype)) + + if size == 1: + if v.shape.ndims != 0: + v = tf.reshape(v, []) + v = [size_expr, v, tf.square(v)] + else: + v = [size_expr, tf.reduce_sum(v), tf.reduce_sum(tf.square(v))] + v = tf.cond(tf.is_finite(v[1]), lambda: tf.stack(v), lambda: tf.zeros(3, dtype=_dtype)) + + with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.control_dependencies(None): + var = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False) # [sum(1), sum(x), sum(x**2)] + update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v)) + + if name in _vars: + _vars[name].append(var) + else: + _vars[name] = [var] + return update_op + + +def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx: + """Create a new autosummary. + + Args: + name: Name to use in TensorBoard + value: TensorFlow expression or python value to track + passthru: Optionally return this TF node without modifications but tack an autosummary update side-effect to this node. 
+ + Example use of the passthru mechanism: + + n = autosummary('l2loss', loss, passthru=n) + + This is a shorthand for the following code: + + with tf.control_dependencies([autosummary('l2loss', loss)]): + n = tf.identity(n) + """ + tfutil.assert_tf_initialized() + name_id = name.replace("/", "_") + + if tfutil.is_tf_expression(value): + with tf.name_scope("summary_" + name_id), tf.device(value.device): + condition = tf.convert_to_tensor(condition, name='condition') + update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op) + with tf.control_dependencies([update_op]): + return tf.identity(value if passthru is None else passthru) + + else: # python scalar or numpy array + assert not tfutil.is_tf_expression(passthru) + assert not tfutil.is_tf_expression(condition) + if condition: + if name not in _immediate: + with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.device(None), tf.control_dependencies(None): + update_value = tf.placeholder(_dtype) + update_op = _create_var(name, update_value) + _immediate[name] = update_op, update_value + update_op, update_value = _immediate[name] + tfutil.run(update_op, {update_value: value}) + return value if passthru is None else passthru + + +def finalize_autosummaries() -> None: + """Create the necessary ops to include autosummaries in TensorBoard report. + Note: This should be done only once per graph. + """ + global _finalized + tfutil.assert_tf_initialized() + + if _finalized: + return None + + _finalized = True + tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list]) + + # Create summary ops. + with tf.device(None), tf.control_dependencies(None): + for name, vars_list in _vars.items(): + name_id = name.replace("/", "_") + with tfutil.absolute_name_scope("Autosummary/" + name_id): + moments = tf.add_n(vars_list) + moments /= moments[0] + with tf.control_dependencies([moments]): # read before resetting + reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list] + with tf.name_scope(None), tf.control_dependencies(reset_ops): # reset before reporting + mean = moments[1] + std = tf.sqrt(moments[2] - tf.square(moments[1])) + tf.summary.scalar(name, mean) + if enable_custom_scalars: + tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std) + tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std) + + # Setup layout for custom scalars. 
+ layout = None + if enable_custom_scalars: + cat_dict = OrderedDict() + for series_name in sorted(_vars.keys()): + p = series_name.split("/") + cat = p[0] if len(p) >= 2 else "" + chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1] + if cat not in cat_dict: + cat_dict[cat] = OrderedDict() + if chart not in cat_dict[cat]: + cat_dict[cat][chart] = [] + cat_dict[cat][chart].append(series_name) + categories = [] + for cat_name, chart_dict in cat_dict.items(): + charts = [] + for chart_name, series_names in chart_dict.items(): + series = [] + for series_name in series_names: + series.append(layout_pb2.MarginChartContent.Series( + value=series_name, + lower="xCustomScalars/" + series_name + "/margin_lo", + upper="xCustomScalars/" + series_name + "/margin_hi")) + margin = layout_pb2.MarginChartContent(series=series) + charts.append(layout_pb2.Chart(title=chart_name, margin=margin)) + categories.append(layout_pb2.Category(title=cat_name, chart=charts)) + layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories)) + return layout + +def save_summaries(file_writer, global_step=None): + """Call FileWriter.add_summary() with all summaries in the default graph, + automatically finalizing and merging them on the first call. + """ + global _merge_op + tfutil.assert_tf_initialized() + + if _merge_op is None: + layout = finalize_autosummaries() + if layout is not None: + file_writer.add_summary(layout) + with tf.device(None), tf.control_dependencies(None): + _merge_op = tf.summary.merge_all() + + file_writer.add_summary(_merge_op.eval(), global_step) diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/custom_ops.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/custom_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..ed31b7691546b844195321283fe52129d6ce9525 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/custom_ops.py @@ -0,0 +1,181 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""TensorFlow custom ops builder. +""" + +import glob +import os +import re +import uuid +import hashlib +import tempfile +import shutil +import tensorflow as tf +from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module + +from .. import util + +#---------------------------------------------------------------------------- +# Global options. + +cuda_cache_path = None +cuda_cache_version_tag = 'v1' +do_not_hash_included_headers = True # Speed up compilation by assuming that headers included by the CUDA code never change. +verbose = True # Print status messages to stdout. + +#---------------------------------------------------------------------------- +# Internal helper funcs. 
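Further down in this file, get_plugin() avoids recompiling a CUDA op by keying the cached binary on an MD5 of the CUDA source plus the full nvcc command line, tf.VERSION, and cuda_cache_version_tag. A rough standalone sketch of that cache-key construction, with hypothetical helper and argument names, shown only for illustration and not part of the committed file:

import hashlib
import os

def cuda_cache_key(cuda_file, nvcc_cmd, tf_version, version_tag='v1'):
    # Hash the CUDA source and the build configuration, mirroring what
    # get_plugin() feeds into its cache filename.
    md5 = hashlib.md5()
    with open(cuda_file, 'rb') as f:
        md5.update(f.read())
    md5.update(b'\n')
    md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n')
    md5.update(('tf.VERSION: ' + tf_version).encode('utf-8') + b'\n')
    md5.update(('cuda_cache_version_tag: ' + version_tag).encode('utf-8') + b'\n')
    name, _ = os.path.splitext(os.path.basename(cuda_file))
    ext = '.dll' if os.name == 'nt' else '.so'
    return name + '_' + md5.hexdigest() + ext       # e.g. fused_bias_act_<md5>.so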
+ +def _find_compiler_bindir(): + hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/Professional/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) + if hostx64_paths != []: + return hostx64_paths[0] + hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/BuildTools/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) + if hostx64_paths != []: + return hostx64_paths[0] + hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/Community/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True) + if hostx64_paths != []: + return hostx64_paths[0] + vc_bin_dir = 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin' + if os.path.isdir(vc_bin_dir): + return vc_bin_dir + return None + +def _get_compute_cap(device): + caps_str = device.physical_device_desc + m = re.search('compute capability: (\\d+).(\\d+)', caps_str) + major = m.group(1) + minor = m.group(2) + return (major, minor) + +def _get_cuda_gpu_arch_string(): + gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU'] + if len(gpus) == 0: + raise RuntimeError('No GPU devices found') + (major, minor) = _get_compute_cap(gpus[0]) + return 'sm_%s%s' % (major, minor) + +def _run_cmd(cmd): + with os.popen(cmd) as pipe: + output = pipe.read() + status = pipe.close() + if status is not None: + raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output)) + +def _prepare_nvcc_cli(opts): + cmd = 'nvcc ' + opts.strip() + cmd += ' --disable-warnings' + cmd += ' --include-path "%s"' % tf.sysconfig.get_include() + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src') + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl') + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive') + + compiler_bindir = _find_compiler_bindir() + if compiler_bindir is None: + # Require that _find_compiler_bindir succeeds on Windows. Allow + # nvcc to use whatever is the default on Linux. + if os.name == 'nt': + raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__) + else: + cmd += ' --compiler-bindir "%s"' % compiler_bindir + cmd += ' 2>&1' + return cmd + +#---------------------------------------------------------------------------- +# Main entry point. + +_plugin_cache = dict() + +def get_plugin(cuda_file, extra_nvcc_options=[]): + cuda_file_base = os.path.basename(cuda_file) + cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base) + + # Already in cache? + if cuda_file in _plugin_cache: + return _plugin_cache[cuda_file] + + # Setup plugin. + if verbose: + print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True) + try: + # Hash CUDA source. + md5 = hashlib.md5() + with open(cuda_file, 'rb') as f: + md5.update(f.read()) + md5.update(b'\n') + + # Hash headers included by the CUDA code by running it through the preprocessor. + if not do_not_hash_included_headers: + if verbose: + print('Preprocessing... 
', end='', flush=True) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext) + _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))) + with open(tmp_file, 'rb') as f: + bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8') # __FILE__ in error check macros + good_file_str = ('"' + cuda_file_base + '"').encode('utf-8') + for ln in f: + if not ln.startswith(b'# ') and not ln.startswith(b'#line '): # ignore line number pragmas + ln = ln.replace(bad_file_str, good_file_str) + md5.update(ln) + md5.update(b'\n') + + # Select compiler options. + compile_opts = '' + if os.name == 'nt': + compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib') + elif os.name == 'posix': + compile_opts += f' --compiler-options \'-fPIC\'' + compile_opts += f' --compiler-options \'{" ".join(tf.sysconfig.get_compile_flags())}\'' + compile_opts += f' --linker-options \'{" ".join(tf.sysconfig.get_link_flags())}\'' + else: + assert False # not Windows or Linux, w00t? + compile_opts += f' --gpu-architecture={_get_cuda_gpu_arch_string()}' + compile_opts += ' --use_fast_math' + for opt in extra_nvcc_options: + compile_opts += ' ' + opt + nvcc_cmd = _prepare_nvcc_cli(compile_opts) + + # Hash build configuration. + md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n') + md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n') + md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n') + + # Compile if not already compiled. + cache_dir = util.make_cache_dir_path('tflib-cudacache') if cuda_cache_path is None else cuda_cache_path + bin_file_ext = '.dll' if os.name == 'nt' else '.so' + bin_file = os.path.join(cache_dir, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext) + if not os.path.isfile(bin_file): + if verbose: + print('Compiling... ', end='', flush=True) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext) + _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)) + os.makedirs(cache_dir, exist_ok=True) + intermediate_file = os.path.join(cache_dir, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext) + shutil.copyfile(tmp_file, intermediate_file) + os.rename(intermediate_file, bin_file) # atomic + + # Load. + if verbose: + print('Loading... ', end='', flush=True) + plugin = tf.load_op_library(bin_file) + + # Add to cache. + _plugin_cache[cuda_file] = plugin + if verbose: + print('Done.', flush=True) + return plugin + + except: + if verbose: + print('Failed!', flush=True) + raise + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/network.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/network.py new file mode 100644 index 0000000000000000000000000000000000000000..2c5a60beb488f1bc878e90e0bb1ada7288c16cfb --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/network.py @@ -0,0 +1,781 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. 
Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Helper for managing networks.""" + +import types +import inspect +import re +import uuid +import sys +import copy +import numpy as np +import tensorflow as tf + +from collections import OrderedDict +from typing import Any, List, Tuple, Union, Callable + +from . import tfutil +from .. import util + +from .tfutil import TfExpression, TfExpressionEx + +# pylint: disable=protected-access +# pylint: disable=attribute-defined-outside-init +# pylint: disable=too-many-public-methods + +_import_handlers = [] # Custom import handlers for dealing with legacy data in pickle import. +_import_module_src = dict() # Source code for temporary modules created during pickle import. + + +def import_handler(handler_func): + """Function decorator for declaring custom import handlers.""" + _import_handlers.append(handler_func) + return handler_func + + +class Network: + """Generic network abstraction. + + Acts as a convenience wrapper for a parameterized network construction + function, providing several utility methods and convenient access to + the inputs/outputs/weights. + + Network objects can be safely pickled and unpickled for long-term + archival purposes. The pickling works reliably as long as the underlying + network construction function is defined in a standalone Python module + that has no side effects or application-specific imports. + + Args: + name: Network name. Used to select TensorFlow name and variable scopes. Defaults to build func name if None. + func_name: Fully qualified name of the underlying network construction function, or a top-level function object. + static_kwargs: Keyword arguments to be passed in to the network construction function. + """ + + def __init__(self, name: str = None, func_name: Any = None, **static_kwargs): + # Locate the user-specified build function. + assert isinstance(func_name, str) or util.is_top_level_function(func_name) + if util.is_top_level_function(func_name): + func_name = util.get_top_level_function_name(func_name) + module, func_name = util.get_module_from_obj_name(func_name) + func = util.get_obj_from_module(module, func_name) + + # Dig up source code for the module containing the build function. + module_src = _import_module_src.get(module, None) + if module_src is None: + module_src = inspect.getsource(module) + + # Initialize fields. + self._init_fields(name=(name or func_name), static_kwargs=static_kwargs, build_func=func, build_func_name=func_name, build_module_src=module_src) + + def _init_fields(self, name: str, static_kwargs: dict, build_func: Callable, build_func_name: str, build_module_src: str) -> None: + tfutil.assert_tf_initialized() + assert isinstance(name, str) + assert len(name) >= 1 + assert re.fullmatch(r"[A-Za-z0-9_.\\-]*", name) + assert isinstance(static_kwargs, dict) + assert util.is_pickleable(static_kwargs) + assert callable(build_func) + assert isinstance(build_func_name, str) + assert isinstance(build_module_src, str) + + # Choose TensorFlow name scope. + with tf.name_scope(None): + scope = tf.get_default_graph().unique_name(name, mark_as_used=True) + + # Query current TensorFlow device. + with tfutil.absolute_name_scope(scope), tf.control_dependencies(None): + device = tf.no_op(name="_QueryDevice").device + + # Immutable state. 
+ self._name = name + self._scope = scope + self._device = device + self._static_kwargs = util.EasyDict(copy.deepcopy(static_kwargs)) + self._build_func = build_func + self._build_func_name = build_func_name + self._build_module_src = build_module_src + + # State before _init_graph(). + self._var_inits = dict() # var_name => initial_value, set to None by _init_graph() + self._all_inits_known = False # Do we know for sure that _var_inits covers all the variables? + self._components = None # subnet_name => Network, None if the components are not known yet + + # Initialized by _init_graph(). + self._input_templates = None + self._output_templates = None + self._own_vars = None + + # Cached values initialized the respective methods. + self._input_shapes = None + self._output_shapes = None + self._input_names = None + self._output_names = None + self._vars = None + self._trainables = None + self._var_global_to_local = None + self._run_cache = dict() + + def _init_graph(self) -> None: + assert self._var_inits is not None + assert self._input_templates is None + assert self._output_templates is None + assert self._own_vars is None + + # Initialize components. + if self._components is None: + self._components = util.EasyDict() + + # Choose build func kwargs. + build_kwargs = dict(self.static_kwargs) + build_kwargs["is_template_graph"] = True + build_kwargs["components"] = self._components + + # Override scope and device, and ignore surrounding control dependencies. + with tfutil.absolute_variable_scope(self.scope, reuse=False), tfutil.absolute_name_scope(self.scope), tf.device(self.device), tf.control_dependencies(None): + assert tf.get_variable_scope().name == self.scope + assert tf.get_default_graph().get_name_scope() == self.scope + + # Create input templates. + self._input_templates = [] + for param in inspect.signature(self._build_func).parameters.values(): + if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty: + self._input_templates.append(tf.placeholder(tf.float32, name=param.name)) + + # Call build func. + out_expr = self._build_func(*self._input_templates, **build_kwargs) + + # Collect output templates and variables. + assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) + self._output_templates = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) + self._own_vars = OrderedDict((var.name[len(self.scope) + 1:].split(":")[0], var) for var in tf.global_variables(self.scope + "/")) + + # Check for errors. + if len(self._input_templates) == 0: + raise ValueError("Network build func did not list any inputs.") + if len(self._output_templates) == 0: + raise ValueError("Network build func did not return any outputs.") + if any(not tfutil.is_tf_expression(t) for t in self._output_templates): + raise ValueError("Network outputs must be TensorFlow expressions.") + if any(t.shape.ndims is None for t in self._input_templates): + raise ValueError("Network input shapes not defined. Please call x.set_shape() for each input.") + if any(t.shape.ndims is None for t in self._output_templates): + raise ValueError("Network output shapes not defined. Please call x.set_shape() where applicable.") + if any(not isinstance(comp, Network) for comp in self._components.values()): + raise ValueError("Components of a Network must be Networks themselves.") + if len(self._components) != len(set(comp.name for comp in self._components.values())): + raise ValueError("Components of a Network must have unique names.") + + # Initialize variables. 
+ if len(self._var_inits): + tfutil.set_vars({self._get_vars()[name]: value for name, value in self._var_inits.items() if name in self._get_vars()}) + remaining_inits = [var.initializer for name, var in self._own_vars.items() if name not in self._var_inits] + if self._all_inits_known: + assert len(remaining_inits) == 0 + else: + tfutil.run(remaining_inits) + self._var_inits = None + + @property + def name(self): + """User-specified name string.""" + return self._name + + @property + def scope(self): + """Unique TensorFlow scope containing template graph and variables, derived from the user-specified name.""" + return self._scope + + @property + def device(self): + """Name of the TensorFlow device that the weights of this network reside on. Determined by the current device at construction time.""" + return self._device + + @property + def static_kwargs(self): + """EasyDict of arguments passed to the user-supplied build func.""" + return copy.deepcopy(self._static_kwargs) + + @property + def components(self): + """EasyDict of sub-networks created by the build func.""" + return copy.copy(self._get_components()) + + def _get_components(self): + if self._components is None: + self._init_graph() + assert self._components is not None + return self._components + + @property + def input_shapes(self): + """List of input tensor shapes, including minibatch dimension.""" + if self._input_shapes is None: + self._input_shapes = [t.shape.as_list() for t in self.input_templates] + return copy.deepcopy(self._input_shapes) + + @property + def output_shapes(self): + """List of output tensor shapes, including minibatch dimension.""" + if self._output_shapes is None: + self._output_shapes = [t.shape.as_list() for t in self.output_templates] + return copy.deepcopy(self._output_shapes) + + @property + def input_shape(self): + """Short-hand for input_shapes[0].""" + return self.input_shapes[0] + + @property + def output_shape(self): + """Short-hand for output_shapes[0].""" + return self.output_shapes[0] + + @property + def num_inputs(self): + """Number of input tensors.""" + return len(self.input_shapes) + + @property + def num_outputs(self): + """Number of output tensors.""" + return len(self.output_shapes) + + @property + def input_names(self): + """Name string for each input.""" + if self._input_names is None: + self._input_names = [t.name.split("/")[-1].split(":")[0] for t in self.input_templates] + return copy.copy(self._input_names) + + @property + def output_names(self): + """Name string for each output.""" + if self._output_names is None: + self._output_names = [t.name.split("/")[-1].split(":")[0] for t in self.output_templates] + return copy.copy(self._output_names) + + @property + def input_templates(self): + """Input placeholders in the template graph.""" + if self._input_templates is None: + self._init_graph() + assert self._input_templates is not None + return copy.copy(self._input_templates) + + @property + def output_templates(self): + """Output tensors in the template graph.""" + if self._output_templates is None: + self._init_graph() + assert self._output_templates is not None + return copy.copy(self._output_templates) + + @property + def own_vars(self): + """Variables defined by this network (local_name => var), excluding sub-networks.""" + return copy.copy(self._get_own_vars()) + + def _get_own_vars(self): + if self._own_vars is None: + self._init_graph() + assert self._own_vars is not None + return self._own_vars + + @property + def vars(self): + """All variables (local_name => var).""" + 
return copy.copy(self._get_vars()) + + def _get_vars(self): + if self._vars is None: + self._vars = OrderedDict(self._get_own_vars()) + for comp in self._get_components().values(): + self._vars.update((comp.name + "/" + name, var) for name, var in comp._get_vars().items()) + return self._vars + + @property + def trainables(self): + """All trainable variables (local_name => var).""" + return copy.copy(self._get_trainables()) + + def _get_trainables(self): + if self._trainables is None: + self._trainables = OrderedDict((name, var) for name, var in self.vars.items() if var.trainable) + return self._trainables + + @property + def var_global_to_local(self): + """Mapping from variable global names to local names.""" + return copy.copy(self._get_var_global_to_local()) + + def _get_var_global_to_local(self): + if self._var_global_to_local is None: + self._var_global_to_local = OrderedDict((var.name.split(":")[0], name) for name, var in self.vars.items()) + return self._var_global_to_local + + def reset_own_vars(self) -> None: + """Re-initialize all variables of this network, excluding sub-networks.""" + if self._var_inits is None or self._components is None: + tfutil.run([var.initializer for var in self._get_own_vars().values()]) + else: + self._var_inits.clear() + self._all_inits_known = False + + def reset_vars(self) -> None: + """Re-initialize all variables of this network, including sub-networks.""" + if self._var_inits is None: + tfutil.run([var.initializer for var in self._get_vars().values()]) + else: + self._var_inits.clear() + self._all_inits_known = False + if self._components is not None: + for comp in self._components.values(): + comp.reset_vars() + + def reset_trainables(self) -> None: + """Re-initialize all trainable variables of this network, including sub-networks.""" + tfutil.run([var.initializer for var in self._get_trainables().values()]) + + def get_output_for(self, *in_expr: TfExpression, return_as_list: bool = False, **dynamic_kwargs) -> Union[TfExpression, List[TfExpression]]: + """Construct TensorFlow expression(s) for the output(s) of this network, given the input expression(s). + The graph is placed on the current TensorFlow device.""" + assert len(in_expr) == self.num_inputs + assert not all(expr is None for expr in in_expr) + self._get_vars() # ensure that all variables have been created + + # Choose build func kwargs. + build_kwargs = dict(self.static_kwargs) + build_kwargs.update(dynamic_kwargs) + build_kwargs["is_template_graph"] = False + build_kwargs["components"] = self._components + + # Build TensorFlow graph to evaluate the network. + with tfutil.absolute_variable_scope(self.scope, reuse=True), tf.name_scope(self.name): + assert tf.get_variable_scope().name == self.scope + valid_inputs = [expr for expr in in_expr if expr is not None] + final_inputs = [] + for expr, name, shape in zip(in_expr, self.input_names, self.input_shapes): + if expr is not None: + expr = tf.identity(expr, name=name) + else: + expr = tf.zeros([tf.shape(valid_inputs[0])[0]] + shape[1:], name=name) + final_inputs.append(expr) + out_expr = self._build_func(*final_inputs, **build_kwargs) + + # Propagate input shapes back to the user-specified expressions. + for expr, final in zip(in_expr, final_inputs): + if isinstance(expr, tf.Tensor): + expr.set_shape(final.shape) + + # Express outputs in the desired format. 
+ assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) + if return_as_list: + out_expr = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) + return out_expr + + def get_var_local_name(self, var_or_global_name: Union[TfExpression, str]) -> str: + """Get the local name of a given variable, without any surrounding name scopes.""" + assert tfutil.is_tf_expression(var_or_global_name) or isinstance(var_or_global_name, str) + global_name = var_or_global_name if isinstance(var_or_global_name, str) else var_or_global_name.name + return self._get_var_global_to_local()[global_name] + + def find_var(self, var_or_local_name: Union[TfExpression, str]) -> TfExpression: + """Find variable by local or global name.""" + assert tfutil.is_tf_expression(var_or_local_name) or isinstance(var_or_local_name, str) + return self._get_vars()[var_or_local_name] if isinstance(var_or_local_name, str) else var_or_local_name + + def get_var(self, var_or_local_name: Union[TfExpression, str]) -> np.ndarray: + """Get the value of a given variable as NumPy array. + Note: This method is very inefficient -- prefer to use tflib.run(list_of_vars) whenever possible.""" + return self.find_var(var_or_local_name).eval() + + def set_var(self, var_or_local_name: Union[TfExpression, str], new_value: Union[int, float, np.ndarray]) -> None: + """Set the value of a given variable based on the given NumPy array. + Note: This method is very inefficient -- prefer to use tflib.set_vars() whenever possible.""" + tfutil.set_vars({self.find_var(var_or_local_name): new_value}) + + def __getstate__(self) -> dict: + """Pickle export.""" + state = dict() + state["version"] = 5 + state["name"] = self.name + state["static_kwargs"] = dict(self.static_kwargs) + state["components"] = dict(self.components) + state["build_module_src"] = self._build_module_src + state["build_func_name"] = self._build_func_name + state["variables"] = list(zip(self._get_own_vars().keys(), tfutil.run(list(self._get_own_vars().values())))) + state["input_shapes"] = self.input_shapes + state["output_shapes"] = self.output_shapes + state["input_names"] = self.input_names + state["output_names"] = self.output_names + return state + + def __setstate__(self, state: dict) -> None: + """Pickle import.""" + + # Execute custom import handlers. + for handler in _import_handlers: + state = handler(state) + + # Get basic fields. + assert state["version"] in [2, 3, 4, 5] + name = state["name"] + static_kwargs = state["static_kwargs"] + build_module_src = state["build_module_src"] + build_func_name = state["build_func_name"] + + # Create temporary module from the imported source code. + module_name = "_tflib_network_import_" + uuid.uuid4().hex + module = types.ModuleType(module_name) + sys.modules[module_name] = module + _import_module_src[module] = build_module_src + exec(build_module_src, module.__dict__) # pylint: disable=exec-used + build_func = util.get_obj_from_module(module, build_func_name) + + # Initialize fields. 
+ self._init_fields(name=name, static_kwargs=static_kwargs, build_func=build_func, build_func_name=build_func_name, build_module_src=build_module_src) + self._var_inits.update(copy.deepcopy(state["variables"])) + self._all_inits_known = True + self._components = util.EasyDict(state.get("components", {})) + self._input_shapes = copy.deepcopy(state.get("input_shapes", None)) + self._output_shapes = copy.deepcopy(state.get("output_shapes", None)) + self._input_names = copy.deepcopy(state.get("input_names", None)) + self._output_names = copy.deepcopy(state.get("output_names", None)) + + def clone(self, name: str = None, **new_static_kwargs) -> "Network": + """Create a clone of this network with its own copy of the variables.""" + static_kwargs = dict(self.static_kwargs) + static_kwargs.update(new_static_kwargs) + net = object.__new__(Network) + net._init_fields(name=(name or self.name), static_kwargs=static_kwargs, build_func=self._build_func, build_func_name=self._build_func_name, build_module_src=self._build_module_src) + net.copy_vars_from(self) + return net + + def copy_own_vars_from(self, src_net: "Network") -> None: + """Copy the values of all variables from the given network, excluding sub-networks.""" + + # Source has unknown variables or unknown components => init now. + if (src_net._var_inits is not None and not src_net._all_inits_known) or src_net._components is None: + src_net._get_vars() + + # Both networks are inited => copy directly. + if src_net._var_inits is None and self._var_inits is None: + names = [name for name in self._get_own_vars().keys() if name in src_net._get_own_vars()] + tfutil.set_vars(tfutil.run({self._get_vars()[name]: src_net._get_vars()[name] for name in names})) + return + + # Read from source. + if src_net._var_inits is None: + value_dict = tfutil.run(src_net._get_own_vars()) + else: + value_dict = src_net._var_inits + + # Write to destination. + if self._var_inits is None: + tfutil.set_vars({self._get_vars()[name]: value for name, value in value_dict.items() if name in self._get_vars()}) + else: + self._var_inits.update(value_dict) + + def copy_vars_from(self, src_net: "Network") -> None: + """Copy the values of all variables from the given network, including sub-networks.""" + + # Source has unknown variables or unknown components => init now. + if (src_net._var_inits is not None and not src_net._all_inits_known) or src_net._components is None: + src_net._get_vars() + + # Source is inited, but destination components have not been created yet => set as initial values. + if src_net._var_inits is None and self._components is None: + self._var_inits.update(tfutil.run(src_net._get_vars())) + return + + # Destination has unknown components => init now. + if self._components is None: + self._get_vars() + + # Both networks are inited => copy directly. + if src_net._var_inits is None and self._var_inits is None: + names = [name for name in self._get_vars().keys() if name in src_net._get_vars()] + tfutil.set_vars(tfutil.run({self._get_vars()[name]: src_net._get_vars()[name] for name in names})) + return + + # Copy recursively, component by component. 
+ self.copy_own_vars_from(src_net) + for name, src_comp in src_net._components.items(): + if name in self._components: + self._components[name].copy_vars_from(src_comp) + + def copy_trainables_from(self, src_net: "Network") -> None: + """Copy the values of all trainable variables from the given network, including sub-networks.""" + names = [name for name in self._get_trainables().keys() if name in src_net._get_trainables()] + tfutil.set_vars(tfutil.run({self._get_vars()[name]: src_net._get_vars()[name] for name in names})) + + def convert(self, new_func_name: str, new_name: str = None, **new_static_kwargs) -> "Network": + """Create new network with the given parameters, and copy all variables from this network.""" + if new_name is None: + new_name = self.name + static_kwargs = dict(self.static_kwargs) + static_kwargs.update(new_static_kwargs) + net = Network(name=new_name, func_name=new_func_name, **static_kwargs) + net.copy_vars_from(self) + return net + + def setup_as_moving_average_of(self, src_net: "Network", beta: TfExpressionEx = 0.99, beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation: + """Construct a TensorFlow op that updates the variables of this network + to be slightly closer to those of the given network.""" + with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"): + ops = [] + for name, var in self._get_vars().items(): + if name in src_net._get_vars(): + cur_beta = beta if var.trainable else beta_nontrainable + new_value = tfutil.lerp(src_net._get_vars()[name], var, cur_beta) + ops.append(var.assign(new_value)) + return tf.group(*ops) + + def run(self, + *in_arrays: Tuple[Union[np.ndarray, None], ...], + input_transform: dict = None, + output_transform: dict = None, + return_as_list: bool = False, + print_progress: bool = False, + minibatch_size: int = None, + num_gpus: int = 1, + assume_frozen: bool = False, + **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]: + """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s). + + Args: + input_transform: A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network. + The dict must contain a 'func' field that points to a top-level function. The function is called with the input + TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. + output_transform: A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network. + The dict must contain a 'func' field that points to a top-level function. The function is called with the output + TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. + return_as_list: True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs. + print_progress: Print progress to the console? Useful for very large input arrays. + minibatch_size: Maximum minibatch size to use, None = disable batching. + num_gpus: Number of GPUs to use. + assume_frozen: Improve multi-GPU performance by assuming that the trainable parameters will remain changed between calls. + dynamic_kwargs: Additional keyword arguments to be passed into the network build function. 
+ """ + assert len(in_arrays) == self.num_inputs + assert not all(arr is None for arr in in_arrays) + assert input_transform is None or util.is_top_level_function(input_transform["func"]) + assert output_transform is None or util.is_top_level_function(output_transform["func"]) + output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs) + num_items = in_arrays[0].shape[0] + if minibatch_size is None: + minibatch_size = num_items + + # Construct unique hash key from all arguments that affect the TensorFlow graph. + key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs) + def unwind_key(obj): + if isinstance(obj, dict): + return [(key, unwind_key(value)) for key, value in sorted(obj.items())] + if callable(obj): + return util.get_top_level_function_name(obj) + return obj + key = repr(unwind_key(key)) + + # Build graph. + if key not in self._run_cache: + with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None): + with tf.device("/cpu:0"): + in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names] + in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr])) + + out_split = [] + for gpu in range(num_gpus): + with tf.device(self.device if num_gpus == 1 else "/gpu:%d" % gpu): + net_gpu = self.clone() if assume_frozen else self + in_gpu = in_split[gpu] + + if input_transform is not None: + in_kwargs = dict(input_transform) + in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs) + in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu) + + assert len(in_gpu) == self.num_inputs + out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs) + + if output_transform is not None: + out_kwargs = dict(output_transform) + out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs) + out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu) + + assert len(out_gpu) == self.num_outputs + out_split.append(out_gpu) + + with tf.device("/cpu:0"): + out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)] + self._run_cache[key] = in_expr, out_expr + + # Run minibatches. + in_expr, out_expr = self._run_cache[key] + out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr] + + for mb_begin in range(0, num_items, minibatch_size): + if print_progress: + print("\r%d / %d" % (mb_begin, num_items), end="") + + mb_end = min(mb_begin + minibatch_size, num_items) + mb_num = mb_end - mb_begin + mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)] + mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in))) + + for dst, src in zip(out_arrays, mb_out): + dst[mb_begin: mb_end] = src + + # Done. 
+ if print_progress: + print("\r%d / %d" % (num_items, num_items)) + + if not return_as_list: + out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays) + return out_arrays + + def list_ops(self) -> List[TfExpression]: + _ = self.output_templates # ensure that the template graph has been created + include_prefix = self.scope + "/" + exclude_prefix = include_prefix + "_" + ops = tf.get_default_graph().get_operations() + ops = [op for op in ops if op.name.startswith(include_prefix)] + ops = [op for op in ops if not op.name.startswith(exclude_prefix)] + return ops + + def list_layers(self) -> List[Tuple[str, TfExpression, List[TfExpression]]]: + """Returns a list of (layer_name, output_expr, trainable_vars) tuples corresponding to + individual layers of the network. Mainly intended to be used for reporting.""" + layers = [] + + def recurse(scope, parent_ops, parent_vars, level): + if len(parent_ops) == 0 and len(parent_vars) == 0: + return + + # Ignore specific patterns. + if any(p in scope for p in ["/Shape", "/strided_slice", "/Cast", "/concat", "/Assign"]): + return + + # Filter ops and vars by scope. + global_prefix = scope + "/" + local_prefix = global_prefix[len(self.scope) + 1:] + cur_ops = [op for op in parent_ops if op.name.startswith(global_prefix) or op.name == global_prefix[:-1]] + cur_vars = [(name, var) for name, var in parent_vars if name.startswith(local_prefix) or name == local_prefix[:-1]] + if not cur_ops and not cur_vars: + return + + # Filter out all ops related to variables. + for var in [op for op in cur_ops if op.type.startswith("Variable")]: + var_prefix = var.name + "/" + cur_ops = [op for op in cur_ops if not op.name.startswith(var_prefix)] + + # Scope does not contain ops as immediate children => recurse deeper. + contains_direct_ops = any("/" not in op.name[len(global_prefix):] and op.type not in ["Identity", "Cast", "Transpose"] for op in cur_ops) + if (level == 0 or not contains_direct_ops) and (len(cur_ops) != 0 or len(cur_vars) != 0): + visited = set() + for rel_name in [op.name[len(global_prefix):] for op in cur_ops] + [name[len(local_prefix):] for name, _var in cur_vars]: + token = rel_name.split("/")[0] + if token not in visited: + recurse(global_prefix + token, cur_ops, cur_vars, level + 1) + visited.add(token) + return + + # Report layer. 
+ layer_name = scope[len(self.scope) + 1:] + layer_output = cur_ops[-1].outputs[0] if cur_ops else cur_vars[-1][1] + layer_trainables = [var for _name, var in cur_vars if var.trainable] + layers.append((layer_name, layer_output, layer_trainables)) + + recurse(self.scope, self.list_ops(), list(self._get_vars().items()), 0) + return layers + + def print_layers(self, title: str = None, hide_layers_with_no_params: bool = False) -> None: + """Print a summary table of the network structure.""" + rows = [[title if title is not None else self.name, "Params", "OutputShape", "WeightShape"]] + rows += [["---"] * 4] + total_params = 0 + + for layer_name, layer_output, layer_trainables in self.list_layers(): + num_params = sum(int(np.prod(var.shape.as_list())) for var in layer_trainables) + weights = [var for var in layer_trainables if var.name.endswith("/weight:0")] + weights.sort(key=lambda x: len(x.name)) + if len(weights) == 0 and len(layer_trainables) == 1: + weights = layer_trainables + total_params += num_params + + if not hide_layers_with_no_params or num_params != 0: + num_params_str = str(num_params) if num_params > 0 else "-" + output_shape_str = str(layer_output.shape) + weight_shape_str = str(weights[0].shape) if len(weights) >= 1 else "-" + rows += [[layer_name, num_params_str, output_shape_str, weight_shape_str]] + + rows += [["---"] * 4] + rows += [["Total", str(total_params), "", ""]] + + widths = [max(len(cell) for cell in column) for column in zip(*rows)] + print() + for row in rows: + print(" ".join(cell + " " * (width - len(cell)) for cell, width in zip(row, widths))) + print() + + def setup_weight_histograms(self, title: str = None) -> None: + """Construct summary ops to include histograms of all trainable parameters in TensorBoard.""" + if title is None: + title = self.name + + with tf.name_scope(None), tf.device(None), tf.control_dependencies(None): + for local_name, var in self._get_trainables().items(): + if "/" in local_name: + p = local_name.split("/") + name = title + "_" + p[-1] + "/" + "_".join(p[:-1]) + else: + name = title + "_toplevel/" + local_name + + tf.summary.histogram(name, var) + +#---------------------------------------------------------------------------- +# Backwards-compatible emulation of legacy output transformation in Network.run(). 
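+#
+# Illustrative sketch (the argument values are an assumption): a legacy call such as
+#     images = net.run(latents, out_mul=127.5, out_add=127.5, out_dtype=np.uint8)
+# is rewritten by _handle_legacy_output_transforms() into the equivalent
+#     images = net.run(latents, output_transform=dict(func=_legacy_output_transform_func,
+#                                                     out_mul=127.5, out_add=127.5, out_dtype=np.uint8))
+# so that scaling, offset, optional average pooling, and the dtype cast are applied to the
+# cached output expressions by _legacy_output_transform_func() below.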
+ +_print_legacy_warning = True + +def _handle_legacy_output_transforms(output_transform, dynamic_kwargs): + global _print_legacy_warning + legacy_kwargs = ["out_mul", "out_add", "out_shrink", "out_dtype"] + if not any(kwarg in dynamic_kwargs for kwarg in legacy_kwargs): + return output_transform, dynamic_kwargs + + if _print_legacy_warning: + _print_legacy_warning = False + print() + print("WARNING: Old-style output transformations in Network.run() are deprecated.") + print("Consider using 'output_transform=dict(func=tflib.convert_images_to_uint8)'") + print("instead of 'out_mul=127.5, out_add=127.5, out_dtype=np.uint8'.") + print() + assert output_transform is None + + new_kwargs = dict(dynamic_kwargs) + new_transform = {kwarg: new_kwargs.pop(kwarg) for kwarg in legacy_kwargs if kwarg in dynamic_kwargs} + new_transform["func"] = _legacy_output_transform_func + return new_transform, new_kwargs + +def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None): + if out_mul != 1.0: + expr = [x * out_mul for x in expr] + + if out_add != 0.0: + expr = [x + out_add for x in expr] + + if out_shrink > 1: + ksize = [1, 1, out_shrink, out_shrink] + expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr] + + if out_dtype is not None: + if tf.as_dtype(out_dtype).is_integer: + expr = [tf.round(x) for x in expr] + expr = [tf.saturate_cast(x, out_dtype) for x in expr] + return expr diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2c61c745d36a1e35568fe4310c780c34414173e0 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +# empty diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/fused_bias_act.cu b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/fused_bias_act.cu new file mode 100644 index 0000000000000000000000000000000000000000..0268f14395319003240b4a5a59141d703e9a4257 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/fused_bias_act.cu @@ -0,0 +1,220 @@ +// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// +// NVIDIA CORPORATION and its licensors retain all intellectual property +// and proprietary rights in and to this software, related documentation +// and any modifications thereto. Any use, reproduction, disclosure or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ +#define EIGEN_USE_GPU +#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace tensorflow::shape_inference; + +#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) + +//------------------------------------------------------------------------ +// CUDA kernel. + +template +struct FusedBiasActKernelParams +{ + const T* x; // [sizeX] + const T* b; // [sizeB] or NULL + const T* xref; // [sizeX] or NULL + const T* yref; // [sizeX] or NULL + T* y; // [sizeX] + + int grad; + int axis; + int act; + float alpha; + float gain; + float clamp; + + int sizeX; + int sizeB; + int stepB; + int loopX; +}; + +template +static __global__ void FusedBiasActKernel(const FusedBiasActKernelParams p) +{ + const float expRange = 80.0f; + const float halfExpRange = 40.0f; + const float seluScale = 1.0507009873554804934193349852946f; + const float seluAlpha = 1.6732632423543772848170429916717f; + + // Loop over elements. + int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x; + for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x) + { + // Load and apply bias. + float x = (float)p.x[xi]; + if (p.b) + x += (float)p.b[(xi / p.stepB) % p.sizeB]; + float xref = (p.xref) ? (float)p.xref[xi] : 0.0f; + float yref = (p.yref) ? (float)p.yref[xi] : 0.0f; + float yy = (p.gain != 0.0f) ? yref / p.gain : 0.0f; + + // Evaluate activation func. + float y; + switch (p.act * 10 + p.grad) + { + // linear + default: + case 10: y = x; break; + case 11: y = x; break; + case 12: y = 0.0f; break; + + // relu + case 20: y = (x > 0.0f) ? x : 0.0f; break; + case 21: y = (yy > 0.0f) ? x : 0.0f; break; + case 22: y = 0.0f; break; + + // lrelu + case 30: y = (x > 0.0f) ? x : x * p.alpha; break; + case 31: y = (yy > 0.0f) ? x : x * p.alpha; break; + case 32: y = 0.0f; break; + + // tanh + case 40: { float c = expf(x); float d = 1.0f / c; y = (x < -expRange) ? -1.0f : (x > expRange) ? 1.0f : (c - d) / (c + d); } break; + case 41: y = x * (1.0f - yy * yy); break; + case 42: y = x * (1.0f - yy * yy) * (-2.0f * yy); break; + + // sigmoid + case 50: y = (x < -expRange) ? 0.0f : 1.0f / (expf(-x) + 1.0f); break; + case 51: y = x * yy * (1.0f - yy); break; + case 52: y = x * yy * (1.0f - yy) * (1.0f - 2.0f * yy); break; + + // elu + case 60: y = (x >= 0.0f) ? x : expf(x) - 1.0f; break; + case 61: y = (yy >= 0.0f) ? x : x * (yy + 1.0f); break; + case 62: y = (yy >= 0.0f) ? 0.0f : x * (yy + 1.0f); break; + + // selu + case 70: y = (x >= 0.0f) ? seluScale * x : (seluScale * seluAlpha) * (expf(x) - 1.0f); break; + case 71: y = (yy >= 0.0f) ? x * seluScale : x * (yy + seluScale * seluAlpha); break; + case 72: y = (yy >= 0.0f) ? 0.0f : x * (yy + seluScale * seluAlpha); break; + + // softplus + case 80: y = (x > expRange) ? x : logf(expf(x) + 1.0f); break; + case 81: y = x * (1.0f - expf(-yy)); break; + case 82: { float c = expf(-yy); y = x * c * (1.0f - c); } break; + + // swish + case 90: y = (x < -expRange) ? 0.0f : x / (expf(-x) + 1.0f); break; + case 91: + case 92: + { + float c = expf(xref); + float d = c + 1.0f; + if (p.grad == 1) + y = (xref > halfExpRange) ? x : x * c * (xref + d) / (d * d); + else + y = (xref > halfExpRange) ? 
0.0f : x * c * (xref * (2.0f - d) + 2.0f * d) / (d * d * d); + yref = (xref < -expRange) ? 0.0f : xref / (expf(-xref) + 1.0f) * p.gain; + } + break; + } + + // Apply gain. + y *= p.gain; + + // Clamp. + if (p.clamp >= 0.0f) + { + if (p.grad == 0) + y = (fabsf(y) < p.clamp) ? y : (y >= 0.0f) ? p.clamp : -p.clamp; + else + y = (fabsf(yref) < p.clamp) ? y : 0.0f; + } + + // Store. + p.y[xi] = (T)y; + } +} + +//------------------------------------------------------------------------ +// TensorFlow op. + +template +struct FusedBiasActOp : public OpKernel +{ + FusedBiasActKernelParams m_attribs; + + FusedBiasActOp(OpKernelConstruction* ctx) : OpKernel(ctx) + { + memset(&m_attribs, 0, sizeof(m_attribs)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("grad", &m_attribs.grad)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &m_attribs.axis)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("act", &m_attribs.act)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &m_attribs.alpha)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("gain", &m_attribs.gain)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("clamp", &m_attribs.clamp)); + OP_REQUIRES(ctx, m_attribs.grad >= 0, errors::InvalidArgument("grad must be non-negative")); + OP_REQUIRES(ctx, m_attribs.axis >= 0, errors::InvalidArgument("axis must be non-negative")); + OP_REQUIRES(ctx, m_attribs.act >= 0, errors::InvalidArgument("act must be non-negative")); + } + + void Compute(OpKernelContext* ctx) + { + FusedBiasActKernelParams p = m_attribs; + cudaStream_t stream = ctx->eigen_device().stream(); + + const Tensor& x = ctx->input(0); // [...] + const Tensor& b = ctx->input(1); // [sizeB] or [0] + const Tensor& xref = ctx->input(2); // x.shape or [0] + const Tensor& yref = ctx->input(3); // x.shape or [0] + p.x = x.flat().data(); + p.b = (b.NumElements()) ? b.flat().data() : NULL; + p.xref = (xref.NumElements()) ? xref.flat().data() : NULL; + p.yref = (yref.NumElements()) ? 
yref.flat().data() : NULL; + OP_REQUIRES(ctx, b.NumElements() == 0 || m_attribs.axis < x.dims(), errors::InvalidArgument("axis out of bounds")); + OP_REQUIRES(ctx, b.dims() == 1, errors::InvalidArgument("b must have rank 1")); + OP_REQUIRES(ctx, b.NumElements() == 0 || b.NumElements() == x.dim_size(m_attribs.axis), errors::InvalidArgument("b has wrong number of elements")); + OP_REQUIRES(ctx, xref.NumElements() == 0 || xref.NumElements() == x.NumElements(), errors::InvalidArgument("xref has wrong number of elements")); + OP_REQUIRES(ctx, yref.NumElements() == 0 || yref.NumElements() == x.NumElements(), errors::InvalidArgument("yref has wrong number of elements")); + OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("x is too large")); + + p.sizeX = (int)x.NumElements(); + p.sizeB = (int)b.NumElements(); + p.stepB = 1; + for (int i = m_attribs.axis + 1; i < x.dims(); i++) + p.stepB *= (int)x.dim_size(i); + + Tensor* y = NULL; // x.shape + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, x.shape(), &y)); + p.y = y->flat().data(); + + p.loopX = 4; + int blockSize = 4 * 32; + int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1; + void* args[] = {&p}; + OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel((void*)FusedBiasActKernel, gridSize, blockSize, args, 0, stream)); + } +}; + +REGISTER_OP("FusedBiasAct") + .Input ("x: T") + .Input ("b: T") + .Input ("xref: T") + .Input ("yref: T") + .Output ("y: T") + .Attr ("T: {float, half}") + .Attr ("grad: int = 0") + .Attr ("axis: int = 1") + .Attr ("act: int = 0") + .Attr ("alpha: float = 0.0") + .Attr ("gain: float = 1.0") + .Attr ("clamp: float = -1.0"); +REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint("T"), FusedBiasActOp); +REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint("T"), FusedBiasActOp); + +//------------------------------------------------------------------------ diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/fused_bias_act.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/fused_bias_act.py new file mode 100644 index 0000000000000000000000000000000000000000..79991b0497d3d92f25194a31668b9568048163f8 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/fused_bias_act.py @@ -0,0 +1,211 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Custom TensorFlow ops for efficient bias and activation.""" + +import os +import numpy as np +import tensorflow as tf +from .. 
import custom_ops +from ...util import EasyDict + +def _get_plugin(): + return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu') + +#---------------------------------------------------------------------------- + +activation_funcs = { + 'linear': EasyDict(func=lambda x, **_: x, def_alpha=None, def_gain=1.0, cuda_idx=1, ref='y', zero_2nd_grad=True), + 'relu': EasyDict(func=lambda x, **_: tf.nn.relu(x), def_alpha=None, def_gain=np.sqrt(2), cuda_idx=2, ref='y', zero_2nd_grad=True), + 'lrelu': EasyDict(func=lambda x, alpha, **_: tf.nn.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', zero_2nd_grad=True), + 'tanh': EasyDict(func=lambda x, **_: tf.nn.tanh(x), def_alpha=None, def_gain=1.0, cuda_idx=4, ref='y', zero_2nd_grad=False), + 'sigmoid': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x), def_alpha=None, def_gain=1.0, cuda_idx=5, ref='y', zero_2nd_grad=False), + 'elu': EasyDict(func=lambda x, **_: tf.nn.elu(x), def_alpha=None, def_gain=1.0, cuda_idx=6, ref='y', zero_2nd_grad=False), + 'selu': EasyDict(func=lambda x, **_: tf.nn.selu(x), def_alpha=None, def_gain=1.0, cuda_idx=7, ref='y', zero_2nd_grad=False), + 'softplus': EasyDict(func=lambda x, **_: tf.nn.softplus(x), def_alpha=None, def_gain=1.0, cuda_idx=8, ref='y', zero_2nd_grad=False), + 'swish': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x) * x, def_alpha=None, def_gain=np.sqrt(2), cuda_idx=9, ref='x', zero_2nd_grad=False), +} + +#---------------------------------------------------------------------------- + +def fused_bias_act(x, b=None, axis=1, act='linear', alpha=None, gain=None, clamp=None, impl='cuda'): + r"""Fused bias and activation function. + + Adds bias `b` to activation tensor `x`, evaluates activation function `act`, + and scales the result by `gain`. Each of the steps is optional. In most cases, + the fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports first and second order gradients, + but not third order gradients. + + Args: + x: Input activation tensor. Can have any shape, but if `b` is defined, the + dimension corresponding to `axis`, as well as the rank, must be known. + b: Bias vector, or `None` to disable. Must be a 1D tensor of the same type + as `x`. The shape must be known, and it must match the dimension of `x` + corresponding to `axis`. + axis: The dimension in `x` corresponding to the elements of `b`. + The value of `axis` is ignored if `b` is not specified. + act: Name of the activation function to evaluate, or `"linear"` to disable. + Can be e.g. `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`, etc. + See `activation_funcs` for a full list. `None` is not allowed. + alpha: Shape parameter for the activation function, or `None` to use the default. + gain: Scaling factor for the output tensor, or `None` to use default. + See `activation_funcs` for the default scaling of each activation function. + If unsure, consider specifying `1.0`. + clamp: Clamp the output values to `[-clamp, +clamp]`, or `None` to disable + the clamping (default). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the same shape and datatype as `x`. 
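+
+    Example (illustrative sketch using the reference implementation):
+
+        x = tf.random_normal([4, 512, 8, 8])   # NCHW activations with a known shape
+        b = tf.zeros([512])                    # one bias per channel along axis=1
+        y = fused_bias_act(x, b, axis=1, act='lrelu', impl='ref')
+        # y has the same shape as x; 'lrelu' defaults to alpha=0.2 and gain=sqrt(2).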
+ """ + + impl_dict = { + 'ref': _fused_bias_act_ref, + 'cuda': _fused_bias_act_cuda, + } + return impl_dict[impl](x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain, clamp=clamp) + +#---------------------------------------------------------------------------- + +def _fused_bias_act_ref(x, b, axis, act, alpha, gain, clamp): + """Slow reference implementation of `fused_bias_act()` using standard TensorFlow ops.""" + + # Validate arguments. + x = tf.convert_to_tensor(x) + b = tf.convert_to_tensor(b) if b is not None else tf.constant([], dtype=x.dtype) + act_spec = activation_funcs[act] + assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) + assert b.shape[0] == 0 or 0 <= axis < x.shape.rank + if alpha is None: + alpha = act_spec.def_alpha + if gain is None: + gain = act_spec.def_gain + + # Add bias. + if b.shape[0] != 0: + x += tf.reshape(b, [-1 if i == axis else 1 for i in range(x.shape.rank)]) + + # Evaluate activation function. + x = act_spec.func(x, alpha=alpha) + + # Scale by gain. + if gain != 1: + x *= gain + + # Clamp. + if clamp is not None: + clamp = np.asarray(clamp, dtype=x.dtype.name) + assert clamp.shape == () and clamp >= 0 + x = tf.clip_by_value(x, -clamp, clamp) + return x + +#---------------------------------------------------------------------------- + +def _fused_bias_act_cuda(x, b, axis, act, alpha, gain, clamp): + """Fast CUDA implementation of `fused_bias_act()` using custom ops.""" + + # Validate arguments. + x = tf.convert_to_tensor(x) + empty_tensor = tf.constant([], dtype=x.dtype) + b = tf.convert_to_tensor(b) if b is not None else empty_tensor + act_spec = activation_funcs[act] + assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) + assert b.shape[0] == 0 or 0 <= axis < x.shape.rank + if alpha is None: + alpha = act_spec.def_alpha + if gain is None: + gain = act_spec.def_gain + + # Special cases. + if act == 'linear' and b is None and gain == 1.0: + return x + if act_spec.cuda_idx is None: + return _fused_bias_act_ref(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain, clamp=clamp) + + # CUDA op. + cuda_op = _get_plugin().fused_bias_act + cuda_kwargs = dict(axis=int(axis), act=int(act_spec.cuda_idx), gain=float(gain)) + if alpha is not None: + cuda_kwargs['alpha'] = float(alpha) + if clamp is not None: + clamp = np.asarray(clamp, dtype=x.dtype.name) + assert clamp.shape == () and clamp >= 0 + cuda_kwargs['clamp'] = float(clamp.astype(np.float32)) + def ref(tensor, name): + return tensor if act_spec.ref == name else empty_tensor + + # Forward pass: y = func(x, b). 
+ def func_y(x, b): + y = cuda_op(x=x, b=b, xref=empty_tensor, yref=empty_tensor, grad=0, **cuda_kwargs) + y.set_shape(x.shape) + return y + + # Backward pass: dx, db = grad(dy, x, y) + def grad_dx(dy, x, y): + dx = cuda_op(x=dy, b=empty_tensor, xref=ref(x,'x'), yref=ref(y,'y'), grad=1, **cuda_kwargs) + dx.set_shape(x.shape) + return dx + def grad_db(dx): + if b.shape[0] == 0: + return empty_tensor + db = dx + if axis < x.shape.rank - 1: + db = tf.reduce_sum(db, list(range(axis + 1, x.shape.rank))) + if axis > 0: + db = tf.reduce_sum(db, list(range(axis))) + db.set_shape(b.shape) + return db + + # Second order gradients: d_dy, d_x = grad2(d_dx, d_db, x, y) + def grad2_d_dy(d_dx, d_db, x, y): + d_dy = cuda_op(x=d_dx, b=d_db, xref=ref(x,'x'), yref=ref(y,'y'), grad=1, **cuda_kwargs) + d_dy.set_shape(x.shape) + return d_dy + def grad2_d_x(d_dx, d_db, x, y): + d_x = cuda_op(x=d_dx, b=d_db, xref=ref(x,'x'), yref=ref(y,'y'), grad=2, **cuda_kwargs) + d_x.set_shape(x.shape) + return d_x + + # Fast version for piecewise-linear activation funcs. + @tf.custom_gradient + def func_zero_2nd_grad(x, b): + y = func_y(x, b) + @tf.custom_gradient + def grad(dy): + dx = grad_dx(dy, x, y) + db = grad_db(dx) + def grad2(d_dx, d_db): + d_dy = grad2_d_dy(d_dx, d_db, x, y) + return d_dy + return (dx, db), grad2 + return y, grad + + # Slow version for general activation funcs. + @tf.custom_gradient + def func_nonzero_2nd_grad(x, b): + y = func_y(x, b) + def grad_wrap(dy): + @tf.custom_gradient + def grad_impl(dy, x): + dx = grad_dx(dy, x, y) + db = grad_db(dx) + def grad2(d_dx, d_db): + d_dy = grad2_d_dy(d_dx, d_db, x, y) + d_x = grad2_d_x(d_dx, d_db, x, y) + return d_dy, d_x + return (dx, db), grad2 + return grad_impl(dy, x) + return y, grad_wrap + + # Which version to use? + if act_spec.zero_2nd_grad: + return func_zero_2nd_grad(x, b) + return func_nonzero_2nd_grad(x, b) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/upfirdn_2d.cu b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/upfirdn_2d.cu new file mode 100644 index 0000000000000000000000000000000000000000..7aad60d53e57d4f3e60f36a24df80a6278f1bb63 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/upfirdn_2d.cu @@ -0,0 +1,359 @@ +// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// +// NVIDIA CORPORATION and its licensors retain all intellectual property +// and proprietary rights in and to this software, related documentation +// and any modifications thereto. Any use, reproduction, disclosure or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA CORPORATION is strictly prohibited. + +#define EIGEN_USE_GPU +#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace tensorflow::shape_inference; + +//------------------------------------------------------------------------ +// Helpers. 
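+//
+// Note: floorDiv() below rounds the quotient towards negative infinity (e.g. floorDiv(-3, 2) == -2),
+// which matters when output coordinates map to negative positions in the padded input; plain C++
+// integer division would round towards zero instead.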
+ +#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) + +static __host__ __device__ __forceinline__ int floorDiv(int a, int b) +{ + int t = 1 - a / b; + return (a + t * b) / b - t; +} + +//------------------------------------------------------------------------ +// CUDA kernel params. + +template +struct UpFirDn2DKernelParams +{ + const T* x; // [majorDim, inH, inW, minorDim] + const T* k; // [kernelH, kernelW] + T* y; // [majorDim, outH, outW, minorDim] + + int upx; + int upy; + int downx; + int downy; + int padx0; + int padx1; + int pady0; + int pady1; + + int majorDim; + int inH; + int inW; + int minorDim; + int kernelH; + int kernelW; + int outH; + int outW; + int loopMajor; + int loopX; +}; + +//------------------------------------------------------------------------ +// General CUDA implementation for large filter kernels. + +template +static __global__ void UpFirDn2DKernel_large(const UpFirDn2DKernelParams p) +{ + // Calculate thread index. + int minorIdx = blockIdx.x * blockDim.x + threadIdx.x; + int outY = minorIdx / p.minorDim; + minorIdx -= outY * p.minorDim; + int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y; + int majorIdxBase = blockIdx.z * p.loopMajor; + if (outXBase >= p.outW || outY >= p.outH || majorIdxBase >= p.majorDim) + return; + + // Setup Y receptive field. + int midY = outY * p.downy + p.upy - 1 - p.pady0; + int inY = min(max(floorDiv(midY, p.upy), 0), p.inH); + int h = min(max(floorDiv(midY + p.kernelH, p.upy), 0), p.inH) - inY; + int kernelY = midY + p.kernelH - (inY + 1) * p.upy; + + // Loop over majorDim and outX. + for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor && majorIdx < p.majorDim; loopMajor++, majorIdx++) + for (int loopX = 0, outX = outXBase; loopX < p.loopX && outX < p.outW; loopX++, outX += blockDim.y) + { + // Setup X receptive field. + int midX = outX * p.downx + p.upx - 1 - p.padx0; + int inX = min(max(floorDiv(midX, p.upx), 0), p.inW); + int w = min(max(floorDiv(midX + p.kernelW, p.upx), 0), p.inW) - inX; + int kernelX = midX + p.kernelW - (inX + 1) * p.upx; + + // Initialize pointers. + const T* xp = &p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx]; + const T* kp = &p.k[kernelY * p.kernelW + kernelX]; + int xpx = p.minorDim; + int kpx = -p.upx; + int xpy = p.inW * p.minorDim; + int kpy = -p.upy * p.kernelW; + + // Inner loop. + float v = 0.0f; + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++) + { + v += (float)(*xp) * (float)(*kp); + xp += xpx; + kp += kpx; + } + xp += xpy - w * xpx; + kp += kpy - w * kpx; + } + + // Store result. + p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v; + } +} + +//------------------------------------------------------------------------ +// Specialized CUDA implementation for small filter kernels. + +template +static __global__ void UpFirDn2DKernel_small(const UpFirDn2DKernelParams p) +{ + //assert(kernelW % upx == 0); + //assert(kernelH % upy == 0); + const int tileInW = ((tileOutW - 1) * downx + kernelW - 1) / upx + 1; + const int tileInH = ((tileOutH - 1) * downy + kernelH - 1) / upy + 1; + __shared__ volatile float sk[kernelH][kernelW]; + __shared__ volatile float sx[tileInH][tileInW]; + + // Calculate tile index. 
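+    // blockIdx.x jointly enumerates the minor dimension and the vertical output tiles
+    // (blockIdx.x == tileRow * p.minorDim + minorIdx), while blockIdx.y and blockIdx.z
+    // enumerate groups of horizontal tiles and major (batch) slices, respectively.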
+ int minorIdx = blockIdx.x; + int tileOutY = minorIdx / p.minorDim; + minorIdx -= tileOutY * p.minorDim; + tileOutY *= tileOutH; + int tileOutXBase = blockIdx.y * p.loopX * tileOutW; + int majorIdxBase = blockIdx.z * p.loopMajor; + if (tileOutXBase >= p.outW | tileOutY >= p.outH | majorIdxBase >= p.majorDim) + return; + + // Load filter kernel (flipped). + for (int tapIdx = threadIdx.x; tapIdx < kernelH * kernelW; tapIdx += blockDim.x) + { + int ky = tapIdx / kernelW; + int kx = tapIdx - ky * kernelW; + float v = 0.0f; + if (kx < p.kernelW & ky < p.kernelH) + v = (float)p.k[(p.kernelH - 1 - ky) * p.kernelW + (p.kernelW - 1 - kx)]; + sk[ky][kx] = v; + } + + // Loop over majorDim and outX. + for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor & majorIdx < p.majorDim; loopMajor++, majorIdx++) + for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outW; loopX++, tileOutX += tileOutW) + { + // Load input pixels. + int tileMidX = tileOutX * downx + upx - 1 - p.padx0; + int tileMidY = tileOutY * downy + upy - 1 - p.pady0; + int tileInX = floorDiv(tileMidX, upx); + int tileInY = floorDiv(tileMidY, upy); + __syncthreads(); + for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW; inIdx += blockDim.x) + { + int relInY = inIdx / tileInW; + int relInX = inIdx - relInY * tileInW; + int inX = relInX + tileInX; + int inY = relInY + tileInY; + float v = 0.0f; + if (inX >= 0 & inY >= 0 & inX < p.inW & inY < p.inH) + v = (float)p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx]; + sx[relInY][relInX] = v; + } + + // Loop over output pixels. + __syncthreads(); + for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW; outIdx += blockDim.x) + { + int relOutY = outIdx / tileOutW; + int relOutX = outIdx - relOutY * tileOutW; + int outX = relOutX + tileOutX; + int outY = relOutY + tileOutY; + + // Setup receptive field. + int midX = tileMidX + relOutX * downx; + int midY = tileMidY + relOutY * downy; + int inX = floorDiv(midX, upx); + int inY = floorDiv(midY, upy); + int relInX = inX - tileInX; + int relInY = inY - tileInY; + int kernelX = (inX + 1) * upx - midX - 1; // flipped + int kernelY = (inY + 1) * upy - midY - 1; // flipped + + // Inner loop. + float v = 0.0f; + #pragma unroll + for (int y = 0; y < kernelH / upy; y++) + #pragma unroll + for (int x = 0; x < kernelW / upx; x++) + v += sx[relInY + y][relInX + x] * sk[kernelY + y * upy][kernelX + x * upx]; + + // Store result. + if (outX < p.outW & outY < p.outH) + p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v; + } + } +} + +//------------------------------------------------------------------------ +// TensorFlow op. 
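+//
+// Output size follows the same rule as the Python wrapper: outW = (inW * upx + padx0 + padx1 - kernelW) / downx + 1
+// (and analogously for outH). Illustrative example: inW = 4, upx = 2, padx0 = padx1 = 1, kernelW = 4, downx = 2
+// gives outW = (8 + 2 - 4) / 2 + 1 = 4.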
+ +template +struct UpFirDn2DOp : public OpKernel +{ + UpFirDn2DKernelParams m_attribs; + + UpFirDn2DOp(OpKernelConstruction* ctx) : OpKernel(ctx) + { + memset(&m_attribs, 0, sizeof(m_attribs)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("upx", &m_attribs.upx)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("upy", &m_attribs.upy)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("downx", &m_attribs.downx)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("downy", &m_attribs.downy)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("padx0", &m_attribs.padx0)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("padx1", &m_attribs.padx1)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("pady0", &m_attribs.pady0)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("pady1", &m_attribs.pady1)); + OP_REQUIRES(ctx, m_attribs.upx >= 1 && m_attribs.upy >= 1, errors::InvalidArgument("upx and upy must be at least 1x1")); + OP_REQUIRES(ctx, m_attribs.downx >= 1 && m_attribs.downy >= 1, errors::InvalidArgument("downx and downy must be at least 1x1")); + } + + void Compute(OpKernelContext* ctx) + { + UpFirDn2DKernelParams p = m_attribs; + cudaStream_t stream = ctx->eigen_device().stream(); + + const Tensor& x = ctx->input(0); // [majorDim, inH, inW, minorDim] + const Tensor& k = ctx->input(1); // [kernelH, kernelW] + p.x = x.flat().data(); + p.k = k.flat().data(); + OP_REQUIRES(ctx, x.dims() == 4, errors::InvalidArgument("input must have rank 4")); + OP_REQUIRES(ctx, k.dims() == 2, errors::InvalidArgument("kernel must have rank 2")); + OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("input too large")); + OP_REQUIRES(ctx, k.NumElements() <= kint32max, errors::InvalidArgument("kernel too large")); + + p.majorDim = (int)x.dim_size(0); + p.inH = (int)x.dim_size(1); + p.inW = (int)x.dim_size(2); + p.minorDim = (int)x.dim_size(3); + p.kernelH = (int)k.dim_size(0); + p.kernelW = (int)k.dim_size(1); + OP_REQUIRES(ctx, p.kernelW >= 1 && p.kernelH >= 1, errors::InvalidArgument("kernel must be at least 1x1")); + + p.outW = (p.inW * p.upx + p.padx0 + p.padx1 - p.kernelW + p.downx) / p.downx; + p.outH = (p.inH * p.upy + p.pady0 + p.pady1 - p.kernelH + p.downy) / p.downy; + OP_REQUIRES(ctx, p.outW >= 1 && p.outH >= 1, errors::InvalidArgument("output must be at least 1x1")); + + Tensor* y = NULL; // [majorDim, outH, outW, minorDim] + TensorShape ys; + ys.AddDim(p.majorDim); + ys.AddDim(p.outH); + ys.AddDim(p.outW); + ys.AddDim(p.minorDim); + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, ys, &y)); + p.y = y->flat().data(); + OP_REQUIRES(ctx, y->NumElements() <= kint32max, errors::InvalidArgument("output too large")); + + // Choose CUDA kernel to use. 
+ void* cudaKernel = (void*)UpFirDn2DKernel_large; + int tileOutW = -1; + int tileOutH = -1; + + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 7 && p.kernelH <= 7 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 5 && p.kernelH <= 5 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 3 && p.kernelH <= 3 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 24 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 20 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 16 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 12 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 24) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 20) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 16) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 12) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 8 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 8 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 2 && p.kernelH <= 2 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 1 && p.downx == 1 && p.downy == 1 && 
p.kernelW <= 24 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 2 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 20 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 2 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 16 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 2 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 12 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 2 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 128; tileOutH = 8; } + if (p.upx == 1 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 24) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 20) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 16) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 12) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + if (p.upx == 1 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 1 && p.kernelH <= 8 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 32; } + + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 8 && p.kernelH <= 8 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 6 && p.kernelH <= 6 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 4 && p.kernelH <= 4 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 2 && p.kernelH <= 2 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 1 && p.kernelW <= 24 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 1 && p.kernelW <= 20 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 1 && p.kernelW <= 16 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 1 && p.kernelW <= 12 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 1 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 2 && p.kernelW <= 1 && p.kernelH <= 24) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy 
== 2 && p.kernelW <= 1 && p.kernelH <= 20) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 2 && p.kernelW <= 1 && p.kernelH <= 16) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 2 && p.kernelW <= 1 && p.kernelH <= 12) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 2 && p.kernelW <= 1 && p.kernelH <= 8 ) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 16; } + + // Choose launch params. + dim3 blockSize; + dim3 gridSize; + if (tileOutW > 0 && tileOutH > 0) // small + { + p.loopMajor = (p.majorDim - 1) / 16384 + 1; + p.loopX = 1; + blockSize = dim3(32 * 8, 1, 1); + gridSize = dim3(((p.outH - 1) / tileOutH + 1) * p.minorDim, (p.outW - 1) / (p.loopX * tileOutW) + 1, (p.majorDim - 1) / p.loopMajor + 1); + } + else // large + { + p.loopMajor = (p.majorDim - 1) / 16384 + 1; + p.loopX = 4; + blockSize = dim3(4, 32, 1); + gridSize = dim3((p.outH * p.minorDim - 1) / blockSize.x + 1, (p.outW - 1) / (p.loopX * blockSize.y) + 1, (p.majorDim - 1) / p.loopMajor + 1); + } + + // Launch CUDA kernel. + void* args[] = {&p}; + OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel(cudaKernel, gridSize, blockSize, args, 0, stream)); + } +}; + +REGISTER_OP("UpFirDn2D") + .Input ("x: T") + .Input ("k: T") + .Output ("y: T") + .Attr ("T: {float, half}") + .Attr ("upx: int = 1") + .Attr ("upy: int = 1") + .Attr ("downx: int = 1") + .Attr ("downy: int = 1") + .Attr ("padx0: int = 0") + .Attr ("padx1: int = 0") + .Attr ("pady0: int = 0") + .Attr ("pady1: int = 0"); +REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint("T"), UpFirDn2DOp); +REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint("T"), UpFirDn2DOp); + +//------------------------------------------------------------------------ diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/upfirdn_2d.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/upfirdn_2d.py new file mode 100644 index 0000000000000000000000000000000000000000..55a31af7e146da7afeb964db018f14aca3134920 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/ops/upfirdn_2d.py @@ -0,0 +1,418 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Custom TensorFlow ops for efficient resampling of 2D images.""" + +import os +import numpy as np +import tensorflow as tf +from .. import custom_ops + +def _get_plugin(): + return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu') + +#---------------------------------------------------------------------------- + +def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1, padx0=0, padx1=0, pady0=0, pady1=0, impl='cuda'): + r"""Pad, upsample, FIR filter, and downsample a batch of 2D images. 
+ + Accepts a batch of 2D images of the shape `[majorDim, inH, inW, minorDim]` + and performs the following operations for each image, batched across + `majorDim` and `minorDim`: + + 1. Upsample the image by inserting the zeros after each pixel (`upx`, `upy`). + + 2. Pad the image with zeros by the specified number of pixels on each side + (`padx0`, `padx1`, `pady0`, `pady1`). Specifying a negative value + corresponds to cropping the image. + + 3. Convolve the image with the specified 2D FIR filter (`k`), shrinking the + image so that the footprint of all output pixels lies within the input image. + + 4. Downsample the image by throwing away pixels (`downx`, `downy`). + + This sequence of operations bears close resemblance to scipy.signal.upfirdn(). + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[majorDim, inH, inW, minorDim]`. + k: 2D FIR filter of the shape `[firH, firW]`. + upx: Integer upsampling factor along the X-axis (default: 1). + upy: Integer upsampling factor along the Y-axis (default: 1). + downx: Integer downsampling factor along the X-axis (default: 1). + downy: Integer downsampling factor along the Y-axis (default: 1). + padx0: Number of pixels to pad on the left side (default: 0). + padx1: Number of pixels to pad on the right side (default: 0). + pady0: Number of pixels to pad on the top side (default: 0). + pady1: Number of pixels to pad on the bottom side (default: 0). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[majorDim, outH, outW, minorDim]`, and same datatype as `x`. + """ + + impl_dict = { + 'ref': _upfirdn_2d_ref, + 'cuda': _upfirdn_2d_cuda, + } + return impl_dict[impl](x=x, k=k, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1) + +#---------------------------------------------------------------------------- + +def _upfirdn_2d_ref(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): + """Slow reference implementation of `upfirdn_2d()` using standard TensorFlow ops.""" + + x = tf.convert_to_tensor(x) + k = np.asarray(k, dtype=np.float32) + assert x.shape.rank == 4 + inH = x.shape[1].value + inW = x.shape[2].value + minorDim = _shape(x, 3) + kernelH, kernelW = k.shape + assert inW >= 1 and inH >= 1 + assert kernelW >= 1 and kernelH >= 1 + assert isinstance(upx, int) and isinstance(upy, int) + assert isinstance(downx, int) and isinstance(downy, int) + assert isinstance(padx0, int) and isinstance(padx1, int) + assert isinstance(pady0, int) and isinstance(pady1, int) + + # Upsample (insert zeros). + x = tf.reshape(x, [-1, inH, 1, inW, 1, minorDim]) + x = tf.pad(x, [[0, 0], [0, 0], [0, upy - 1], [0, 0], [0, upx - 1], [0, 0]]) + x = tf.reshape(x, [-1, inH * upy, inW * upx, minorDim]) + + # Pad (crop if negative). + x = tf.pad(x, [[0, 0], [max(pady0, 0), max(pady1, 0)], [max(padx0, 0), max(padx1, 0)], [0, 0]]) + x = x[:, max(-pady0, 0) : x.shape[1].value - max(-pady1, 0), max(-padx0, 0) : x.shape[2].value - max(-padx1, 0), :] + + # Convolve with filter. 
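+    # The minor (channel) dimension is folded into the batch dimension so that a single-channel VALID
+    # convolution can be used, and the filter taps are flipped in both axes so the result is a true
+    # convolution rather than the cross-correlation computed by tf.nn.conv2d().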
+ x = tf.transpose(x, [0, 3, 1, 2]) + x = tf.reshape(x, [-1, 1, inH * upy + pady0 + pady1, inW * upx + padx0 + padx1]) + w = tf.constant(k[::-1, ::-1, np.newaxis, np.newaxis], dtype=x.dtype) + x = tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='VALID', data_format='NCHW') + x = tf.reshape(x, [-1, minorDim, inH * upy + pady0 + pady1 - kernelH + 1, inW * upx + padx0 + padx1 - kernelW + 1]) + x = tf.transpose(x, [0, 2, 3, 1]) + + # Downsample (throw away pixels). + return x[:, ::downy, ::downx, :] + +#---------------------------------------------------------------------------- + +def _upfirdn_2d_cuda(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): + """Fast CUDA implementation of `upfirdn_2d()` using custom ops.""" + + x = tf.convert_to_tensor(x) + k = np.asarray(k, dtype=np.float32) + majorDim, inH, inW, minorDim = x.shape.as_list() + kernelH, kernelW = k.shape + assert inW >= 1 and inH >= 1 + assert kernelW >= 1 and kernelH >= 1 + assert isinstance(upx, int) and isinstance(upy, int) + assert isinstance(downx, int) and isinstance(downy, int) + assert isinstance(padx0, int) and isinstance(padx1, int) + assert isinstance(pady0, int) and isinstance(pady1, int) + + outW = (inW * upx + padx0 + padx1 - kernelW) // downx + 1 + outH = (inH * upy + pady0 + pady1 - kernelH) // downy + 1 + assert outW >= 1 and outH >= 1 + + cuda_op = _get_plugin().up_fir_dn2d + kc = tf.constant(k, dtype=x.dtype) + gkc = tf.constant(k[::-1, ::-1], dtype=x.dtype) + gpadx0 = kernelW - padx0 - 1 + gpady0 = kernelH - pady0 - 1 + gpadx1 = inW * upx - outW * downx + padx0 - upx + 1 + gpady1 = inH * upy - outH * downy + pady0 - upy + 1 + + @tf.custom_gradient + def func(x): + y = cuda_op(x=x, k=kc, upx=int(upx), upy=int(upy), downx=int(downx), downy=int(downy), padx0=int(padx0), padx1=int(padx1), pady0=int(pady0), pady1=int(pady1)) + y.set_shape([majorDim, outH, outW, minorDim]) + @tf.custom_gradient + def grad(dy): + dx = cuda_op(x=dy, k=gkc, upx=int(downx), upy=int(downy), downx=int(upx), downy=int(upy), padx0=int(gpadx0), padx1=int(gpadx1), pady0=int(gpady0), pady1=int(gpady1)) + dx.set_shape([majorDim, inH, inW, minorDim]) + return dx, func + return y, grad + return func(x) + +#---------------------------------------------------------------------------- + +def filter_2d(x, k, gain=1, padding=0, data_format='NCHW', impl='cuda'): + r"""Filter a batch of 2D images with the given FIR filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and filters each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + gain: Scaling factor for signal magnitude (default: 1.0). + padding: Number of pixels to pad or crop the output on each side (default: 0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the same shape and datatype as `x`. 
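+
+    Example (illustrative sketch; assumes `x` is an NCHW image batch with a static shape):
+
+        k = [[1, 2, 1],
+             [2, 4, 2],
+             [1, 2, 1]]                    # 3x3 smoothing filter, normalized internally
+        y = filter_2d(x, k, impl='ref')    # same shape and datatype as x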
+ """ + + assert isinstance(padding, int) + k = _FilterKernel(k=k, gain=gain) + assert k.w == k.h + pad0 = k.w // 2 + padding + pad1 = (k.w - 1) // 2 + padding + return _simple_upfirdn_2d(x, k, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def upsample_2d(x, k=None, factor=2, gain=1, padding=0, data_format='NCHW', impl='cuda'): + r"""Upsample a batch of 2D images with the given filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and upsamples each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero, and the filter is padded with + zeros so that its shape is a multiple of the upsampling factor. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to nearest-neighbor + upsampling. + factor: Integer upsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + padding: Number of pixels to pad or crop the output on each side (default: 0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or + `[N, H * factor, W * factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + assert isinstance(padding, int) + k = _FilterKernel(k if k is not None else [1] * factor, gain * (factor ** 2)) + assert k.w == k.h + pad0 = (k.w + factor - 1) // 2 + padding + pad1 = (k.w - factor) // 2 + padding + return _simple_upfirdn_2d(x, k, up=factor, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def downsample_2d(x, k=None, factor=2, gain=1, padding=0, data_format='NCHW', impl='cuda'): + r"""Downsample a batch of 2D images with the given filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and downsamples each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero, and the filter is padded with + zeros so that its shape is a multiple of the downsampling factor. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + padding: Number of pixels to pad or crop the output on each side (default: 0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or + `[N, H // factor, W // factor, C]`, and same datatype as `x`. 
+ """ + + assert isinstance(factor, int) and factor >= 1 + assert isinstance(padding, int) + k = _FilterKernel(k if k is not None else [1] * factor, gain) + assert k.w == k.h + pad0 = (k.w - factor + 1) // 2 + padding * factor + pad1 = (k.w - factor) // 2 + padding * factor + return _simple_upfirdn_2d(x, k, down=factor, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def upsample_conv_2d(x, w, k=None, factor=2, gain=1, padding=0, data_format='NCHW', impl='cuda'): + r"""Fused `upsample_2d()` followed by `tf.nn.conv2d()`. + + Padding is performed only once at the beginning, not between the operations. + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. + Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to nearest-neighbor + upsampling. + factor: Integer upsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + padding: Number of pixels to pad or crop the output on each side (default: 0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or + `[N, H * factor, W * factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + assert isinstance(padding, int) + + # Check weight shape. + w = tf.convert_to_tensor(w) + ch, cw, _inC, _outC = w.shape.as_list() + inC = _shape(w, 2) + outC = _shape(w, 3) + assert cw == ch + + # Fast path for 1x1 convolution. + if cw == 1 and ch == 1: + x = tf.nn.conv2d(x, w, data_format=data_format, strides=[1,1,1,1], padding='VALID') + x = upsample_2d(x, k, factor=factor, gain=gain, padding=padding, data_format=data_format, impl=impl) + return x + + # Setup filter kernel. + k = _FilterKernel(k if k is not None else [1] * factor, gain * (factor ** 2)) + assert k.w == k.h + + # Determine data dimensions. + if data_format == 'NCHW': + stride = [1, 1, factor, factor] + output_shape = [_shape(x, 0), outC, (_shape(x, 2) - 1) * factor + ch, (_shape(x, 3) - 1) * factor + cw] + num_groups = _shape(x, 1) // inC + else: + stride = [1, factor, factor, 1] + output_shape = [_shape(x, 0), (_shape(x, 1) - 1) * factor + ch, (_shape(x, 2) - 1) * factor + cw, outC] + num_groups = _shape(x, 3) // inC + + # Transpose weights. + w = tf.reshape(w, [ch, cw, inC, num_groups, -1]) + w = tf.transpose(w[::-1, ::-1], [0, 1, 4, 3, 2]) + w = tf.reshape(w, [ch, cw, -1, num_groups * inC]) + + # Execute. + x = tf.nn.conv2d_transpose(x, w, output_shape=output_shape, strides=stride, padding='VALID', data_format=data_format) + pad0 = (k.w + factor - cw) // 2 + padding + pad1 = (k.w - factor - cw + 3) // 2 + padding + return _simple_upfirdn_2d(x, k, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def conv_downsample_2d(x, w, k=None, factor=2, gain=1, padding=0, data_format='NCHW', impl='cuda'): + r"""Fused `tf.nn.conv2d()` followed by `downsample_2d()`. 
+ + Padding is performed only once at the beginning, not between the operations. + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. + Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + padding: Number of pixels to pad or crop the output on each side (default: 0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or + `[N, H // factor, W // factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + assert isinstance(padding, int) + + # Check weight shape. + w = tf.convert_to_tensor(w) + ch, cw, _inC, _outC = w.shape.as_list() + assert cw == ch + + # Fast path for 1x1 convolution. + if cw == 1 and ch == 1: + x = downsample_2d(x, k, factor=factor, gain=gain, padding=padding, data_format=data_format, impl=impl) + x = tf.nn.conv2d(x, w, data_format=data_format, strides=[1,1,1,1], padding='VALID') + return x + + # Setup filter kernel. + k = _FilterKernel(k if k is not None else [1] * factor, gain) + assert k.w == k.h + + # Determine stride. + if data_format == 'NCHW': + s = [1, 1, factor, factor] + else: + s = [1, factor, factor, 1] + + # Execute. + pad0 = (k.w - factor + cw) // 2 + padding * factor + pad1 = (k.w - factor + cw - 1) // 2 + padding * factor + x = _simple_upfirdn_2d(x, k, pad0=pad0, pad1=pad1, data_format=data_format, impl=impl) + return tf.nn.conv2d(x, w, strides=s, padding='VALID', data_format=data_format) + +#---------------------------------------------------------------------------- +# Internal helpers. + +class _FilterKernel: + def __init__(self, k, gain=1): + k = np.asarray(k, dtype=np.float32) + k /= np.sum(k) + + # Separable. + if k.ndim == 1 and k.size >= 8: + self.w = k.size + self.h = k.size + self.kx = k[np.newaxis, :] + self.ky = k[:, np.newaxis] * gain + self.kxy = None + + # Non-separable. 
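+        # (1-D kernels shorter than 8 taps also take this branch: they are expanded
+        # into a full 2-D kernel via an outer product instead of being applied as
+        # two cheaper 1-D passes.)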
+ else: + if k.ndim == 1: + k = np.outer(k, k) + assert k.ndim == 2 + self.w = k.shape[1] + self.h = k.shape[0] + self.kx = None + self.ky = None + self.kxy = k * gain + +def _simple_upfirdn_2d(x, k, up=1, down=1, pad0=0, pad1=0, data_format='NCHW', impl='cuda'): + assert isinstance(k, _FilterKernel) + assert data_format in ['NCHW', 'NHWC'] + assert x.shape.rank == 4 + y = x + if data_format == 'NCHW': + y = tf.reshape(y, [-1, _shape(y, 2), _shape(y, 3), 1]) + if k.kx is not None: + y = upfirdn_2d(y, k.kx, upx=up, downx=down, padx0=pad0, padx1=pad1, impl=impl) + if k.ky is not None: + y = upfirdn_2d(y, k.ky, upy=up, downy=down, pady0=pad0, pady1=pad1, impl=impl) + if k.kxy is not None: + y = upfirdn_2d(y, k.kxy, upx=up, upy=up, downx=down, downy=down, padx0=pad0, padx1=pad1, pady0=pad0, pady1=pad1, impl=impl) + if data_format == 'NCHW': + y = tf.reshape(y, [-1, _shape(x, 1), _shape(y, 1), _shape(y, 2)]) + return y + +def _shape(tf_expr, dim_idx): + if tf_expr.shape.rank is not None: + dim = tf_expr.shape[dim_idx].value + if dim is not None: + return dim + return tf.shape(tf_expr)[dim_idx] + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/optimizer.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..0f8b539ab2def5f78e70a431ceabc6588e4087c0 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/optimizer.py @@ -0,0 +1,372 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Helper wrapper for a Tensorflow optimizer.""" + +import platform +import numpy as np +import tensorflow as tf + +from collections import OrderedDict +from typing import List, Union + +from . import autosummary +from . import tfutil +from .. import util + +from .tfutil import TfExpression, TfExpressionEx + +_collective_ops_warning_printed = False +_collective_ops_group_key = 831766147 +_collective_ops_instance_key = 436340067 + +class Optimizer: + """A Wrapper for tf.train.Optimizer. + + Automatically takes care of: + - Gradient averaging for multi-GPU training. + - Gradient accumulation for arbitrarily large minibatches. + - Dynamic loss scaling and typecasts for FP16 training. + - Ignoring corrupted gradients that contain NaNs/Infs. + - Reporting statistics. + - Well-chosen default settings. + """ + + def __init__(self, + name: str = "Train", # Name string that will appear in TensorFlow graph. + tf_optimizer: str = "tf.train.AdamOptimizer", # Underlying optimizer class. + learning_rate: TfExpressionEx = 0.001, # Learning rate. Can vary over time. + minibatch_multiplier: TfExpressionEx = None, # Treat N consecutive minibatches as one by accumulating gradients. + share: "Optimizer" = None, # Share internal state with a previously created optimizer? + use_loss_scaling: bool = False, # Enable dynamic loss scaling for robust mixed-precision training? + loss_scaling_init: float = 64.0, # Log2 of initial loss scaling factor. 
+ loss_scaling_inc: float = 0.0005, # Log2 of per-minibatch loss scaling increment when there is no overflow. + loss_scaling_dec: float = 1.0, # Log2 of per-minibatch loss scaling decrement when there is an overflow. + report_mem_usage: bool = False, # Report fine-grained memory usage statistics in TensorBoard? + **kwargs): + + # Public fields. + self.name = name + self.learning_rate = learning_rate + self.minibatch_multiplier = minibatch_multiplier + self.id = self.name.replace("/", ".") + self.scope = tf.get_default_graph().unique_name(self.id) + self.optimizer_class = util.get_obj_by_name(tf_optimizer) + self.optimizer_kwargs = dict(kwargs) + self.use_loss_scaling = use_loss_scaling + self.loss_scaling_init = loss_scaling_init + self.loss_scaling_inc = loss_scaling_inc + self.loss_scaling_dec = loss_scaling_dec + + # Private fields. + self._updates_applied = False + self._devices = OrderedDict() # device_name => EasyDict() + self._shared_optimizers = OrderedDict() # device_name => optimizer_class + self._gradient_shapes = None # [shape, ...] + self._report_mem_usage = report_mem_usage + + # Validate arguments. + assert callable(self.optimizer_class) + + # Share internal state if requested. + if share is not None: + assert isinstance(share, Optimizer) + assert self.optimizer_class is share.optimizer_class + assert self.learning_rate is share.learning_rate + assert self.optimizer_kwargs == share.optimizer_kwargs + self._shared_optimizers = share._shared_optimizers # pylint: disable=protected-access + + def _get_device(self, device_name: str): + """Get internal state for the given TensorFlow device.""" + tfutil.assert_tf_initialized() + if device_name in self._devices: + return self._devices[device_name] + + # Initialize fields. + device = util.EasyDict() + device.name = device_name + device.optimizer = None # Underlying optimizer: optimizer_class + device.loss_scaling_var = None # Log2 of loss scaling: tf.Variable + device.grad_raw = OrderedDict() # Raw gradients: var => [grad, ...] + device.grad_clean = OrderedDict() # Clean gradients: var => grad + device.grad_acc_vars = OrderedDict() # Accumulation sums: var => tf.Variable + device.grad_acc_count = None # Accumulation counter: tf.Variable + device.grad_acc = OrderedDict() # Accumulated gradients: var => grad + + # Setup TensorFlow objects. + with tfutil.absolute_name_scope(self.scope + "/Devices"), tf.device(device_name), tf.control_dependencies(None): + if device_name not in self._shared_optimizers: + optimizer_name = self.scope.replace("/", "_") + "_opt%d" % len(self._shared_optimizers) + self._shared_optimizers[device_name] = self.optimizer_class(name=optimizer_name, learning_rate=self.learning_rate, **self.optimizer_kwargs) + device.optimizer = self._shared_optimizers[device_name] + if self.use_loss_scaling: + device.loss_scaling_var = tf.Variable(np.float32(self.loss_scaling_init), trainable=False, name="loss_scaling_var") + + # Register device. + self._devices[device_name] = device + return device + + def register_gradients(self, loss: TfExpression, trainable_vars: Union[List, dict]) -> None: + """Register the gradients of the given loss function with respect to the given variables. + Intended to be called once per GPU.""" + tfutil.assert_tf_initialized() + assert not self._updates_applied + device = self._get_device(loss.device) + + # Validate trainables. 
+ if isinstance(trainable_vars, dict): + trainable_vars = list(trainable_vars.values()) # allow passing in Network.trainables as vars + assert isinstance(trainable_vars, list) and len(trainable_vars) >= 1 + assert all(tfutil.is_tf_expression(expr) for expr in trainable_vars + [loss]) + assert all(var.device == device.name for var in trainable_vars) + + # Validate shapes. + if self._gradient_shapes is None: + self._gradient_shapes = [var.shape.as_list() for var in trainable_vars] + assert len(trainable_vars) == len(self._gradient_shapes) + assert all(var.shape.as_list() == var_shape for var, var_shape in zip(trainable_vars, self._gradient_shapes)) + + # Report memory usage if requested. + deps = [loss] + if self._report_mem_usage: + self._report_mem_usage = False + try: + with tf.name_scope(self.id + '_mem'), tf.device(device.name), tf.control_dependencies([loss]): + deps.append(autosummary.autosummary(self.id + "/mem_usage_gb", tf.contrib.memory_stats.BytesInUse() / 2**30)) + except tf.errors.NotFoundError: + pass + + # Compute gradients. + with tf.name_scope(self.id + "_grad"), tf.device(device.name), tf.control_dependencies(deps): + loss = self.apply_loss_scaling(tf.cast(loss, tf.float32)) + gate = tf.train.Optimizer.GATE_NONE # disable gating to reduce memory usage + grad_list = device.optimizer.compute_gradients(loss=loss, var_list=trainable_vars, gate_gradients=gate) + + # Register gradients. + for grad, var in grad_list: + if var not in device.grad_raw: + device.grad_raw[var] = [] + device.grad_raw[var].append(grad) + + def apply_updates(self, allow_no_op: bool = False) -> tf.Operation: + """Construct training op to update the registered variables based on their gradients.""" + tfutil.assert_tf_initialized() + assert not self._updates_applied + self._updates_applied = True + all_ops = [] + + # Check for no-op. + if allow_no_op and len(self._devices) == 0: + with tfutil.absolute_name_scope(self.scope): + return tf.no_op(name='TrainingOp') + + # Clean up gradients. + for device_idx, device in enumerate(self._devices.values()): + with tfutil.absolute_name_scope(self.scope + "/Clean%d" % device_idx), tf.device(device.name): + for var, grad in device.grad_raw.items(): + + # Filter out disconnected gradients and convert to float32. + grad = [g for g in grad if g is not None] + grad = [tf.cast(g, tf.float32) for g in grad] + + # Sum within the device. + if len(grad) == 0: + grad = tf.zeros(var.shape) # No gradients => zero. + elif len(grad) == 1: + grad = grad[0] # Single gradient => use as is. + else: + grad = tf.add_n(grad) # Multiple gradients => sum. + + # Scale as needed. + scale = 1.0 / len(device.grad_raw[var]) / len(self._devices) + scale = tf.constant(scale, dtype=tf.float32, name="scale") + if self.minibatch_multiplier is not None: + scale /= tf.cast(self.minibatch_multiplier, tf.float32) + scale = self.undo_loss_scaling(scale) + device.grad_clean[var] = grad * scale + + # Sum gradients across devices. + if len(self._devices) > 1: + with tfutil.absolute_name_scope(self.scope + "/Broadcast"), tf.device(None): + if platform.system() == "Windows": # Windows => NCCL ops are not available. + self._broadcast_fallback() + elif tf.VERSION.startswith("1.15."): # TF 1.15 => NCCL ops are broken: https://github.com/tensorflow/tensorflow/issues/41539 + self._broadcast_fallback() + else: # Otherwise => NCCL ops are safe to use. + self._broadcast_nccl() + + # Apply updates separately on each device. 
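+        # For each device: if `minibatch_multiplier` is set, clean gradients are summed
+        # into `grad_acc_vars` across consecutive minibatches and only applied once the
+        # counter reaches the multiplier; the update is skipped entirely if any
+        # accumulated gradient contains NaN/Inf, and with `use_loss_scaling` the log2
+        # loss-scaling factor is increased on success and decreased on overflow.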
+ for device_idx, device in enumerate(self._devices.values()): + with tfutil.absolute_name_scope(self.scope + "/Apply%d" % device_idx), tf.device(device.name): + # pylint: disable=cell-var-from-loop + + # Accumulate gradients over time. + if self.minibatch_multiplier is None: + acc_ok = tf.constant(True, name='acc_ok') + device.grad_acc = OrderedDict(device.grad_clean) + else: + # Create variables. + with tf.control_dependencies(None): + for var in device.grad_clean.keys(): + device.grad_acc_vars[var] = tf.Variable(tf.zeros(var.shape), trainable=False, name="grad_acc_var") + device.grad_acc_count = tf.Variable(tf.zeros([]), trainable=False, name="grad_acc_count") + + # Track counter. + count_cur = device.grad_acc_count + 1.0 + count_inc_op = lambda: tf.assign(device.grad_acc_count, count_cur) + count_reset_op = lambda: tf.assign(device.grad_acc_count, tf.zeros([])) + acc_ok = (count_cur >= tf.cast(self.minibatch_multiplier, tf.float32)) + all_ops.append(tf.cond(acc_ok, count_reset_op, count_inc_op)) + + # Track gradients. + for var, grad in device.grad_clean.items(): + acc_var = device.grad_acc_vars[var] + acc_cur = acc_var + grad + device.grad_acc[var] = acc_cur + with tf.control_dependencies([acc_cur]): + acc_inc_op = lambda: tf.assign(acc_var, acc_cur) + acc_reset_op = lambda: tf.assign(acc_var, tf.zeros(var.shape)) + all_ops.append(tf.cond(acc_ok, acc_reset_op, acc_inc_op)) + + # No overflow => apply gradients. + all_ok = tf.reduce_all(tf.stack([acc_ok] + [tf.reduce_all(tf.is_finite(g)) for g in device.grad_acc.values()])) + apply_op = lambda: device.optimizer.apply_gradients([(tf.cast(grad, var.dtype), var) for var, grad in device.grad_acc.items()]) + all_ops.append(tf.cond(all_ok, apply_op, tf.no_op)) + + # Adjust loss scaling. + if self.use_loss_scaling: + ls_inc_op = lambda: tf.assign_add(device.loss_scaling_var, self.loss_scaling_inc) + ls_dec_op = lambda: tf.assign_sub(device.loss_scaling_var, self.loss_scaling_dec) + ls_update_op = lambda: tf.group(tf.cond(all_ok, ls_inc_op, ls_dec_op)) + all_ops.append(tf.cond(acc_ok, ls_update_op, tf.no_op)) + + # Last device => report statistics. + if device_idx == len(self._devices) - 1: + all_ops.append(autosummary.autosummary(self.id + "/learning_rate", tf.convert_to_tensor(self.learning_rate))) + all_ops.append(autosummary.autosummary(self.id + "/overflow_frequency", tf.where(all_ok, 0, 1), condition=acc_ok)) + if self.use_loss_scaling: + all_ops.append(autosummary.autosummary(self.id + "/loss_scaling_log2", device.loss_scaling_var)) + + # Initialize variables. + self.reset_optimizer_state() + if self.use_loss_scaling: + tfutil.init_uninitialized_vars([device.loss_scaling_var for device in self._devices.values()]) + if self.minibatch_multiplier is not None: + tfutil.run([var.initializer for device in self._devices.values() for var in list(device.grad_acc_vars.values()) + [device.grad_acc_count]]) + + # Group everything into a single op. 
+ with tfutil.absolute_name_scope(self.scope): + return tf.group(*all_ops, name="TrainingOp") + + def reset_optimizer_state(self) -> None: + """Reset internal state of the underlying optimizer.""" + tfutil.assert_tf_initialized() + tfutil.run([var.initializer for device in self._devices.values() for var in device.optimizer.variables()]) + + def get_loss_scaling_var(self, device: str) -> Union[tf.Variable, None]: + """Get or create variable representing log2 of the current dynamic loss scaling factor.""" + return self._get_device(device).loss_scaling_var + + def apply_loss_scaling(self, value: TfExpression) -> TfExpression: + """Apply dynamic loss scaling for the given expression.""" + assert tfutil.is_tf_expression(value) + if not self.use_loss_scaling: + return value + return value * tfutil.exp2(self.get_loss_scaling_var(value.device)) + + def undo_loss_scaling(self, value: TfExpression) -> TfExpression: + """Undo the effect of dynamic loss scaling for the given expression.""" + assert tfutil.is_tf_expression(value) + if not self.use_loss_scaling: + return value + return value * tfutil.exp2(-self.get_loss_scaling_var(value.device)) # pylint: disable=invalid-unary-operand-type + + def _broadcast_nccl(self): + """Sum gradients across devices using NCCL ops (fast path).""" + from tensorflow.python.ops import nccl_ops # pylint: disable=no-name-in-module + for all_vars in zip(*[device.grad_clean.keys() for device in self._devices.values()]): + if any(x.shape.num_elements() > 0 for x in all_vars): + all_grads = [device.grad_clean[var] for device, var in zip(self._devices.values(), all_vars)] + all_grads = nccl_ops.all_sum(all_grads) + for device, var, grad in zip(self._devices.values(), all_vars, all_grads): + device.grad_clean[var] = grad + + def _broadcast_fallback(self): + """Sum gradients across devices using TensorFlow collective ops (slow fallback path).""" + from tensorflow.python.ops import collective_ops # pylint: disable=no-name-in-module + global _collective_ops_warning_printed, _collective_ops_group_key, _collective_ops_instance_key + if all(x.shape.num_elements() == 0 for device in self._devices.values() for x in device.grad_clean.values()): + return + if not _collective_ops_warning_printed: + print("------------------------------------------------------------------------") + print("WARNING: Using slow fallback implementation for inter-GPU communication.") + print("Please use TensorFlow 1.14 on Linux for optimal training performance.") + print("------------------------------------------------------------------------") + _collective_ops_warning_printed = True + for device in self._devices.values(): + with tf.device(device.name): + combo = [tf.reshape(x, [x.shape.num_elements()]) for x in device.grad_clean.values()] + combo = tf.concat(combo, axis=0) + combo = collective_ops.all_reduce(combo, merge_op='Add', final_op='Id', + group_size=len(self._devices), group_key=_collective_ops_group_key, + instance_key=_collective_ops_instance_key) + cur_ofs = 0 + for var, grad_old in device.grad_clean.items(): + grad_new = tf.reshape(combo[cur_ofs : cur_ofs + grad_old.shape.num_elements()], grad_old.shape) + cur_ofs += grad_old.shape.num_elements() + device.grad_clean[var] = grad_new + _collective_ops_instance_key += 1 + + +class SimpleAdam: + """Simplified version of tf.train.AdamOptimizer that behaves identically when used with dnnlib.tflib.Optimizer.""" + + def __init__(self, name="Adam", learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8): + self.name = name + self.learning_rate = 
learning_rate + self.beta1 = beta1 + self.beta2 = beta2 + self.epsilon = epsilon + self.all_state_vars = [] + + def variables(self): + return self.all_state_vars + + def compute_gradients(self, loss, var_list, gate_gradients=tf.train.Optimizer.GATE_NONE): + assert gate_gradients == tf.train.Optimizer.GATE_NONE + return list(zip(tf.gradients(loss, var_list), var_list)) + + def apply_gradients(self, grads_and_vars): + with tf.name_scope(self.name): + state_vars = [] + update_ops = [] + + # Adjust learning rate to deal with startup bias. + with tf.control_dependencies(None): + b1pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False) + b2pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False) + state_vars += [b1pow_var, b2pow_var] + b1pow_new = b1pow_var * self.beta1 + b2pow_new = b2pow_var * self.beta2 + update_ops += [tf.assign(b1pow_var, b1pow_new), tf.assign(b2pow_var, b2pow_new)] + lr_new = self.learning_rate * tf.sqrt(1 - b2pow_new) / (1 - b1pow_new) + + # Construct ops to update each variable. + for grad, var in grads_and_vars: + with tf.control_dependencies(None): + m_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False) + v_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False) + state_vars += [m_var, v_var] + m_new = self.beta1 * m_var + (1 - self.beta1) * grad + v_new = self.beta2 * v_var + (1 - self.beta2) * tf.square(grad) + var_delta = lr_new * m_new / (tf.sqrt(v_new) + self.epsilon) + update_ops += [tf.assign(m_var, m_new), tf.assign(v_var, v_new), tf.assign_sub(var, var_delta)] + + # Group everything together. + self.all_state_vars += state_vars + return tf.group(*update_ops) diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/tfutil.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/tfutil.py new file mode 100644 index 0000000000000000000000000000000000000000..a8781228748103b0e60b409b79a7dc6ed3da75d9 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/tflib/tfutil.py @@ -0,0 +1,262 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Miscellaneous helper utils for Tensorflow.""" + +import os +import numpy as np +import tensorflow as tf + +# Silence deprecation warnings from TensorFlow 1.13 onwards +import logging +logging.getLogger('tensorflow').setLevel(logging.ERROR) +import tensorflow.contrib # requires TensorFlow 1.x! 
+tf.contrib = tensorflow.contrib + +from typing import Any, Iterable, List, Union + +TfExpression = Union[tf.Tensor, tf.Variable, tf.Operation] +"""A type that represents a valid Tensorflow expression.""" + +TfExpressionEx = Union[TfExpression, int, float, np.ndarray] +"""A type that can be converted to a valid Tensorflow expression.""" + + +def run(*args, **kwargs) -> Any: + """Run the specified ops in the default session.""" + assert_tf_initialized() + return tf.get_default_session().run(*args, **kwargs) + + +def is_tf_expression(x: Any) -> bool: + """Check whether the input is a valid Tensorflow expression, i.e., Tensorflow Tensor, Variable, or Operation.""" + return isinstance(x, (tf.Tensor, tf.Variable, tf.Operation)) + + +def shape_to_list(shape: Iterable[tf.Dimension]) -> List[Union[int, None]]: + """Convert a Tensorflow shape to a list of ints. Retained for backwards compatibility -- use TensorShape.as_list() in new code.""" + return [dim.value for dim in shape] + + +def flatten(x: TfExpressionEx) -> TfExpression: + """Shortcut function for flattening a tensor.""" + with tf.name_scope("Flatten"): + return tf.reshape(x, [-1]) + + +def log2(x: TfExpressionEx) -> TfExpression: + """Logarithm in base 2.""" + with tf.name_scope("Log2"): + return tf.log(x) * np.float32(1.0 / np.log(2.0)) + + +def exp2(x: TfExpressionEx) -> TfExpression: + """Exponent in base 2.""" + with tf.name_scope("Exp2"): + return tf.exp(x * np.float32(np.log(2.0))) + + +def erfinv(y: TfExpressionEx) -> TfExpression: + """Inverse of the error function.""" + # pylint: disable=no-name-in-module + from tensorflow.python.ops.distributions import special_math + return special_math.erfinv(y) + + +def lerp(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpressionEx: + """Linear interpolation.""" + with tf.name_scope("Lerp"): + return a + (b - a) * t + + +def lerp_clip(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpression: + """Linear interpolation with clip.""" + with tf.name_scope("LerpClip"): + return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0) + + +def absolute_name_scope(scope: str) -> tf.name_scope: + """Forcefully enter the specified name scope, ignoring any surrounding scopes.""" + return tf.name_scope(scope + "/") + + +def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope: + """Forcefully enter the specified variable scope, ignoring any surrounding scopes.""" + return tf.variable_scope(tf.VariableScope(name=scope, **kwargs), auxiliary_name_scope=False) + + +def _sanitize_tf_config(config_dict: dict = None) -> dict: + # Defaults. + cfg = dict() + cfg["rnd.np_random_seed"] = None # Random seed for NumPy. None = keep as is. + cfg["rnd.tf_random_seed"] = "auto" # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is. + cfg["env.TF_CPP_MIN_LOG_LEVEL"] = "1" # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info. + cfg["env.HDF5_USE_FILE_LOCKING"] = "FALSE" # Disable HDF5 file locking to avoid concurrency issues with network shares. + cfg["graph_options.place_pruned_graph"] = True # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used. + cfg["gpu_options.allow_growth"] = True # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed. + + # Remove defaults for environment variables that are already set. 
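+    # (Values the user has already exported take precedence: the corresponding defaults
+    # are dropped here instead of overwriting os.environ later in init_tf().)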
+ for key in list(cfg): + fields = key.split(".") + if fields[0] == "env": + assert len(fields) == 2 + if fields[1] in os.environ: + del cfg[key] + + # User overrides. + if config_dict is not None: + cfg.update(config_dict) + return cfg + + +def init_tf(config_dict: dict = None) -> None: + """Initialize TensorFlow session using good default settings.""" + # Skip if already initialized. + if tf.get_default_session() is not None: + return + + # Setup config dict and random seeds. + cfg = _sanitize_tf_config(config_dict) + np_random_seed = cfg["rnd.np_random_seed"] + if np_random_seed is not None: + np.random.seed(np_random_seed) + tf_random_seed = cfg["rnd.tf_random_seed"] + if tf_random_seed == "auto": + tf_random_seed = np.random.randint(1 << 31) + if tf_random_seed is not None: + tf.set_random_seed(tf_random_seed) + + # Setup environment variables. + for key, value in cfg.items(): + fields = key.split(".") + if fields[0] == "env": + assert len(fields) == 2 + os.environ[fields[1]] = str(value) + + # Create default TensorFlow session. + create_session(cfg, force_as_default=True) + + +def assert_tf_initialized(): + """Check that TensorFlow session has been initialized.""" + if tf.get_default_session() is None: + raise RuntimeError("No default TensorFlow session found. Please call dnnlib.tflib.init_tf().") + + +def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session: + """Create tf.Session based on config dict.""" + # Setup TensorFlow config proto. + cfg = _sanitize_tf_config(config_dict) + config_proto = tf.ConfigProto() + for key, value in cfg.items(): + fields = key.split(".") + if fields[0] not in ["rnd", "env"]: + obj = config_proto + for field in fields[:-1]: + obj = getattr(obj, field) + setattr(obj, fields[-1], value) + + # Create session. + session = tf.Session(config=config_proto) + if force_as_default: + # pylint: disable=protected-access + session._default_session = session.as_default() + session._default_session.enforce_nesting = False + session._default_session.__enter__() + return session + + +def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None: + """Initialize all tf.Variables that have not already been initialized. + + Equivalent to the following, but more efficient and does not bloat the tf graph: + tf.variables_initializer(tf.report_uninitialized_variables()).run() + """ + assert_tf_initialized() + if target_vars is None: + target_vars = tf.global_variables() + + test_vars = [] + test_ops = [] + + with tf.control_dependencies(None): # ignore surrounding control_dependencies + for var in target_vars: + assert is_tf_expression(var) + + try: + tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/IsVariableInitialized:0")) + except KeyError: + # Op does not exist => variable may be uninitialized. + test_vars.append(var) + + with absolute_name_scope(var.name.split(":")[0]): + test_ops.append(tf.is_variable_initialized(var)) + + init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited] + run([var.initializer for var in init_vars]) + + +def set_vars(var_to_value_dict: dict) -> None: + """Set the values of given tf.Variables. 
+ + Equivalent to the following, but more efficient and does not bloat the tf graph: + tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()] + """ + assert_tf_initialized() + ops = [] + feed_dict = {} + + for var, value in var_to_value_dict.items(): + assert is_tf_expression(var) + + try: + setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/setter:0")) # look for existing op + except KeyError: + with absolute_name_scope(var.name.split(":")[0]): + with tf.control_dependencies(None): # ignore surrounding control_dependencies + setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, "new_value"), name="setter") # create new setter + + ops.append(setter) + feed_dict[setter.op.inputs[1]] = value + + run(ops, feed_dict) + + +def create_var_with_large_initial_value(initial_value: np.ndarray, *args, **kwargs): + """Create tf.Variable with large initial value without bloating the tf graph.""" + assert_tf_initialized() + assert isinstance(initial_value, np.ndarray) + zeros = tf.zeros(initial_value.shape, initial_value.dtype) + var = tf.Variable(zeros, *args, **kwargs) + set_vars({var: initial_value}) + return var + + +def convert_images_from_uint8(images, drange=[-1,1], nhwc_to_nchw=False): + """Convert a minibatch of images from uint8 to float32 with configurable dynamic range. + Can be used as an input transformation for Network.run(). + """ + images = tf.cast(images, tf.float32) + if nhwc_to_nchw: + images = tf.transpose(images, [0, 3, 1, 2]) + return images * ((drange[1] - drange[0]) / 255) + drange[0] + + +def convert_images_to_uint8(images, drange=[-1,1], nchw_to_nhwc=False, shrink=1): + """Convert a minibatch of images from float32 to uint8 with configurable dynamic range. + Can be used as an output transformation for Network.run(). + """ + images = tf.cast(images, tf.float32) + if shrink > 1: + ksize = [1, 1, shrink, shrink] + images = tf.nn.avg_pool(images, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") + if nchw_to_nhwc: + images = tf.transpose(images, [0, 2, 3, 1]) + scale = 255 / (drange[1] - drange[0]) + images = images * scale + (0.5 - drange[0] * scale) + return tf.saturate_cast(images, tf.uint8) diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/util.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/util.py new file mode 100644 index 0000000000000000000000000000000000000000..07438ce07b74fd03f6a37507634ed0d6cb34b38e --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/dnnlib/util.py @@ -0,0 +1,472 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ +"""Miscellaneous utility classes and functions.""" + +import ctypes +import fnmatch +import importlib +import inspect +import numpy as np +import os +import shutil +import sys +import types +import io +import pickle +import re +import requests +import html +import hashlib +import glob +import tempfile +import urllib +import urllib.request +import uuid + +from distutils.util import strtobool +from typing import Any, List, Tuple, Union + + +# Util classes +# ------------------------------------------------------------------------------------------ + + +class EasyDict(dict): + """Convenience class that behaves like a dict but allows access with the attribute syntax.""" + + def __getattr__(self, name: str) -> Any: + try: + return self[name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name: str, value: Any) -> None: + self[name] = value + + def __delattr__(self, name: str) -> None: + del self[name] + + +class Logger(object): + """Redirect stderr to stdout, optionally print stdout to a file, and optionally force flushing on both stdout and the file.""" + + def __init__(self, file_name: str = None, file_mode: str = "w", should_flush: bool = True): + self.file = None + + if file_name is not None: + self.file = open(file_name, file_mode) + + self.should_flush = should_flush + self.stdout = sys.stdout + self.stderr = sys.stderr + + sys.stdout = self + sys.stderr = self + + def __enter__(self) -> "Logger": + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.close() + + def write(self, text: str) -> None: + """Write text to stdout (and a file) and optionally flush.""" + if len(text) == 0: # workaround for a bug in VSCode debugger: sys.stdout.write(''); sys.stdout.flush() => crash + return + + if self.file is not None: + self.file.write(text) + + self.stdout.write(text) + + if self.should_flush: + self.flush() + + def flush(self) -> None: + """Flush written text to both stdout and a file, if open.""" + if self.file is not None: + self.file.flush() + + self.stdout.flush() + + def close(self) -> None: + """Flush, close possible files, and remove stdout/stderr mirroring.""" + self.flush() + + # if using multiple loggers, prevent closing in wrong order + if sys.stdout is self: + sys.stdout = self.stdout + if sys.stderr is self: + sys.stderr = self.stderr + + if self.file is not None: + self.file.close() + + +# Cache directories +# ------------------------------------------------------------------------------------------ + +_dnnlib_cache_dir = None + +def set_cache_dir(path: str) -> None: + global _dnnlib_cache_dir + _dnnlib_cache_dir = path + +def make_cache_dir_path(*paths: str) -> str: + if _dnnlib_cache_dir is not None: + return os.path.join(_dnnlib_cache_dir, *paths) + if 'DNNLIB_CACHE_DIR' in os.environ: + return os.path.join(os.environ['DNNLIB_CACHE_DIR'], *paths) + if 'HOME' in os.environ: + return os.path.join(os.environ['HOME'], '.cache', 'dnnlib', *paths) + if 'USERPROFILE' in os.environ: + return os.path.join(os.environ['USERPROFILE'], '.cache', 'dnnlib', *paths) + return os.path.join(tempfile.gettempdir(), '.cache', 'dnnlib', *paths) + +# Small util functions +# ------------------------------------------------------------------------------------------ + + +def format_time(seconds: Union[int, float]) -> str: + """Convert the seconds to human readable string with days, hours, minutes and seconds.""" + s = int(np.rint(seconds)) + + if s < 60: + return "{0}s".format(s) + elif s < 60 * 60: + return "{0}m 
{1:02}s".format(s // 60, s % 60) + elif s < 24 * 60 * 60: + return "{0}h {1:02}m {2:02}s".format(s // (60 * 60), (s // 60) % 60, s % 60) + else: + return "{0}d {1:02}h {2:02}m".format(s // (24 * 60 * 60), (s // (60 * 60)) % 24, (s // 60) % 60) + + +def ask_yes_no(question: str) -> bool: + """Ask the user the question until the user inputs a valid answer.""" + while True: + try: + print("{0} [y/n]".format(question)) + return strtobool(input().lower()) + except ValueError: + pass + + +def tuple_product(t: Tuple) -> Any: + """Calculate the product of the tuple elements.""" + result = 1 + + for v in t: + result *= v + + return result + + +_str_to_ctype = { + "uint8": ctypes.c_ubyte, + "uint16": ctypes.c_uint16, + "uint32": ctypes.c_uint32, + "uint64": ctypes.c_uint64, + "int8": ctypes.c_byte, + "int16": ctypes.c_int16, + "int32": ctypes.c_int32, + "int64": ctypes.c_int64, + "float32": ctypes.c_float, + "float64": ctypes.c_double +} + + +def get_dtype_and_ctype(type_obj: Any) -> Tuple[np.dtype, Any]: + """Given a type name string (or an object having a __name__ attribute), return matching Numpy and ctypes types that have the same size in bytes.""" + type_str = None + + if isinstance(type_obj, str): + type_str = type_obj + elif hasattr(type_obj, "__name__"): + type_str = type_obj.__name__ + elif hasattr(type_obj, "name"): + type_str = type_obj.name + else: + raise RuntimeError("Cannot infer type name from input") + + assert type_str in _str_to_ctype.keys() + + my_dtype = np.dtype(type_str) + my_ctype = _str_to_ctype[type_str] + + assert my_dtype.itemsize == ctypes.sizeof(my_ctype) + + return my_dtype, my_ctype + + +def is_pickleable(obj: Any) -> bool: + try: + with io.BytesIO() as stream: + pickle.dump(obj, stream) + return True + except: + return False + + +# Functionality to import modules/objects by name, and call functions by name +# ------------------------------------------------------------------------------------------ + +def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]: + """Searches for the underlying module behind the name to some python object. + Returns the module and the object name (original name with module part removed).""" + + # allow convenience shorthands, substitute them by full names + obj_name = re.sub("^np.", "numpy.", obj_name) + obj_name = re.sub("^tf.", "tensorflow.", obj_name) + + # list alternatives for (module_name, local_obj_name) + parts = obj_name.split(".") + name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)] + + # try each alternative in turn + for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + return module, local_obj_name + except: + pass + + # maybe some of the modules themselves contain errors? + for module_name, _local_obj_name in name_pairs: + try: + importlib.import_module(module_name) # may raise ImportError + except ImportError: + if not str(sys.exc_info()[1]).startswith("No module named '" + module_name + "'"): + raise + + # maybe the requested attribute is missing? 
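+    # Only ImportError is swallowed in this pass, so if the module imports cleanly but
+    # the attribute lookup fails, the AttributeError propagates to the caller with the
+    # offending name rather than the generic ImportError raised below.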
+ for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + except ImportError: + pass + + # we are out of luck, but we have no idea why + raise ImportError(obj_name) + + +def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any: + """Traverses the object name and returns the last (rightmost) python object.""" + if obj_name == '': + return module + obj = module + for part in obj_name.split("."): + obj = getattr(obj, part) + return obj + + +def get_obj_by_name(name: str) -> Any: + """Finds the python object with the given name.""" + module, obj_name = get_module_from_obj_name(name) + return get_obj_from_module(module, obj_name) + + +def call_func_by_name(*args, func_name: str = None, **kwargs) -> Any: + """Finds the python object with the given name and calls it as a function.""" + assert func_name is not None + func_obj = get_obj_by_name(func_name) + assert callable(func_obj) + return func_obj(*args, **kwargs) + + +def construct_class_by_name(*args, class_name: str = None, **kwargs) -> Any: + """Finds the python class with the given name and constructs it with the given arguments.""" + return call_func_by_name(*args, func_name=class_name, **kwargs) + + +def get_module_dir_by_obj_name(obj_name: str) -> str: + """Get the directory path of the module containing the given object name.""" + module, _ = get_module_from_obj_name(obj_name) + return os.path.dirname(inspect.getfile(module)) + + +def is_top_level_function(obj: Any) -> bool: + """Determine whether the given object is a top-level function, i.e., defined at module scope using 'def'.""" + return callable(obj) and obj.__name__ in sys.modules[obj.__module__].__dict__ + + +def get_top_level_function_name(obj: Any) -> str: + """Return the fully-qualified name of a top-level function.""" + assert is_top_level_function(obj) + module = obj.__module__ + if module == '__main__': + module = os.path.splitext(os.path.basename(sys.modules[module].__file__))[0] + return module + "." + obj.__name__ + + +# File system helpers +# ------------------------------------------------------------------------------------------ + +def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]: + """List all files recursively in a given directory while ignoring given file and directory names. + Returns list of tuples containing both absolute and relative paths.""" + assert os.path.isdir(dir_path) + base_name = os.path.basename(os.path.normpath(dir_path)) + + if ignores is None: + ignores = [] + + result = [] + + for root, dirs, files in os.walk(dir_path, topdown=True): + for ignore_ in ignores: + dirs_to_remove = [d for d in dirs if fnmatch.fnmatch(d, ignore_)] + + # dirs need to be edited in-place + for d in dirs_to_remove: + dirs.remove(d) + + files = [f for f in files if not fnmatch.fnmatch(f, ignore_)] + + absolute_paths = [os.path.join(root, f) for f in files] + relative_paths = [os.path.relpath(p, dir_path) for p in absolute_paths] + + if add_base_to_relative: + relative_paths = [os.path.join(base_name, p) for p in relative_paths] + + assert len(absolute_paths) == len(relative_paths) + result += zip(absolute_paths, relative_paths) + + return result + + +def copy_files_and_create_dirs(files: List[Tuple[str, str]]) -> None: + """Takes in a list of tuples of (src, dst) paths and copies files. 
+ Will create all necessary directories.""" + for file in files: + target_dir_name = os.path.dirname(file[1]) + + # will create all intermediate-level directories + if not os.path.exists(target_dir_name): + os.makedirs(target_dir_name) + + shutil.copyfile(file[0], file[1]) + + +# URL helpers +# ------------------------------------------------------------------------------------------ + +def is_url(obj: Any, allow_file_urls: bool = False) -> bool: + """Determine whether the given object is a valid URL string.""" + if not isinstance(obj, str) or not "://" in obj: + return False + if allow_file_urls and obj.startswith('file://'): + return True + try: + res = requests.compat.urlparse(obj) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + res = requests.compat.urlparse(requests.compat.urljoin(obj, "/")) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + except: + return False + return True + + +def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True, return_filename: bool = False, cache: bool = True) -> Any: + """Download the given URL and return a binary-mode file object to access the data.""" + assert num_attempts >= 1 + assert not (return_filename and (not cache)) + + # Doesn't look like an URL scheme so interpret it as a local filename. + if not re.match('^[a-z]+://', url): + return url if return_filename else open(url, "rb") + + # Handle file URLs. This code handles unusual file:// patterns that + # arise on Windows: + # + # file:///c:/foo.txt + # + # which would translate to a local '/c:/foo.txt' filename that's + # invalid. Drop the forward slash for such pathnames. + # + # If you touch this code path, you should test it on both Linux and + # Windows. + # + # Some internet resources suggest using urllib.request.url2pathname() but + # but that converts forward slashes to backslashes and this causes + # its own set of problems. + if url.startswith('file://'): + filename = urllib.parse.urlparse(url).path + if re.match(r'^/[a-zA-Z]:', filename): + filename = filename[1:] + return filename if return_filename else open(filename, "rb") + + assert is_url(url) + + # Lookup from cache. + if cache_dir is None: + cache_dir = make_cache_dir_path('downloads') + + url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest() + if cache: + cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*")) + if len(cache_files) == 1: + filename = cache_files[0] + return filename if return_filename else open(filename, "rb") + + # Download. + url_name = None + url_data = None + with requests.Session() as session: + if verbose: + print("Downloading %s ..." 
% url, end="", flush=True) + for attempts_left in reversed(range(num_attempts)): + try: + with session.get(url) as res: + res.raise_for_status() + if len(res.content) == 0: + raise IOError("No data received") + + if len(res.content) < 8192: + content_str = res.content.decode("utf-8") + if "download_warning" in res.headers.get("Set-Cookie", ""): + links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link] + if len(links) == 1: + url = requests.compat.urljoin(url, links[0]) + raise IOError("Google Drive virus checker nag") + if "Google Drive - Quota exceeded" in content_str: + raise IOError("Google Drive download quota exceeded -- please try again later") + + match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", "")) + url_name = match[1] if match else url + url_data = res.content + if verbose: + print(" done") + break + except: + if not attempts_left: + if verbose: + print(" failed") + raise + if verbose: + print(".", end="", flush=True) + + # Save to cache. + if cache: + safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name) + cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name) + temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name) + os.makedirs(cache_dir, exist_ok=True) + with open(temp_file, "wb") as f: + f.write(url_data) + os.replace(temp_file, cache_file) # atomic + if return_filename: + return cache_file + + # Return data as file object. + assert not return_filename + return io.BytesIO(url_data) diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/generate.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/generate.py new file mode 100644 index 0000000000000000000000000000000000000000..42210a5a40cbd592ff0898b0ac3df736b34b8e98 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/generate.py @@ -0,0 +1,123 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Generate images using pretrained network pickle.""" + +import argparse +import os +import pickle +import re + +import numpy as np +import PIL.Image + +import dnnlib +import dnnlib.tflib as tflib + +#---------------------------------------------------------------------------- + +def generate_images(network_pkl, seeds, truncation_psi, outdir, class_idx, dlatents_npz): + tflib.init_tf() + print('Loading networks from "%s"...' % network_pkl) + with dnnlib.util.open_url(network_pkl) as fp: + _G, _D, Gs = pickle.load(fp) + + os.makedirs(outdir, exist_ok=True) + + # Render images for a given dlatent vector. + if dlatents_npz is not None: + print(f'Generating images from dlatents file "{dlatents_npz}"') + dlatents = np.load(dlatents_npz)['dlatents'] + assert dlatents.shape[1:] == (18, 512) # [N, 18, 512] + imgs = Gs.components.synthesis.run(dlatents, output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)) + for i, img in enumerate(imgs): + fname = f'{outdir}/dlatent{i:02d}.png' + print (f'Saved {fname}') + PIL.Image.fromarray(img, 'RGB').save(fname) + return + + # Render images for dlatents initialized from random seeds. 
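+    # Each seed below deterministically drives both the latent vector z and the
+    # per-layer noise variables, so re-running with the same seed reproduces the image.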
+ Gs_kwargs = { + 'output_transform': dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True), + 'randomize_noise': False + } + if truncation_psi is not None: + Gs_kwargs['truncation_psi'] = truncation_psi + + noise_vars = [var for name, var in Gs.components.synthesis.vars.items() if name.startswith('noise')] + label = np.zeros([1] + Gs.input_shapes[1][1:]) + if class_idx is not None: + label[:, class_idx] = 1 + + for seed_idx, seed in enumerate(seeds): + print('Generating image for seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds))) + rnd = np.random.RandomState(seed) + z = rnd.randn(1, *Gs.input_shape[1:]) # [minibatch, component] + tflib.set_vars({var: rnd.randn(*var.shape.as_list()) for var in noise_vars}) # [height, width] + images = Gs.run(z, label, **Gs_kwargs) # [minibatch, height, width, channel] + PIL.Image.fromarray(images[0], 'RGB').save(f'{outdir}/seed{seed:04d}.png') + +#---------------------------------------------------------------------------- + +def _parse_num_range(s): + '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' + + range_re = re.compile(r'^(\d+)-(\d+)$') + m = range_re.match(s) + if m: + return list(range(int(m.group(1)), int(m.group(2))+1)) + vals = s.split(',') + return [int(x) for x in vals] + +#---------------------------------------------------------------------------- + +_examples = '''examples: + + # Generate curated MetFaces images without truncation (Fig.10 left) + python %(prog)s --outdir=out --trunc=1 --seeds=85,265,297,849 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl + + # Generate uncurated MetFaces images with truncation (Fig.12 upper left) + python %(prog)s --outdir=out --trunc=0.7 --seeds=600-605 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl + + # Generate class conditional CIFAR-10 images (Fig.17 left, Car) + python %(prog)s --outdir=out --trunc=1 --seeds=0-35 --class=1 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/cifar10.pkl + + # Render image from projected latent vector + python %(prog)s --outdir=out --dlatents=out/dlatents.npz \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl +''' + +#---------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description='Generate images using pretrained network pickle.', + epilog=_examples, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) + g = parser.add_mutually_exclusive_group(required=True) + g.add_argument('--seeds', type=_parse_num_range, help='List of random seeds') + g.add_argument('--dlatents', dest='dlatents_npz', help='Generate images for saved dlatents') + parser.add_argument('--trunc', dest='truncation_psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5) + parser.add_argument('--class', dest='class_idx', type=int, help='Class label (default: unconditional)') + parser.add_argument('--outdir', help='Where to save the output images', required=True, metavar='DIR') + + args = parser.parse_args() + generate_images(**vars(args)) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git 
a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2c61c745d36a1e35568fe4310c780c34414173e0 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +# empty diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/frechet_inception_distance.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/frechet_inception_distance.py new file mode 100644 index 0000000000000000000000000000000000000000..1f6be674d68eb2b1df55654842ddfdecb1843c4e --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/frechet_inception_distance.py @@ -0,0 +1,93 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Frechet Inception Distance (FID) from the paper +"GANs trained by a two time-scale update rule converge to a local Nash equilibrium".""" + +import os +import pickle +import numpy as np +import scipy +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_base + +#---------------------------------------------------------------------------- + +class FID(metric_base.MetricBase): + def __init__(self, max_reals, num_fakes, minibatch_per_gpu, use_cached_real_stats=True, **kwargs): + super().__init__(**kwargs) + self.max_reals = max_reals + self.num_fakes = num_fakes + self.minibatch_per_gpu = minibatch_per_gpu + self.use_cached_real_stats = use_cached_real_stats + + def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ + minibatch_size = num_gpus * self.minibatch_per_gpu + with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/inception_v3_features.pkl') as f: # identical to http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz + feature_net = pickle.load(f) + + # Calculate statistics for reals. 
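+        # Real statistics are accumulated in a single streaming pass: mu_real collects
+        # the sum of Inception features and sigma_real the sum of their outer products;
+        # after dividing by the sample count, the covariance follows as E[x x^T] - mu mu^T.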
+ cache_file = self._get_cache_file_for_reals(max_reals=self.max_reals) + os.makedirs(os.path.dirname(cache_file), exist_ok=True) + if self.use_cached_real_stats and os.path.isfile(cache_file): + with open(cache_file, 'rb') as f: + mu_real, sigma_real = pickle.load(f) + else: + nfeat = feature_net.output_shape[1] + mu_real = np.zeros(nfeat) + sigma_real = np.zeros([nfeat, nfeat]) + num_real = 0 + for images, _labels, num in self._iterate_reals(minibatch_size): + if self.max_reals is not None: + num = min(num, self.max_reals - num_real) + if images.shape[1] == 1: + images = np.tile(images, [1, 3, 1, 1]) + for feat in list(feature_net.run(images, num_gpus=num_gpus, assume_frozen=True))[:num]: + mu_real += feat + sigma_real += np.outer(feat, feat) + num_real += 1 + if self.max_reals is not None and num_real >= self.max_reals: + break + mu_real /= num_real + sigma_real /= num_real + sigma_real -= np.outer(mu_real, mu_real) + with open(cache_file, 'wb') as f: + pickle.dump((mu_real, sigma_real), f) + + # Construct TensorFlow graph. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device('/gpu:%d' % gpu_idx): + Gs_clone = Gs.clone() + feature_net_clone = feature_net.clone() + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + images = Gs_clone.get_output_for(latents, labels, **G_kwargs) + if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1]) + images = tflib.convert_images_to_uint8(images) + result_expr.append(feature_net_clone.get_output_for(images)) + + # Calculate statistics for fakes. + feat_fake = [] + for begin in range(0, self.num_fakes, minibatch_size): + self._report_progress(begin, self.num_fakes) + feat_fake += list(np.concatenate(tflib.run(result_expr), axis=0)) + feat_fake = np.stack(feat_fake[:self.num_fakes]) + mu_fake = np.mean(feat_fake, axis=0) + sigma_fake = np.cov(feat_fake, rowvar=False) + + # Calculate FID. + m = np.square(mu_fake - mu_real).sum() + s, _ = scipy.linalg.sqrtm(np.dot(sigma_fake, sigma_real), disp=False) # pylint: disable=no-member + dist = m + np.trace(sigma_fake + sigma_real - 2*s) + self._report_result(np.real(dist)) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/inception_score.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/inception_score.py new file mode 100644 index 0000000000000000000000000000000000000000..c33f089319295ba1b3007aa87a1f30f495bf679f --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/inception_score.py @@ -0,0 +1,64 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
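+# NOTE (editor): the Inception Score exponentiates the average KL divergence between the
+# conditional class distribution p(y|x) predicted by Inception-v3 for generated images and
+# the marginal p(y), i.e. IS = exp(E_x[KL(p(y|x) || p(y))]); the mean and standard deviation
+# over `num_splits` disjoint splits are reported below.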
+ +"""Inception Score (IS) from the paper +"Improved techniques for training GANs".""" + +import pickle +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_base + +#---------------------------------------------------------------------------- + +class IS(metric_base.MetricBase): + def __init__(self, num_images, num_splits, minibatch_per_gpu, **kwargs): + super().__init__(**kwargs) + self.num_images = num_images + self.num_splits = num_splits + self.minibatch_per_gpu = minibatch_per_gpu + + def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ + minibatch_size = num_gpus * self.minibatch_per_gpu + with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/inception_v3_softmax.pkl') as f: + inception = pickle.load(f) + activations = np.empty([self.num_images, inception.output_shape[1]], dtype=np.float32) + + # Construct TensorFlow graph. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device(f'/gpu:{gpu_idx}'): + Gs_clone = Gs.clone() + inception_clone = inception.clone() + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + images = Gs_clone.get_output_for(latents, labels, **G_kwargs) + if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1]) + images = tflib.convert_images_to_uint8(images) + result_expr.append(inception_clone.get_output_for(images)) + + # Calculate activations for fakes. + for begin in range(0, self.num_images, minibatch_size): + self._report_progress(begin, self.num_images) + end = min(begin + minibatch_size, self.num_images) + activations[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin] + + # Calculate IS. + scores = [] + for i in range(self.num_splits): + part = activations[i * self.num_images // self.num_splits : (i + 1) * self.num_images // self.num_splits] + kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0))) + kl = np.mean(np.sum(kl, 1)) + scores.append(np.exp(kl)) + self._report_result(np.mean(scores), suffix='_mean') + self._report_result(np.std(scores), suffix='_std') + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/kernel_inception_distance.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/kernel_inception_distance.py new file mode 100644 index 0000000000000000000000000000000000000000..20fa8db53b915bdc4e33f9f7d3961d7d8eb83de5 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/kernel_inception_distance.py @@ -0,0 +1,94 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ +"""Kernel Inception Distance (KID) from the paper +"Demystifying MMD GANs".""" + +import os +import pickle +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_base + +#---------------------------------------------------------------------------- + +def compute_kid(feat_real, feat_fake, num_subsets=100, max_subset_size=1000): + n = feat_real.shape[1] + m = min(min(feat_real.shape[0], feat_fake.shape[0]), max_subset_size) + t = 0 + for _subset_idx in range(num_subsets): + x = feat_fake[np.random.choice(feat_fake.shape[0], m, replace=False)] + y = feat_real[np.random.choice(feat_real.shape[0], m, replace=False)] + a = (x @ x.T / n + 1) ** 3 + (y @ y.T / n + 1) ** 3 + b = (x @ y.T / n + 1) ** 3 + t += (a.sum() - np.diag(a).sum()) / (m - 1) - b.sum() * 2 / m + return t / num_subsets / m + +#---------------------------------------------------------------------------- + +class KID(metric_base.MetricBase): + def __init__(self, max_reals, num_fakes, minibatch_per_gpu, use_cached_real_stats=True, **kwargs): + super().__init__(**kwargs) + self.max_reals = max_reals + self.num_fakes = num_fakes + self.minibatch_per_gpu = minibatch_per_gpu + self.use_cached_real_stats = use_cached_real_stats + + def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ + minibatch_size = num_gpus * self.minibatch_per_gpu + with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/inception_v3_features.pkl') as f: # identical to http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz + feature_net = pickle.load(f) + + # Calculate statistics for reals. + cache_file = self._get_cache_file_for_reals(max_reals=self.max_reals) + os.makedirs(os.path.dirname(cache_file), exist_ok=True) + if self.use_cached_real_stats and os.path.isfile(cache_file): + with open(cache_file, 'rb') as f: + feat_real = pickle.load(f) + else: + feat_real = [] + for images, _labels, num in self._iterate_reals(minibatch_size): + if self.max_reals is not None: + num = min(num, self.max_reals - len(feat_real)) + if images.shape[1] == 1: + images = np.tile(images, [1, 3, 1, 1]) + feat_real += list(feature_net.run(images, num_gpus=num_gpus, assume_frozen=True))[:num] + if self.max_reals is not None and len(feat_real) >= self.max_reals: + break + feat_real = np.stack(feat_real) + with open(cache_file, 'wb') as f: + pickle.dump(feat_real, f) + + # Construct TensorFlow graph. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device('/gpu:%d' % gpu_idx): + Gs_clone = Gs.clone() + feature_net_clone = feature_net.clone() + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + images = Gs_clone.get_output_for(latents, labels, **G_kwargs) + if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1]) + images = tflib.convert_images_to_uint8(images) + result_expr.append(feature_net_clone.get_output_for(images)) + + # Calculate statistics for fakes. + feat_fake = [] + for begin in range(0, self.num_fakes, minibatch_size): + self._report_progress(begin, self.num_fakes) + feat_fake += list(np.concatenate(tflib.run(result_expr), axis=0)) + feat_fake = np.stack(feat_fake[:self.num_fakes]) + + # Calculate KID. 
+ kid = compute_kid(feat_real, feat_fake) + self._report_result(np.real(kid), fmt='%-12.8f') + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/linear_separability.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/linear_separability.py new file mode 100644 index 0000000000000000000000000000000000000000..d95e12b8f325c3ce2bedffb1dd90f1e9c849e601 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/linear_separability.py @@ -0,0 +1,184 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Linear Separability (LS) from the paper +"A Style-Based Generator Architecture for Generative Adversarial Networks".""" + +import pickle +from collections import defaultdict +import numpy as np +import sklearn.svm +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_base + +#---------------------------------------------------------------------------- + +classifier_urls = [ + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-00-male.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-01-smiling.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-02-attractive.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-03-wavy-hair.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-04-young.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-05-5-o-clock-shadow.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-06-arched-eyebrows.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-07-bags-under-eyes.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-08-bald.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-09-bangs.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-10-big-lips.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-11-big-nose.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-12-black-hair.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-13-blond-hair.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-14-blurry.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-15-brown-hair.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-16-bushy-eyebrows.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-17-chubby.pkl', + 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-18-double-chin.pkl', + 
'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-19-eyeglasses.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-20-goatee.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-21-gray-hair.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-22-heavy-makeup.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-23-high-cheekbones.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-24-mouth-slightly-open.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-25-mustache.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-26-narrow-eyes.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-27-no-beard.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-28-oval-face.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-29-pale-skin.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-30-pointy-nose.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-31-receding-hairline.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-32-rosy-cheeks.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-33-sideburns.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-34-straight-hair.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-35-wearing-earrings.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-36-wearing-hat.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-37-wearing-lipstick.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-38-wearing-necklace.pkl',
+    'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/celebahq-classifier-39-wearing-necktie.pkl',
+]
+
+#----------------------------------------------------------------------------
+
+def prob_normalize(p):
+    p = np.asarray(p).astype(np.float32)
+    assert len(p.shape) == 2
+    return p / np.sum(p)
+
+def mutual_information(p):
+    p = prob_normalize(p)
+    px = np.sum(p, axis=1)
+    py = np.sum(p, axis=0)
+    result = 0.0
+    for x in range(p.shape[0]):
+        p_x = px[x]
+        for y in range(p.shape[1]):
+            p_xy = p[x][y]
+            p_y = py[y]
+            if p_xy > 0.0:
+                result += p_xy * np.log2(p_xy / (p_x * p_y)) # get bits as output
+    return result
+
+def entropy(p):
+    p = prob_normalize(p)
+    result = 0.0
+    for x in range(p.shape[0]):
+        for y in range(p.shape[1]):
+            p_xy = p[x][y]
+            if p_xy > 0.0:
+                result -= p_xy * np.log2(p_xy)
+    return result
+
+def conditional_entropy(p):
+    # H(Y|X) where X corresponds to axis 0, Y to axis 1
+    # i.e., how many bits of additional information are needed to specify where we are on axis 1 if we know where we are on axis 0?
+    p = prob_normalize(p)
+    y = np.sum(p, axis=0, keepdims=True) # marginalize to calculate H(Y)
+    return max(0.0, entropy(y) - mutual_information(p)) # can slip just below 0 due to FP inaccuracies, clean those up.
+ +#---------------------------------------------------------------------------- + +class LS(metric_base.MetricBase): + def __init__(self, num_samples, num_keep, attrib_indices, minibatch_per_gpu, **kwargs): + assert num_keep <= num_samples + super().__init__(**kwargs) + self.num_samples = num_samples + self.num_keep = num_keep + self.attrib_indices = attrib_indices + self.minibatch_per_gpu = minibatch_per_gpu + + def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ + minibatch_size = num_gpus * self.minibatch_per_gpu + + # Construct TensorFlow graph for each GPU. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device(f'/gpu:{gpu_idx}'): + Gs_clone = Gs.clone() + + # Generate images. + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + dlatents = Gs_clone.components.mapping.get_output_for(latents, labels, **G_kwargs) + images = Gs_clone.get_output_for(latents, None, **G_kwargs) + if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1]) + + # Downsample to 256x256. The attribute classifiers were built for 256x256. + if images.shape[2] > 256: + factor = images.shape[2] // 256 + images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor]) + images = tf.reduce_mean(images, axis=[3, 5]) + + # Run classifier for each attribute. + result_dict = dict(latents=latents, dlatents=dlatents[:,-1]) + for attrib_idx in self.attrib_indices: + with dnnlib.util.open_url(classifier_urls[attrib_idx]) as f: + classifier = pickle.load(f) + logits = classifier.get_output_for(images, None) + predictions = tf.nn.softmax(tf.concat([logits, -logits], axis=1)) + result_dict[attrib_idx] = predictions + result_expr.append(result_dict) + + # Sampling loop. + results = [] + for begin in range(0, self.num_samples, minibatch_size): + self._report_progress(begin, self.num_samples) + results += tflib.run(result_expr) + results = {key: np.concatenate([value[key] for value in results], axis=0) for key in results[0].keys()} + + # Calculate conditional entropy for each attribute. + conditional_entropies = defaultdict(list) + for attrib_idx in self.attrib_indices: + # Prune the least confident samples. + pruned_indices = list(range(self.num_samples)) + pruned_indices = sorted(pruned_indices, key=lambda i: -np.max(results[attrib_idx][i])) + pruned_indices = pruned_indices[:self.num_keep] + + # Fit SVM to the remaining samples. + svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1) + for space in ['latents', 'dlatents']: + svm_inputs = results[space][pruned_indices] + try: + svm = sklearn.svm.LinearSVC() + svm.fit(svm_inputs, svm_targets) + svm.score(svm_inputs, svm_targets) + svm_outputs = svm.predict(svm_inputs) + except: + svm_outputs = svm_targets # assume perfect prediction + + # Calculate conditional entropy. + p = [[np.mean([case == (row, col) for case in zip(svm_outputs, svm_targets)]) for col in (0, 1)] for row in (0, 1)] + conditional_entropies[space].append(conditional_entropy(p)) + + # Calculate separability scores. 
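+        # NOTE (editor): the separability score is 2 raised to the sum (over attributes) of the
+        # conditional entropies H(Y|X) in bits, where X is the linear SVM prediction and Y the
+        # pretrained classifier label; lower values indicate that the attributes are more
+        # linearly separable in the corresponding latent space (reported for both Z and W).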
+ scores = {key: 2**np.sum(values) for key, values in conditional_entropies.items()} + self._report_result(scores['latents'], suffix='_z') + self._report_result(scores['dlatents'], suffix='_w') + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/metric_base.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/metric_base.py new file mode 100644 index 0000000000000000000000000000000000000000..84fab74695b60ff0fc8a2ee7dd90ec581c77ad9b --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/metric_base.py @@ -0,0 +1,137 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Common definitions for quality metrics.""" + +import os +import time +import hashlib +import pickle +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from training import dataset + +#---------------------------------------------------------------------------- +# Base class for metrics. + +class MetricBase: + def __init__(self, name, force_dataset_args={}, force_G_kwargs={}): + # Constructor args. + self.name = name + self.force_dataset_args = force_dataset_args + self.force_G_kwargs = force_G_kwargs + + # Configuration. + self._dataset_args = dnnlib.EasyDict() + self._run_dir = None + self._progress_fn = None + + # Internal state. + self._results = [] + self._network_name = '' + self._eval_time = 0 + self._dataset = None + + def configure(self, dataset_args={}, run_dir=None, progress_fn=None): + self._dataset_args = dnnlib.EasyDict(dataset_args) + self._dataset_args.update(self.force_dataset_args) + self._run_dir = run_dir + self._progress_fn = progress_fn + + def run(self, network_pkl, num_gpus=1, G_kwargs=dict(is_validation=True)): + self._results = [] + self._network_name = os.path.splitext(os.path.basename(network_pkl))[0] + self._eval_time = 0 + self._dataset = None + + with tf.Graph().as_default(), tflib.create_session().as_default(): # pylint: disable=not-context-manager + self._report_progress(0, 1) + time_begin = time.time() + with dnnlib.util.open_url(network_pkl) as f: + G, D, Gs = pickle.load(f) + + G_kwargs = dnnlib.EasyDict(G_kwargs) + G_kwargs.update(self.force_G_kwargs) + self._evaluate(G=G, D=D, Gs=Gs, G_kwargs=G_kwargs, num_gpus=num_gpus) + + self._eval_time = time.time() - time_begin # pylint: disable=attribute-defined-outside-init + self._report_progress(1, 1) + if self._dataset is not None: + self._dataset.close() + self._dataset = None + + result_str = self.get_result_str() + print(result_str) + if self._run_dir is not None and os.path.isdir(self._run_dir): + with open(os.path.join(self._run_dir, f'metric-{self.name}.txt'), 'at') as f: + f.write(result_str + '\n') + + def get_result_str(self): + title = self._network_name + if len(title) > 29: + title = '...' 
+ title[-26:] + result_str = f'{title:<30s} time {dnnlib.util.format_time(self._eval_time):<12s}' + for res in self._results: + result_str += f' {self.name}{res.suffix} {res.fmt % res.value}' + return result_str.strip() + + def update_autosummaries(self): + for res in self._results: + tflib.autosummary.autosummary('Metrics/' + self.name + res.suffix, res.value) + + def _evaluate(self, **_kwargs): + raise NotImplementedError # to be overridden by subclasses + + def _report_result(self, value, suffix='', fmt='%-10.4f'): + self._results += [dnnlib.EasyDict(value=value, suffix=suffix, fmt=fmt)] + + def _report_progress(self, cur, total): + if self._progress_fn is not None: + self._progress_fn(cur, total) + + def _get_cache_file_for_reals(self, extension='pkl', **kwargs): + all_args = dnnlib.EasyDict(metric_name=self.name) + all_args.update(self._dataset_args) + all_args.update(kwargs) + md5 = hashlib.md5(repr(sorted(all_args.items())).encode('utf-8')) + dataset_name = os.path.splitext(os.path.basename(self._dataset_args.path))[0] + return dnnlib.make_cache_dir_path('metrics', f'{md5.hexdigest()}-{self.name}-{dataset_name}.{extension}') + + def _get_dataset_obj(self): + if self._dataset is None: + self._dataset = dataset.load_dataset(**self._dataset_args) + return self._dataset + + def _iterate_reals(self, minibatch_size): + print(f'Calculating real image statistics for {self.name}...') + dataset_obj = self._get_dataset_obj() + while True: + images = [] + labels = [] + for _ in range(minibatch_size): + image, label = dataset_obj.get_minibatch_np(1) + if image is None: + break + images.append(image) + labels.append(label) + num = len(images) + if num == 0: + break + images = np.concatenate(images + [images[-1]] * (minibatch_size - num), axis=0) + labels = np.concatenate(labels + [labels[-1]] * (minibatch_size - num), axis=0) + yield images, labels, num + if num < minibatch_size: + break + + def _get_random_labels_tf(self, minibatch_size): + return self._get_dataset_obj().get_random_labels_tf(minibatch_size) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/metric_defaults.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/metric_defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..b456e9c6b44f1c64514a6c9b7451741bf7cb9c62 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/metric_defaults.py @@ -0,0 +1,36 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Default metric definitions.""" + +from dnnlib import EasyDict + +#---------------------------------------------------------------------------- + +metric_defaults = EasyDict([(args.name, args) for args in [ + # ADA paper. 
+ EasyDict(name='fid50k_full', class_name='metrics.frechet_inception_distance.FID', max_reals=None, num_fakes=50000, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None, repeat=False, mirror_augment=False)), + EasyDict(name='kid50k_full', class_name='metrics.kernel_inception_distance.KID', max_reals=1000000, num_fakes=50000, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None, repeat=False, mirror_augment=False)), + EasyDict(name='pr50k3_full', class_name='metrics.precision_recall.PR', max_reals=200000, num_fakes=50000, nhood_size=3, minibatch_per_gpu=8, row_batch_size=10000, col_batch_size=10000, force_dataset_args=dict(shuffle=False, max_images=None, repeat=False, mirror_augment=False)), + EasyDict(name='is50k', class_name='metrics.inception_score.IS', num_images=50000, num_splits=10, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None)), + + # Legacy: StyleGAN2. + EasyDict(name='fid50k', class_name='metrics.frechet_inception_distance.FID', max_reals=50000, num_fakes=50000, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None)), + EasyDict(name='kid50k', class_name='metrics.kernel_inception_distance.KID', max_reals=50000, num_fakes=50000, minibatch_per_gpu=8, force_dataset_args=dict(shuffle=False, max_images=None)), + EasyDict(name='pr50k3', class_name='metrics.precision_recall.PR', max_reals=50000, num_fakes=50000, nhood_size=3, minibatch_per_gpu=8, row_batch_size=10000, col_batch_size=10000, force_dataset_args=dict(shuffle=False, max_images=None)), + EasyDict(name='ppl2_wend', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=False, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)), + + # Legacy: StyleGAN. 
+ EasyDict(name='ppl_zfull', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='z', sampling='full', crop=True, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)), + EasyDict(name='ppl_wfull', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='full', crop=True, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)), + EasyDict(name='ppl_zend', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='z', sampling='end', crop=True, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)), + EasyDict(name='ppl_wend', class_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=True, minibatch_per_gpu=2, force_dataset_args=dict(shuffle=False, max_images=None), force_G_kwargs=dict(dtype='float32', mapping_dtype='float32', num_fp16_res=0)), + EasyDict(name='ls', class_name='metrics.linear_separability.LS', num_samples=200000, num_keep=100000, attrib_indices=range(40), minibatch_per_gpu=4, force_dataset_args=dict(shuffle=False, max_images=None)), +]]) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/perceptual_path_length.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/perceptual_path_length.py new file mode 100644 index 0000000000000000000000000000000000000000..15a327ba9b0d119f927051da161c45839745d51d --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/perceptual_path_length.py @@ -0,0 +1,119 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Perceptual Path Length (PPL) from the paper +"A Style-Based Generator Architecture for Generative Adversarial Networks".""" + +import pickle +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_base + +#---------------------------------------------------------------------------- + +# Normalize batch of vectors. +def normalize(v): + return v / tf.sqrt(tf.reduce_sum(tf.square(v), axis=-1, keepdims=True)) + +# Spherical interpolation of a batch of vectors. 
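+# NOTE (editor): for unit vectors a and b with angle omega = arccos(<a, b>), slerp follows the
+# great circle through a and b: a*cos(t*omega) + c*sin(t*omega), where c is the unit vector
+# orthogonal to a in the (a, b) plane; inputs and output are (re-)normalized for numerical safety.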
+def slerp(a, b, t): + a = normalize(a) + b = normalize(b) + d = tf.reduce_sum(a * b, axis=-1, keepdims=True) + p = t * tf.math.acos(d) + c = normalize(b - d * a) + d = a * tf.math.cos(p) + c * tf.math.sin(p) + return normalize(d) + +#---------------------------------------------------------------------------- + +class PPL(metric_base.MetricBase): + def __init__(self, num_samples, epsilon, space, sampling, crop, minibatch_per_gpu, **kwargs): + assert space in ['z', 'w'] + assert sampling in ['full', 'end'] + super().__init__(**kwargs) + self.num_samples = num_samples + self.epsilon = epsilon + self.space = space + self.sampling = sampling + self.crop = crop + self.minibatch_per_gpu = minibatch_per_gpu + + def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ + minibatch_size = num_gpus * self.minibatch_per_gpu + + # Construct TensorFlow graph. + distance_expr = [] + for gpu_idx in range(num_gpus): + with tf.device(f'/gpu:{gpu_idx}'): + Gs_clone = Gs.clone() + noise_vars = [var for name, var in Gs_clone.components.synthesis.vars.items() if name.startswith('noise')] + + # Generate random latents and interpolation t-values. + lat_t01 = tf.random_normal([self.minibatch_per_gpu * 2] + Gs_clone.input_shape[1:]) + lerp_t = tf.random_uniform([self.minibatch_per_gpu], 0.0, 1.0 if self.sampling == 'full' else 0.0) + labels = tf.reshape(tf.tile(self._get_random_labels_tf(self.minibatch_per_gpu), [1, 2]), [self.minibatch_per_gpu * 2, -1]) + + # Interpolate in W or Z. + if self.space == 'w': + dlat_t01 = Gs_clone.components.mapping.get_output_for(lat_t01, labels, **G_kwargs) + dlat_t01 = tf.cast(dlat_t01, tf.float32) + dlat_t0, dlat_t1 = dlat_t01[0::2], dlat_t01[1::2] + dlat_e0 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis]) + dlat_e1 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis] + self.epsilon) + dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis=1), dlat_t01.shape) + else: # space == 'z' + lat_t0, lat_t1 = lat_t01[0::2], lat_t01[1::2] + lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis]) + lat_e1 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis] + self.epsilon) + lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis=1), lat_t01.shape) + dlat_e01 = Gs_clone.components.mapping.get_output_for(lat_e01, labels, **G_kwargs) + + # Synthesize images. + with tf.control_dependencies([var.initializer for var in noise_vars]): # use same noise inputs for the entire minibatch + images = Gs_clone.components.synthesis.get_output_for(dlat_e01, randomize_noise=False, **G_kwargs) + images = tf.cast(images, tf.float32) + + # Crop only the face region. + if self.crop: + c = int(images.shape[2] // 8) + images = images[:, :, c*3 : c*7, c*2 : c*6] + + # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images. + factor = images.shape[2] // 256 + if factor > 1: + images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor]) + images = tf.reduce_mean(images, axis=[3,5]) + + # Scale dynamic range from [-1,1] to [0,255] for VGG. + images = (images + 1) * (255 / 2) + if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1]) + + # Evaluate perceptual distance. 
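+                # NOTE (editor): PPL is the LPIPS distance between images synthesized at
+                # interpolation parameters t and t + epsilon, scaled by 1/epsilon^2, i.e. a
+                # finite-difference estimate of the perceptual path length per unit step.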
+ img_e0, img_e1 = images[0::2], images[1::2] + with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/vgg16_zhang_perceptual.pkl') as f: + distance_measure = pickle.load(f) + distance_expr.append(distance_measure.get_output_for(img_e0, img_e1) * (1 / self.epsilon**2)) + + # Sampling loop. + all_distances = [] + for begin in range(0, self.num_samples, minibatch_size): + self._report_progress(begin, self.num_samples) + all_distances += tflib.run(distance_expr) + all_distances = np.concatenate(all_distances, axis=0) + + # Reject outliers. + lo = np.percentile(all_distances, 1, interpolation='lower') + hi = np.percentile(all_distances, 99, interpolation='higher') + filtered_distances = np.extract(np.logical_and(lo <= all_distances, all_distances <= hi), all_distances) + self._report_result(np.mean(filtered_distances)) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/precision_recall.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/precision_recall.py new file mode 100644 index 0000000000000000000000000000000000000000..dab3fecc3120214370ec12acfd8a36ef9f7e9aa8 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/metrics/precision_recall.py @@ -0,0 +1,234 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Precision/Recall (PR) from the paper +"Improved Precision and Recall Metric for Assessing Generative Models".""" + +import os +import pickle +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from metrics import metric_base + +#---------------------------------------------------------------------------- + +def batch_pairwise_distances(U, V): + """ Compute pairwise distances between two batches of feature vectors.""" + with tf.variable_scope('pairwise_dist_block'): + # Squared norms of each row in U and V. + norm_u = tf.reduce_sum(tf.square(U), 1) + norm_v = tf.reduce_sum(tf.square(V), 1) + + # norm_u as a row and norm_v as a column vectors. + norm_u = tf.reshape(norm_u, [-1, 1]) + norm_v = tf.reshape(norm_v, [1, -1]) + + # Pairwise squared Euclidean distances. + D = tf.maximum(norm_u - 2*tf.matmul(U, V, False, True) + norm_v, 0.0) + + return D + +#---------------------------------------------------------------------------- + +class DistanceBlock(): + """Distance block.""" + def __init__(self, num_features, num_gpus): + self.num_features = num_features + self.num_gpus = num_gpus + + # Initialize TF graph to calculate pairwise distances. 
+ with tf.device('/cpu:0'): + self._features_batch1 = tf.placeholder(tf.float16, shape=[None, self.num_features]) + self._features_batch2 = tf.placeholder(tf.float16, shape=[None, self.num_features]) + features_split2 = tf.split(self._features_batch2, self.num_gpus, axis=0) + distances_split = [] + for gpu_idx in range(self.num_gpus): + with tf.device(f'/gpu:{gpu_idx}'): + distances_split.append(batch_pairwise_distances(self._features_batch1, features_split2[gpu_idx])) + self._distance_block = tf.concat(distances_split, axis=1) + + def pairwise_distances(self, U, V): + """Evaluate pairwise distances between two batches of feature vectors.""" + return self._distance_block.eval(feed_dict={self._features_batch1: U, self._features_batch2: V}) + +#---------------------------------------------------------------------------- + +class ManifoldEstimator(): + """Finds an estimate for the manifold of given feature vectors.""" + def __init__(self, distance_block, features, row_batch_size, col_batch_size, nhood_sizes, clamp_to_percentile=None): + """Find an estimate of the manifold of given feature vectors.""" + num_images = features.shape[0] + self.nhood_sizes = nhood_sizes + self.num_nhoods = len(nhood_sizes) + self.row_batch_size = row_batch_size + self.col_batch_size = col_batch_size + self._ref_features = features + self._distance_block = distance_block + + # Estimate manifold of features by calculating distances to kth nearest neighbor of each sample. + self.D = np.zeros([num_images, self.num_nhoods], dtype=np.float16) + distance_batch = np.zeros([row_batch_size, num_images], dtype=np.float16) + seq = np.arange(max(self.nhood_sizes) + 1, dtype=np.int32) + + for begin1 in range(0, num_images, row_batch_size): + end1 = min(begin1 + row_batch_size, num_images) + row_batch = features[begin1:end1] + + for begin2 in range(0, num_images, col_batch_size): + end2 = min(begin2 + col_batch_size, num_images) + col_batch = features[begin2:end2] + + # Compute distances between batches. + distance_batch[0:end1-begin1, begin2:end2] = self._distance_block.pairwise_distances(row_batch, col_batch) + + # Find the kth nearest neighbor from the current batch. 
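+            # NOTE (editor): self.D[i, j] holds the distance from reference sample i to its
+            # k-th nearest neighbour (k = nhood_sizes[j]); these per-sample radii define the
+            # hyperspheres whose union approximates the manifold used for precision/recall.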
+ self.D[begin1:end1, :] = np.partition(distance_batch[0:end1-begin1, :], seq, axis=1)[:, self.nhood_sizes] + + if clamp_to_percentile is not None: + max_distances = np.percentile(self.D, clamp_to_percentile, axis=0) + self.D[self.D > max_distances] = 0 #max_distances # 0 + + def evaluate(self, eval_features, return_realism=False, return_neighbors=False): + """Evaluate if new feature vectors are in the estimated manifold.""" + num_eval_images = eval_features.shape[0] + num_ref_images = self.D.shape[0] + distance_batch = np.zeros([self.row_batch_size, num_ref_images], dtype=np.float16) + batch_predictions = np.zeros([num_eval_images, self.num_nhoods], dtype=np.int32) + #max_realism_score = np.zeros([num_eval_images,], dtype=np.float32) + realism_score = np.zeros([num_eval_images,], dtype=np.float32) + nearest_indices = np.zeros([num_eval_images,], dtype=np.int32) + + for begin1 in range(0, num_eval_images, self.row_batch_size): + end1 = min(begin1 + self.row_batch_size, num_eval_images) + feature_batch = eval_features[begin1:end1] + + for begin2 in range(0, num_ref_images, self.col_batch_size): + end2 = min(begin2 + self.col_batch_size, num_ref_images) + ref_batch = self._ref_features[begin2:end2] + + distance_batch[0:end1-begin1, begin2:end2] = self._distance_block.pairwise_distances(feature_batch, ref_batch) + + # From the minibatch of new feature vectors, determine if they are in the estimated manifold. + # If a feature vector is inside a hypersphere of some reference sample, then the new sample lies on the estimated manifold. + # The radii of the hyperspheres are determined from distances of neighborhood size k. + samples_in_manifold = distance_batch[0:end1-begin1, :, None] <= self.D + batch_predictions[begin1:end1] = np.any(samples_in_manifold, axis=1).astype(np.int32) + + #max_realism_score[begin1:end1] = np.max(self.D[:, 0] / (distance_batch[0:end1-begin1, :] + 1e-18), axis=1) + #nearest_indices[begin1:end1] = np.argmax(self.D[:, 0] / (distance_batch[0:end1-begin1, :] + 1e-18), axis=1) + nearest_indices[begin1:end1] = np.argmin(distance_batch[0:end1-begin1, :], axis=1) + realism_score[begin1:end1] = self.D[nearest_indices[begin1:end1], 0] / np.min(distance_batch[0:end1-begin1, :], axis=1) + + if return_realism and return_neighbors: + return batch_predictions, realism_score, nearest_indices + elif return_realism: + return batch_predictions, realism_score + elif return_neighbors: + return batch_predictions, nearest_indices + + return batch_predictions + +#---------------------------------------------------------------------------- + +def knn_precision_recall_features(ref_features, eval_features, feature_net, nhood_sizes, + row_batch_size, col_batch_size, num_gpus): + """Calculates k-NN precision and recall for two sets of feature vectors.""" + state = dnnlib.EasyDict() + #num_images = ref_features.shape[0] + num_features = feature_net.output_shape[1] + state.ref_features = ref_features + state.eval_features = eval_features + + # Initialize DistanceBlock and ManifoldEstimators. + distance_block = DistanceBlock(num_features, num_gpus) + state.ref_manifold = ManifoldEstimator(distance_block, state.ref_features, row_batch_size, col_batch_size, nhood_sizes) + state.eval_manifold = ManifoldEstimator(distance_block, state.eval_features, row_batch_size, col_batch_size, nhood_sizes) + + # Evaluate precision and recall using k-nearest neighbors. 
+ #print(f'Evaluating k-NN precision and recall with {num_images} samples...') + #start = time.time() + + # Precision: How many points from eval_features are in ref_features manifold. + state.precision, state.realism_scores, state.nearest_neighbors = state.ref_manifold.evaluate(state.eval_features, return_realism=True, return_neighbors=True) + state.knn_precision = state.precision.mean(axis=0) + + # Recall: How many points from ref_features are in eval_features manifold. + state.recall = state.eval_manifold.evaluate(state.ref_features) + state.knn_recall = state.recall.mean(axis=0) + + #elapsed_time = time.time() - start + #print(f'Done evaluation in: {elapsed_time:g}s') + + return state + +#---------------------------------------------------------------------------- + +class PR(metric_base.MetricBase): + def __init__(self, max_reals, num_fakes, nhood_size, minibatch_per_gpu, row_batch_size, col_batch_size, **kwargs): + super().__init__(**kwargs) + self.max_reals = max_reals + self.num_fakes = num_fakes + self.nhood_size = nhood_size + self.minibatch_per_gpu = minibatch_per_gpu + self.row_batch_size = row_batch_size + self.col_batch_size = col_batch_size + + def _evaluate(self, Gs, G_kwargs, num_gpus, **_kwargs): # pylint: disable=arguments-differ + minibatch_size = num_gpus * self.minibatch_per_gpu + with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/vgg16.pkl') as f: + feature_net = pickle.load(f) + + # Calculate features for reals. + cache_file = self._get_cache_file_for_reals(max_reals=self.max_reals) + os.makedirs(os.path.dirname(cache_file), exist_ok=True) + if os.path.isfile(cache_file): + with open(cache_file, 'rb') as f: + feat_real = pickle.load(f) + else: + feat_real = [] + for images, _labels, num in self._iterate_reals(minibatch_size): + if images.shape[1] == 1: images = np.tile(images, [1, 3, 1, 1]) + feat_real += list(feature_net.run(images, num_gpus=num_gpus, assume_frozen=True))[:num] + if self.max_reals is not None and len(feat_real) >= self.max_reals: + break + if self.max_reals is not None and len(feat_real) > self.max_reals: + feat_real = feat_real[:self.max_reals] + feat_real = np.stack(feat_real) + with open(cache_file, 'wb') as f: + pickle.dump(feat_real, f) + + # Construct TensorFlow graph. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device(f'/gpu:{gpu_idx}'): + Gs_clone = Gs.clone() + feature_net_clone = feature_net.clone() + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + labels = self._get_random_labels_tf(self.minibatch_per_gpu) + images = Gs_clone.get_output_for(latents, labels, **G_kwargs) + if images.shape[1] == 1: images = tf.tile(images, [1, 3, 1, 1]) + images = tflib.convert_images_to_uint8(images) + result_expr.append(feature_net_clone.get_output_for(images)) + + # Calculate features for fakes. + feat_fake = [] + for begin in range(0, self.num_fakes, minibatch_size): + self._report_progress(begin, self.num_fakes) + feat_fake += list(np.concatenate(tflib.run(result_expr), axis=0)) + feat_fake = np.stack(feat_fake[:self.num_fakes]) + + # Calculate precision and recall. 
+ state = knn_precision_recall_features(ref_features=feat_real, eval_features=feat_fake, feature_net=feature_net, + nhood_sizes=[self.nhood_size], row_batch_size=self.row_batch_size, col_batch_size=self.row_batch_size, num_gpus=num_gpus) + self._report_result(state.knn_precision[0], suffix='_precision') + self._report_result(state.knn_recall[0], suffix='_recall') + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/projector.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/projector.py new file mode 100644 index 0000000000000000000000000000000000000000..8f6be7e75a785755e21be4531d3f9fedd7815794 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/projector.py @@ -0,0 +1,289 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Project given image to the latent space of pretrained network pickle.""" + +import argparse +import os +import pickle +import imageio + +import numpy as np +import PIL.Image +import tensorflow as tf +import tqdm + +import dnnlib +import dnnlib.tflib as tflib + +class Projector: + def __init__(self): + self.num_steps = 1000 + self.dlatent_avg_samples = 10000 + self.initial_learning_rate = 0.1 + self.initial_noise_factor = 0.05 + self.lr_rampdown_length = 0.25 + self.lr_rampup_length = 0.05 + self.noise_ramp_length = 0.75 + self.regularize_noise_weight = 1e5 + self.verbose = True + + self._Gs = None + self._minibatch_size = None + self._dlatent_avg = None + self._dlatent_std = None + self._noise_vars = None + self._noise_init_op = None + self._noise_normalize_op = None + self._dlatents_var = None + self._dlatent_noise_in = None + self._dlatents_expr = None + self._images_float_expr = None + self._images_uint8_expr = None + self._target_images_var = None + self._lpips = None + self._dist = None + self._loss = None + self._reg_sizes = None + self._lrate_in = None + self._opt = None + self._opt_step = None + self._cur_step = None + + def _info(self, *args): + if self.verbose: + print('Projector:', *args) + + def set_network(self, Gs, dtype='float16'): + if Gs is None: + self._Gs = None + return + self._Gs = Gs.clone(randomize_noise=False, dtype=dtype, num_fp16_res=0, fused_modconv=True) + + # Compute dlatent stats. + self._info(f'Computing W midpoint and stddev using {self.dlatent_avg_samples} samples...') + latent_samples = np.random.RandomState(123).randn(self.dlatent_avg_samples, *self._Gs.input_shapes[0][1:]) + dlatent_samples = self._Gs.components.mapping.run(latent_samples, None) # [N, L, C] + dlatent_samples = dlatent_samples[:, :1, :].astype(np.float32) # [N, 1, C] + self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 1, C] + self._dlatent_std = (np.sum((dlatent_samples - self._dlatent_avg) ** 2) / self.dlatent_avg_samples) ** 0.5 + self._info(f'std = {self._dlatent_std:g}') + + # Setup noise inputs. 
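+        # NOTE (editor): in addition to the W vector, the projector optimizes the per-resolution
+        # noise buffers of the synthesis network; the ops built below re-randomize them at the
+        # start of optimization and keep them zero-mean/unit-variance, while the regularizer
+        # added later penalizes spatial correlation (weighted by regularize_noise_weight).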
+ self._info('Setting up noise inputs...') + self._noise_vars = [] + noise_init_ops = [] + noise_normalize_ops = [] + while True: + n = f'G_synthesis/noise{len(self._noise_vars)}' + if not n in self._Gs.vars: + break + v = self._Gs.vars[n] + self._noise_vars.append(v) + noise_init_ops.append(tf.assign(v, tf.random_normal(tf.shape(v), dtype=tf.float32))) + noise_mean = tf.reduce_mean(v) + noise_std = tf.reduce_mean((v - noise_mean)**2)**0.5 + noise_normalize_ops.append(tf.assign(v, (v - noise_mean) / noise_std)) + self._noise_init_op = tf.group(*noise_init_ops) + self._noise_normalize_op = tf.group(*noise_normalize_ops) + + # Build image output graph. + self._info('Building image output graph...') + self._minibatch_size = 1 + self._dlatents_var = tf.Variable(tf.zeros([self._minibatch_size] + list(self._dlatent_avg.shape[1:])), name='dlatents_var') + self._dlatent_noise_in = tf.placeholder(tf.float32, [], name='noise_in') + dlatents_noise = tf.random.normal(shape=self._dlatents_var.shape) * self._dlatent_noise_in + self._dlatents_expr = tf.tile(self._dlatents_var + dlatents_noise, [1, self._Gs.components.synthesis.input_shape[1], 1]) + self._images_float_expr = tf.cast(self._Gs.components.synthesis.get_output_for(self._dlatents_expr), tf.float32) + self._images_uint8_expr = tflib.convert_images_to_uint8(self._images_float_expr, nchw_to_nhwc=True) + + # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images. + proc_images_expr = (self._images_float_expr + 1) * (255 / 2) + sh = proc_images_expr.shape.as_list() + if sh[2] > 256: + factor = sh[2] // 256 + proc_images_expr = tf.reduce_mean(tf.reshape(proc_images_expr, [-1, sh[1], sh[2] // factor, factor, sh[2] // factor, factor]), axis=[3,5]) + + # Build loss graph. + self._info('Building loss graph...') + self._target_images_var = tf.Variable(tf.zeros(proc_images_expr.shape), name='target_images_var') + if self._lpips is None: + with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metrics/vgg16_zhang_perceptual.pkl') as f: + self._lpips = pickle.load(f) + self._dist = self._lpips.get_output_for(proc_images_expr, self._target_images_var) + self._loss = tf.reduce_sum(self._dist) + + # Build noise regularization graph. + self._info('Building noise regularization graph...') + reg_loss = 0.0 + for v in self._noise_vars: + sz = v.shape[2] + while True: + reg_loss += tf.reduce_mean(v * tf.roll(v, shift=1, axis=3))**2 + tf.reduce_mean(v * tf.roll(v, shift=1, axis=2))**2 + if sz <= 8: + break # Small enough already + v = tf.reshape(v, [1, 1, sz//2, 2, sz//2, 2]) # Downscale + v = tf.reduce_mean(v, axis=[3, 5]) + sz = sz // 2 + self._loss += reg_loss * self.regularize_noise_weight + + # Setup optimizer. + self._info('Setting up optimizer...') + self._lrate_in = tf.placeholder(tf.float32, [], name='lrate_in') + self._opt = tflib.Optimizer(learning_rate=self._lrate_in) + self._opt.register_gradients(self._loss, [self._dlatents_var] + self._noise_vars) + self._opt_step = self._opt.apply_updates() + + def start(self, target_images): + assert self._Gs is not None + + # Prepare target images. 
+ self._info('Preparing target images...') + target_images = np.asarray(target_images, dtype='float32') + target_images = (target_images + 1) * (255 / 2) + sh = target_images.shape + assert sh[0] == self._minibatch_size + if sh[2] > self._target_images_var.shape[2]: + factor = sh[2] // self._target_images_var.shape[2] + target_images = np.reshape(target_images, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]).mean((3, 5)) + + # Initialize optimization state. + self._info('Initializing optimization state...') + dlatents = np.tile(self._dlatent_avg, [self._minibatch_size, 1, 1]) + tflib.set_vars({self._target_images_var: target_images, self._dlatents_var: dlatents}) + tflib.run(self._noise_init_op) + self._opt.reset_optimizer_state() + self._cur_step = 0 + + def step(self): + assert self._cur_step is not None + if self._cur_step >= self.num_steps: + return 0, 0 + + # Choose hyperparameters. + t = self._cur_step / self.num_steps + dlatent_noise = self._dlatent_std * self.initial_noise_factor * max(0.0, 1.0 - t / self.noise_ramp_length) ** 2 + lr_ramp = min(1.0, (1.0 - t) / self.lr_rampdown_length) + lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi) + lr_ramp = lr_ramp * min(1.0, t / self.lr_rampup_length) + learning_rate = self.initial_learning_rate * lr_ramp + + # Execute optimization step. + feed_dict = {self._dlatent_noise_in: dlatent_noise, self._lrate_in: learning_rate} + _, dist_value, loss_value = tflib.run([self._opt_step, self._dist, self._loss], feed_dict) + tflib.run(self._noise_normalize_op) + self._cur_step += 1 + return dist_value, loss_value + + @property + def cur_step(self): + return self._cur_step + + @property + def dlatents(self): + return tflib.run(self._dlatents_expr, {self._dlatent_noise_in: 0}) + + @property + def noises(self): + return tflib.run(self._noise_vars) + + @property + def images_float(self): + return tflib.run(self._images_float_expr, {self._dlatent_noise_in: 0}) + + @property + def images_uint8(self): + return tflib.run(self._images_uint8_expr, {self._dlatent_noise_in: 0}) + +#---------------------------------------------------------------------------- + +def project(network_pkl: str, target_fname: str, outdir: str, save_video: bool, seed: int): + # Load networks. + tflib.init_tf({'rnd.np_random_seed': seed}) + print('Loading networks from "%s"...' % network_pkl) + with dnnlib.util.open_url(network_pkl) as fp: + _G, _D, Gs = pickle.load(fp) + + # Load target image. + target_pil = PIL.Image.open(target_fname) + w, h = target_pil.size + s = min(w, h) + target_pil = target_pil.crop(((w - s) // 2, (h - s) // 2, (w + s) // 2, (h + s) // 2)) + target_pil= target_pil.convert('RGB') + target_pil = target_pil.resize((Gs.output_shape[3], Gs.output_shape[2]), PIL.Image.ANTIALIAS) + target_uint8 = np.array(target_pil, dtype=np.uint8) + target_float = target_uint8.astype(np.float32).transpose([2, 0, 1]) * (2 / 255) - 1 + + # Initialize projector. + proj = Projector() + proj.set_network(Gs) + proj.start([target_float]) + + # Setup output directory. + os.makedirs(outdir, exist_ok=True) + target_pil.save(f'{outdir}/target.png') + writer = None + if save_video: + writer = imageio.get_writer(f'{outdir}/proj.mp4', mode='I', fps=60, codec='libx264', bitrate='16M') + + # Run projector. 
+ with tqdm.trange(proj.num_steps) as t: + for step in t: + assert step == proj.cur_step + if writer is not None: + writer.append_data(np.concatenate([target_uint8, proj.images_uint8[0]], axis=1)) + dist, loss = proj.step() + t.set_postfix(dist=f'{dist[0]:.4f}', loss=f'{loss:.2f}') + + # Save results. + PIL.Image.fromarray(proj.images_uint8[0], 'RGB').save(f'{outdir}/proj.png') + np.savez(f'{outdir}/dlatents.npz', dlatents=proj.dlatents) + if writer is not None: + writer.close() + +#---------------------------------------------------------------------------- + +def _str_to_bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + if v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + raise argparse.ArgumentTypeError('Boolean value expected.') + +#---------------------------------------------------------------------------- + +_examples = '''examples: + + python %(prog)s --outdir=out --target=targetimg.png \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/ffhq.pkl +''' + +#---------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description='Project given image to the latent space of pretrained network pickle.', + epilog=_examples, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) + parser.add_argument('--target', help='Target image file to project to', dest='target_fname', required=True) + parser.add_argument('--save-video', help='Save an mp4 video of optimization progress (default: true)', type=_str_to_bool, default=True) + parser.add_argument('--seed', help='Random seed', type=int, default=303) + parser.add_argument('--outdir', help='Where to save the output images', required=True, metavar='DIR') + project(**vars(parser.parse_args())) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/style_mixing.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/style_mixing.py new file mode 100644 index 0000000000000000000000000000000000000000..7d183f853232f3630009937855a2e88647aa137e --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/style_mixing.py @@ -0,0 +1,120 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Generate style mixing image matrix using pretrained network pickle.""" + +import argparse +import os +import pickle +import re + +import numpy as np +import PIL.Image + +import dnnlib +import dnnlib.tflib as tflib + +#---------------------------------------------------------------------------- + +def style_mixing_example(network_pkl, row_seeds, col_seeds, truncation_psi, col_styles, outdir, minibatch_size=4): + tflib.init_tf() + print('Loading networks from "%s"...' 
% network_pkl) + with dnnlib.util.open_url(network_pkl) as fp: + _G, _D, Gs = pickle.load(fp) + + w_avg = Gs.get_var('dlatent_avg') # [component] + Gs_syn_kwargs = { + 'output_transform': dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True), + 'randomize_noise': False, + 'minibatch_size': minibatch_size + } + + print('Generating W vectors...') + all_seeds = list(set(row_seeds + col_seeds)) + all_z = np.stack([np.random.RandomState(seed).randn(*Gs.input_shape[1:]) for seed in all_seeds]) # [minibatch, component] + all_w = Gs.components.mapping.run(all_z, None) # [minibatch, layer, component] + all_w = w_avg + (all_w - w_avg) * truncation_psi # [minibatch, layer, component] + w_dict = {seed: w for seed, w in zip(all_seeds, list(all_w))} # [layer, component] + + print('Generating images...') + all_images = Gs.components.synthesis.run(all_w, **Gs_syn_kwargs) # [minibatch, height, width, channel] + image_dict = {(seed, seed): image for seed, image in zip(all_seeds, list(all_images))} + + print('Generating style-mixed images...') + for row_seed in row_seeds: + for col_seed in col_seeds: + w = w_dict[row_seed].copy() + w[col_styles] = w_dict[col_seed][col_styles] + image = Gs.components.synthesis.run(w[np.newaxis], **Gs_syn_kwargs)[0] + image_dict[(row_seed, col_seed)] = image + + print('Saving images...') + os.makedirs(outdir, exist_ok=True) + for (row_seed, col_seed), image in image_dict.items(): + PIL.Image.fromarray(image, 'RGB').save(f'{outdir}/{row_seed}-{col_seed}.png') + + print('Saving image grid...') + _N, _C, H, W = Gs.output_shape + canvas = PIL.Image.new('RGB', (W * (len(col_seeds) + 1), H * (len(row_seeds) + 1)), 'black') + for row_idx, row_seed in enumerate([None] + row_seeds): + for col_idx, col_seed in enumerate([None] + col_seeds): + if row_seed is None and col_seed is None: + continue + key = (row_seed, col_seed) + if row_seed is None: + key = (col_seed, col_seed) + if col_seed is None: + key = (row_seed, row_seed) + canvas.paste(PIL.Image.fromarray(image_dict[key], 'RGB'), (W * col_idx, H * row_idx)) + canvas.save(f'{outdir}/grid.png') + +#---------------------------------------------------------------------------- + +def _parse_num_range(s): + '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' + + range_re = re.compile(r'^(\d+)-(\d+)$') + m = range_re.match(s) + if m: + return list(range(int(m.group(1)), int(m.group(2))+1)) + vals = s.split(',') + return [int(x) for x in vals] + +#---------------------------------------------------------------------------- + +_examples = '''examples: + + python %(prog)s --outdir=out --trunc=1 --rows=85,100,75,458,1500 --cols=55,821,1789,293 \\ + --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/metfaces.pkl +''' + +#---------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description='Generate style mixing image matrix using pretrained network pickle.', + epilog=_examples, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) + parser.add_argument('--rows', dest='row_seeds', type=_parse_num_range, help='Random seeds to use for image rows', required=True) + parser.add_argument('--cols', dest='col_seeds', type=_parse_num_range, help='Random seeds to use for image columns', required=True) + parser.add_argument('--styles', dest='col_styles', type=_parse_num_range, help='Style layer 
+ range (default: %(default)s)', default='0-6') + parser.add_argument('--trunc', dest='truncation_psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5) + parser.add_argument('--outdir', help='Where to save the output images', required=True, metavar='DIR') + + args = parser.parse_args() + style_mixing_example(**vars(args)) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/train.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/train.py new file mode 100644 index 0000000000000000000000000000000000000000..5b36d7922670f98a7d71588f4ef0f7df16036b56 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/train.py @@ -0,0 +1,563 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Train a GAN using the techniques described in the paper +"Training Generative Adversarial Networks with Limited Data".""" + +import os +import argparse +import json +import re +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +from training import training_loop +from training import dataset +from metrics import metric_defaults + +#---------------------------------------------------------------------------- + +class UserError(Exception): + pass + +#---------------------------------------------------------------------------- + +def setup_training_options( + # General options (not included in desc). + gpus = None, # Number of GPUs: <int>, default = 1 gpu + snap = None, # Snapshot interval: <int>, default = 50 ticks + + # Training dataset. + data = None, # Training dataset (required): <path> + res = None, # Override dataset resolution: <int>, default = highest available + mirror = None, # Augment dataset with x-flips: <bool>, default = False + + # Metrics (not included in desc). + metrics = None, # List of metric names: [], ['fid50k_full'] (default), ... + metricdata = None, # Metric dataset (optional): <path> + + # Base config. + cfg = None, # Base config: 'auto' (default), 'stylegan2', 'paper256', 'paper512', 'paper1024', 'cifar', 'cifarbaseline' + gamma = None, # Override R1 gamma: <float>, default = depends on cfg + kimg = None, # Override training duration: <int>, default = depends on cfg + + # Discriminator augmentation. + aug = None, # Augmentation mode: 'ada' (default), 'noaug', 'fixed', 'adarv' + p = None, # Specify p for 'fixed' (required): <float> + target = None, # Override ADA target for 'ada' and 'adarv': <float>, default = depends on aug + augpipe = None, # Augmentation pipeline: 'blit', 'geom', 'color', 'filter', 'noise', 'cutout', 'bg', 'bgc' (default), ..., 'bgcfnc' + + # Comparison methods. + cmethod = None, # Comparison method: 'nocmethod' (default), 'bcr', 'zcr', 'pagan', 'wgangp', 'auxrot', 'spectralnorm', 'shallowmap', 'adropout' + dcap = None, # Multiplier for discriminator capacity: <float>, default = 1 + + # Transfer learning.
+ resume = None, # Load previous network: 'noresume' (default), 'ffhq256', 'ffhq512', 'ffhq1024', 'celebahq256', 'lsundog256', <file>, <url> + freezed = None, # Freeze-D: <int>, default = 0 discriminator layers +): + # Initialize dicts. + args = dnnlib.EasyDict() + args.G_args = dnnlib.EasyDict(func_name='training.networks.G_main') + args.D_args = dnnlib.EasyDict(func_name='training.networks.D_main') + args.G_opt_args = dnnlib.EasyDict(beta1=0.0, beta2=0.99) + args.D_opt_args = dnnlib.EasyDict(beta1=0.0, beta2=0.99) + args.loss_args = dnnlib.EasyDict(func_name='training.loss.stylegan2') + args.augment_args = dnnlib.EasyDict(class_name='training.augment.AdaptiveAugment') + + # --------------------------- + # General options: gpus, snap + # --------------------------- + + if gpus is None: + gpus = 1 + assert isinstance(gpus, int) + if not (gpus >= 1 and gpus & (gpus - 1) == 0): + raise UserError('--gpus must be a power of two') + args.num_gpus = gpus + + if snap is None: + snap = 50 + assert isinstance(snap, int) + if snap < 1: + raise UserError('--snap must be at least 1') + args.image_snapshot_ticks = snap + args.network_snapshot_ticks = snap + + # ----------------------------------- + # Training dataset: data, res, mirror + # ----------------------------------- + + assert data is not None + assert isinstance(data, str) + data_name = os.path.basename(os.path.abspath(data)) + if not os.path.isdir(data) or len(data_name) == 0: + raise UserError('--data must point to a directory containing *.tfrecords') + desc = data_name + + with tf.Graph().as_default(), tflib.create_session().as_default(): # pylint: disable=not-context-manager + args.train_dataset_args = dnnlib.EasyDict(path=data, max_label_size='full') + dataset_obj = dataset.load_dataset(**args.train_dataset_args) # try to load the data and see what comes out + args.train_dataset_args.resolution = dataset_obj.shape[-1] # be explicit about resolution + args.train_dataset_args.max_label_size = dataset_obj.label_size # be explicit about label size + validation_set_available = dataset_obj.has_validation_set + dataset_obj.close() + dataset_obj = None + + if res is None: + res = args.train_dataset_args.resolution + else: + assert isinstance(res, int) + if not (res >= 4 and res & (res - 1) == 0): + raise UserError('--res must be a power of two and at least 4') + if res > args.train_dataset_args.resolution: + raise UserError(f'--res cannot exceed maximum available resolution in the dataset ({args.train_dataset_args.resolution})') + desc += f'-res{res:d}' + args.train_dataset_args.resolution = res + + if mirror is None: + mirror = False + else: + assert isinstance(mirror, bool) + if mirror: + desc += '-mirror' + args.train_dataset_args.mirror_augment = mirror + + # ---------------------------- + # Metrics: metrics, metricdata + # ---------------------------- + + if metrics is None: + metrics = ['fid50k_full'] + assert isinstance(metrics, list) + assert all(isinstance(metric, str) for metric in metrics) + + args.metric_arg_list = [] + for metric in metrics: + if metric not in metric_defaults.metric_defaults: + raise UserError('\n'.join(['--metrics can only contain the following values:', 'none'] + list(metric_defaults.metric_defaults.keys()))) + args.metric_arg_list.append(metric_defaults.metric_defaults[metric]) + + args.metric_dataset_args = dnnlib.EasyDict(args.train_dataset_args) + if metricdata is not None: + assert isinstance(metricdata, str) + if not os.path.isdir(metricdata): + raise UserError('--metricdata must point to a directory containing *.tfrecords')
+ args.metric_dataset_args.path = metricdata + + # ----------------------------- + # Base config: cfg, gamma, kimg + # ----------------------------- + + if cfg is None: + cfg = 'auto' + assert isinstance(cfg, str) + desc += f'-{cfg}' + + cfg_specs = { + 'auto': dict(ref_gpus=-1, kimg=25000, mb=-1, mbstd=-1, fmaps=-1, lrate=-1, gamma=-1, ema=-1, ramp=0.05, map=2), # populated dynamically based on 'gpus' and 'res' + 'stylegan2': dict(ref_gpus=8, kimg=25000, mb=32, mbstd=4, fmaps=1, lrate=0.002, gamma=10, ema=10, ramp=None, map=8), # uses mixed-precision, unlike original StyleGAN2 + 'paper256': dict(ref_gpus=8, kimg=25000, mb=64, mbstd=8, fmaps=0.5, lrate=0.0025, gamma=1, ema=20, ramp=None, map=8), + 'paper512': dict(ref_gpus=8, kimg=25000, mb=64, mbstd=8, fmaps=1, lrate=0.0025, gamma=0.5, ema=20, ramp=None, map=8), + 'paper1024': dict(ref_gpus=8, kimg=25000, mb=32, mbstd=4, fmaps=1, lrate=0.002, gamma=2, ema=10, ramp=None, map=8), + 'cifar': dict(ref_gpus=2, kimg=100000, mb=64, mbstd=32, fmaps=0.5, lrate=0.0025, gamma=0.01, ema=500, ramp=0.05, map=2), + 'cifarbaseline': dict(ref_gpus=2, kimg=100000, mb=64, mbstd=32, fmaps=0.5, lrate=0.0025, gamma=0.01, ema=500, ramp=0.05, map=8), + } + + assert cfg in cfg_specs + spec = dnnlib.EasyDict(cfg_specs[cfg]) + if cfg == 'auto': + desc += f'{gpus:d}' + spec.ref_gpus = gpus + spec.mb = max(min(gpus * min(4096 // res, 32), 64), gpus) # keep gpu memory consumption at bay + spec.mbstd = min(spec.mb // gpus, 4) # other hyperparams behave more predictably if mbstd group size remains fixed + spec.fmaps = 1 if res >= 512 else 0.5 + spec.lrate = 0.002 if res >= 1024 else 0.0025 + spec.gamma = 0.0002 * (res ** 2) / spec.mb # heuristic formula + spec.ema = spec.mb * 10 / 32 + + args.total_kimg = spec.kimg + args.minibatch_size = spec.mb + args.minibatch_gpu = spec.mb // spec.ref_gpus + args.D_args.mbstd_group_size = spec.mbstd + args.G_args.fmap_base = args.D_args.fmap_base = int(spec.fmaps * 16384) + args.G_args.fmap_max = args.D_args.fmap_max = 512 + args.G_opt_args.learning_rate = args.D_opt_args.learning_rate = spec.lrate + args.loss_args.r1_gamma = spec.gamma + args.G_smoothing_kimg = spec.ema + args.G_smoothing_rampup = spec.ramp + args.G_args.mapping_layers = spec.map + args.G_args.num_fp16_res = args.D_args.num_fp16_res = 4 # enable mixed-precision training + args.G_args.conv_clamp = args.D_args.conv_clamp = 256 # clamp activations to avoid float16 overflow + + if cfg == 'cifar': + args.loss_args.pl_weight = 0 # disable path length regularization + args.G_args.style_mixing_prob = None # disable style mixing + args.D_args.architecture = 'orig' # disable residual skip connections + + if gamma is not None: + assert isinstance(gamma, float) + if not gamma >= 0: + raise UserError('--gamma must be non-negative') + desc += f'-gamma{gamma:g}' + args.loss_args.r1_gamma = gamma + + if kimg is not None: + assert isinstance(kimg, int) + if not kimg >= 1: + raise UserError('--kimg must be at least 1') + desc += f'-kimg{kimg:d}' + args.total_kimg = kimg + + # --------------------------------------------------- + # Discriminator augmentation: aug, p, target, augpipe + # --------------------------------------------------- + + if aug is None: + aug = 'ada' + else: + assert isinstance(aug, str) + desc += f'-{aug}' + + if aug == 'ada': + args.augment_args.tune_heuristic = 'rt' + args.augment_args.tune_target = 0.6 + + elif aug == 'noaug': + pass + + elif aug == 'fixed': + if p is None: + raise UserError(f'--aug={aug} requires specifying --p') + + elif aug == 'adarv': + 
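+ # The 'adarv' heuristic tunes the augmentation strength from the gap between the discriminator scores on real and held-out validation images, so the dataset must provide a validation split.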
if not validation_set_available: + raise UserError(f'--aug={aug} requires separate validation set; please see "python dataset_tool.py pack -h"') + args.augment_args.tune_heuristic = 'rv' + args.augment_args.tune_target = 0.5 + + else: + raise UserError(f'--aug={aug} not supported') + + if p is not None: + assert isinstance(p, float) + if aug != 'fixed': + raise UserError('--p can only be specified with --aug=fixed') + if not 0 <= p <= 1: + raise UserError('--p must be between 0 and 1') + desc += f'-p{p:g}' + args.augment_args.initial_strength = p + + if target is not None: + assert isinstance(target, float) + if aug not in ['ada', 'adarv']: + raise UserError('--target can only be specified with --aug=ada or --aug=adarv') + if not 0 <= target <= 1: + raise UserError('--target must be between 0 and 1') + desc += f'-target{target:g}' + args.augment_args.tune_target = target + + assert augpipe is None or isinstance(augpipe, str) + if augpipe is None: + augpipe = 'bgc' + else: + if aug == 'noaug': + raise UserError('--augpipe cannot be specified with --aug=noaug') + desc += f'-{augpipe}' + + augpipe_specs = { + 'blit': dict(xflip=1, rotate90=1, xint=1), + 'geom': dict(scale=1, rotate=1, aniso=1, xfrac=1), + 'color': dict(brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1), + 'filter': dict(imgfilter=1), + 'noise': dict(noise=1), + 'cutout': dict(cutout=1), + 'bg': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1), + 'bgc': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1, brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1), + 'bgcf': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1, brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1, imgfilter=1), + 'bgcfn': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1, brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1, imgfilter=1, noise=1), + 'bgcfnc': dict(xflip=1, rotate90=1, xint=1, scale=1, rotate=1, aniso=1, xfrac=1, brightness=1, contrast=1, lumaflip=1, hue=1, saturation=1, imgfilter=1, noise=1, cutout=1), + } + + assert augpipe in augpipe_specs + if aug != 'noaug': + args.augment_args.apply_func = 'training.augment.augment_pipeline' + args.augment_args.apply_args = augpipe_specs[augpipe] + + # --------------------------------- + # Comparison methods: cmethod, dcap + # --------------------------------- + + assert cmethod is None or isinstance(cmethod, str) + if cmethod is None: + cmethod = 'nocmethod' + else: + desc += f'-{cmethod}' + + if cmethod == 'nocmethod': + pass + + elif cmethod == 'bcr': + args.loss_args.func_name = 'training.loss.cmethods' + args.loss_args.bcr_real_weight = 10 + args.loss_args.bcr_fake_weight = 10 + args.loss_args.bcr_augment = dnnlib.EasyDict(func_name='training.augment.augment_pipeline', xint=1, xint_max=1/32) + + elif cmethod == 'zcr': + args.loss_args.func_name = 'training.loss.cmethods' + args.loss_args.zcr_gen_weight = 0.02 + args.loss_args.zcr_dis_weight = 0.2 + args.G_args.num_fp16_res = args.D_args.num_fp16_res = 0 # disable mixed-precision training + args.G_args.conv_clamp = args.D_args.conv_clamp = None + + elif cmethod == 'pagan': + if aug != 'noaug': + raise UserError(f'--cmethod={cmethod} is not compatible with discriminator augmentation; please specify --aug=noaug') + args.D_args.use_pagan = True + args.augment_args.tune_heuristic = 'rt' # enable ada heuristic + args.augment_args.pop('apply_func', None) # disable discriminator augmentation + args.augment_args.pop('apply_args', None) + 
args.augment_args.tune_target = 0.95 + + elif cmethod == 'wgangp': + if aug != 'noaug': + raise UserError(f'--cmethod={cmethod} is not compatible with discriminator augmentation; please specify --aug=noaug') + if gamma is not None: + raise UserError(f'--cmethod={cmethod} is not compatible with --gamma') + args.loss_args = dnnlib.EasyDict(func_name='training.loss.wgangp') + args.G_opt_args.learning_rate = args.D_opt_args.learning_rate = 0.001 + args.G_args.num_fp16_res = args.D_args.num_fp16_res = 0 # disable mixed-precision training + args.G_args.conv_clamp = args.D_args.conv_clamp = None + args.lazy_regularization = False + + elif cmethod == 'auxrot': + if args.train_dataset_args.max_label_size > 0: + raise UserError(f'--cmethod={cmethod} is not compatible with label conditioning; please specify a dataset without labels') + args.loss_args.func_name = 'training.loss.cmethods' + args.loss_args.auxrot_alpha = 10 + args.loss_args.auxrot_beta = 5 + args.D_args.score_max = 5 # prepare D to output 5 scalars per image instead of just 1 + + elif cmethod == 'spectralnorm': + args.D_args.use_spectral_norm = True + + elif cmethod == 'shallowmap': + if args.G_args.mapping_layers == 2: + raise UserError(f'--cmethod={cmethod} is a no-op for --cfg={cfg}') + args.G_args.mapping_layers = 2 + + elif cmethod == 'adropout': + if aug != 'noaug': + raise UserError(f'--cmethod={cmethod} is not compatible with discriminator augmentation; please specify --aug=noaug') + args.D_args.adaptive_dropout = 1 + args.augment_args.tune_heuristic = 'rt' # enable ada heuristic + args.augment_args.pop('apply_func', None) # disable discriminator augmentation + args.augment_args.pop('apply_args', None) + args.augment_args.tune_target = 0.6 + + else: + raise UserError(f'--cmethod={cmethod} not supported') + + if dcap is not None: + assert isinstance(dcap, float) + if not dcap > 0: + raise UserError('--dcap must be positive') + desc += f'-dcap{dcap:g}' + args.D_args.fmap_base = max(int(args.D_args.fmap_base * dcap), 1) + args.D_args.fmap_max = max(int(args.D_args.fmap_max * dcap), 1) + + # ---------------------------------- + # Transfer learning: resume, freezed + # ---------------------------------- + + resume_specs = { + 'ffhq256': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/transfer-learning-source-nets/ffhq-res256-mirror-paper256-noaug.pkl', + 'ffhq512': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/transfer-learning-source-nets/ffhq-res512-mirror-stylegan2-noaug.pkl', + 'ffhq1024': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/transfer-learning-source-nets/ffhq-res1024-mirror-stylegan2-noaug.pkl', + 'celebahq256': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/transfer-learning-source-nets/celebahq-res256-mirror-paper256-kimg100000-ada-target0.5.pkl', + 'lsundog256': 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada/pretrained/transfer-learning-source-nets/lsundog-res256-paper256-kimg100000-noaug.pkl', + } + + assert resume is None or isinstance(resume, str) + if resume is None: + resume = 'noresume' + elif resume == 'noresume': + desc += '-noresume' + elif resume in resume_specs: + desc += f'-resume{resume}' + args.resume_pkl = resume_specs[resume] # predefined url + else: + desc += '-resumecustom' + args.resume_pkl = resume # custom path or url + + if resume != 'noresume': + args.augment_args.tune_kimg = 100 # make ADA react faster at the beginning + args.G_smoothing_rampup = None # disable EMA rampup + + if freezed is not None: + assert isinstance(freezed, int) + if not 
freezed >= 0: + raise UserError('--freezed must be non-negative') + desc += f'-freezed{freezed:d}' + args.D_args.freeze_layers = freezed + + return desc, args + +#---------------------------------------------------------------------------- + +def run_training(outdir, seed, dry_run, **hyperparam_options): + # Setup training options. + tflib.init_tf({'rnd.np_random_seed': seed}) + run_desc, training_options = setup_training_options(**hyperparam_options) + + # Pick output directory. + prev_run_dirs = [] + if os.path.isdir(outdir): + prev_run_dirs = [x for x in os.listdir(outdir) if os.path.isdir(os.path.join(outdir, x))] + prev_run_ids = [re.match(r'^\d+', x) for x in prev_run_dirs] + prev_run_ids = [int(x.group()) for x in prev_run_ids if x is not None] + cur_run_id = max(prev_run_ids, default=-1) + 1 + training_options.run_dir = os.path.join(outdir, f'{cur_run_id:05d}-{run_desc}') + assert not os.path.exists(training_options.run_dir) + + # Print options. + print() + print('Training options:') + print(json.dumps(training_options, indent=2)) + print() + print(f'Output directory: {training_options.run_dir}') + print(f'Training data: {training_options.train_dataset_args.path}') + print(f'Training length: {training_options.total_kimg} kimg') + print(f'Resolution: {training_options.train_dataset_args.resolution}') + print(f'Number of GPUs: {training_options.num_gpus}') + print() + + # Dry run? + if dry_run: + print('Dry run; exiting.') + return + + # Kick off training. + print('Creating output directory...') + os.makedirs(training_options.run_dir) + with open(os.path.join(training_options.run_dir, 'training_options.json'), 'wt') as f: + json.dump(training_options, f, indent=2) + with dnnlib.util.Logger(os.path.join(training_options.run_dir, 'log.txt')): + training_loop.training_loop(**training_options) + +#---------------------------------------------------------------------------- + +def _str_to_bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + if v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + raise argparse.ArgumentTypeError('Boolean value expected.') + +def _parse_comma_sep(s): + if s is None or s.lower() == 'none' or s == '': + return [] + return s.split(',') + +#---------------------------------------------------------------------------- + +_cmdline_help_epilog = '''examples: + + # Train custom dataset using 1 GPU. + python %(prog)s --outdir=~/training-runs --gpus=1 --data=~/datasets/custom + + # Train class-conditional CIFAR-10 using 2 GPUs. + python %(prog)s --outdir=~/training-runs --gpus=2 --data=~/datasets/cifar10c \\ + --cfg=cifar + + # Transfer learn MetFaces from FFHQ using 4 GPUs. + python %(prog)s --outdir=~/training-runs --gpus=4 --data=~/datasets/metfaces \\ + --cfg=paper1024 --mirror=1 --resume=ffhq1024 --snap=10 + + # Reproduce original StyleGAN2 config F. + python %(prog)s --outdir=~/training-runs --gpus=8 --data=~/datasets/ffhq \\ + --cfg=stylegan2 --res=1024 --mirror=1 --aug=noaug + +available base configs (--cfg): + auto Automatically select reasonable defaults based on resolution + and GPU count. Good starting point for new datasets. + stylegan2 Reproduce results for StyleGAN2 config F at 1024x1024. + paper256 Reproduce results for FFHQ and LSUN Cat at 256x256. + paper512 Reproduce results for BreCaHAD and AFHQ at 512x512. + paper1024 Reproduce results for MetFaces at 1024x1024. + cifar Reproduce results for CIFAR-10 (tuned configuration). 
+ cifarbaseline Reproduce results for CIFAR-10 (baseline configuration). + +transfer learning source networks (--resume): + ffhq256 FFHQ trained at 256x256 resolution. + ffhq512 FFHQ trained at 512x512 resolution. + ffhq1024 FFHQ trained at 1024x1024 resolution. + celebahq256 CelebA-HQ trained at 256x256 resolution. + lsundog256 LSUN Dog trained at 256x256 resolution. + Custom network pickle. +''' + +#---------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description='Train a GAN using the techniques described in the paper\n"Training Generative Adversarial Networks with Limited Data".', + epilog=_cmdline_help_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + group = parser.add_argument_group('general options') + group.add_argument('--outdir', help='Where to save the results (required)', required=True, metavar='DIR') + group.add_argument('--gpus', help='Number of GPUs to use (default: 1 gpu)', type=int, metavar='INT') + group.add_argument('--snap', help='Snapshot interval (default: 50 ticks)', type=int, metavar='INT') + group.add_argument('--seed', help='Random seed (default: %(default)s)', type=int, default=1000, metavar='INT') + group.add_argument('-n', '--dry-run', help='Print training options and exit', action='store_true', default=False) + + group = parser.add_argument_group('training dataset') + group.add_argument('--data', help='Training dataset path (required)', metavar='PATH', required=True) + group.add_argument('--res', help='Dataset resolution (default: highest available)', type=int, metavar='INT') + group.add_argument('--mirror', help='Augment dataset with x-flips (default: false)', type=_str_to_bool, metavar='BOOL') + + group = parser.add_argument_group('metrics') + group.add_argument('--metrics', help='Comma-separated list or "none" (default: fid50k_full)', type=_parse_comma_sep, metavar='LIST') + group.add_argument('--metricdata', help='Dataset to evaluate metrics against (optional)', metavar='PATH') + + group = parser.add_argument_group('base config') + group.add_argument('--cfg', help='Base config (default: auto)', choices=['auto', 'stylegan2', 'paper256', 'paper512', 'paper1024', 'cifar', 'cifarbaseline']) + group.add_argument('--gamma', help='Override R1 gamma', type=float, metavar='FLOAT') + group.add_argument('--kimg', help='Override training duration', type=int, metavar='INT') + + group = parser.add_argument_group('discriminator augmentation') + group.add_argument('--aug', help='Augmentation mode (default: ada)', choices=['noaug', 'ada', 'fixed', 'adarv']) + group.add_argument('--p', help='Specify augmentation probability for --aug=fixed', type=float, metavar='FLOAT') + group.add_argument('--target', help='Override ADA target for --aug=ada and --aug=adarv', type=float) + group.add_argument('--augpipe', help='Augmentation pipeline (default: bgc)', choices=['blit', 'geom', 'color', 'filter', 'noise', 'cutout', 'bg', 'bgc', 'bgcf', 'bgcfn', 'bgcfnc']) + + group = parser.add_argument_group('comparison methods') + group.add_argument('--cmethod', help='Comparison method (default: nocmethod)', choices=['nocmethod', 'bcr', 'zcr', 'pagan', 'wgangp', 'auxrot', 'spectralnorm', 'shallowmap', 'adropout']) + group.add_argument('--dcap', help='Multiplier for discriminator capacity', type=float, metavar='FLOAT') + + group = parser.add_argument_group('transfer learning') + group.add_argument('--resume', help='Resume from network pickle (default: noresume)') + group.add_argument('--freezed', 
help='Freeze-D (default: 0 discriminator layers)', type=int, metavar='INT') + + args = parser.parse_args() + try: + run_training(**vars(args)) + except UserError as err: + print(f'Error: {err}') + exit(1) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/__init__.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2c61c745d36a1e35568fe4310c780c34414173e0 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +# empty diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/augment.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/augment.py new file mode 100644 index 0000000000000000000000000000000000000000..2d32c953462ce1960986f3b750505fb005ee0818 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/augment.py @@ -0,0 +1,587 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Adaptive discriminator augmentation (ADA) from the paper +"Training Generative Adversarial Networks with Limited Data".""" + +import numpy as np +import tensorflow as tf +import scipy.signal +import dnnlib +import dnnlib.tflib as tflib + +from training import loss + +#---------------------------------------------------------------------------- +# Main class for adaptive discriminator augmentation (ADA). +# - Performs adaptive tuning of augmentation strength during training. +# - Acts as a wrapper for the augmentation pipeline. +# - Keeps track of the necessary training statistics. +# - Calculates statistics for the validation set, if available. + +class AdaptiveAugment: + def __init__(self, + apply_func = None, # Function representing the augmentation pipeline. Can be a fully-qualified name, a function object, or None. + apply_args = {}, # Keyword arguments for the augmentation pipeline. + initial_strength = 0, # Augmentation strength (p) to use initially. + tune_heuristic = None, # Heuristic for tuning the augmentation strength dynamically: 'rt', 'rv', None. + tune_target = None, # Target value for the selected heuristic. + tune_kimg = 500, # Adjustment speed, measured in how many kimg it takes for the strength to increase/decrease by one unit. + stat_decay_kimg = 0, # Exponential moving average to use for training statistics, measured as the half-life in kimg. 0 = disable EMA. 
+ ): + tune_stats = { + 'rt': {'Loss/signs/real'}, + 'rv': {'Loss/scores/fake', 'Loss/scores/real', 'Loss/scores/valid'}, + None: {}, + } + assert tune_heuristic in tune_stats + assert apply_func is None or isinstance(apply_func, str) or dnnlib.util.is_top_level_function(apply_func) + + # Configuration. + self.apply_func = dnnlib.util.get_obj_by_name(apply_func) if isinstance(apply_func, str) else apply_func + self.apply_args = apply_args + self.strength = initial_strength + self.tune_heuristic = tune_heuristic + self.tune_target = tune_target + self.tune_kimg = tune_kimg + self.stat_decay_kimg = stat_decay_kimg + + # Runtime state. + self._tune_stats = tune_stats[tune_heuristic] + self._strength_var = None + self._acc_vars = dict() # {name: [var, ...], ...} + self._acc_decay_in = None + self._acc_decay_ops = dict() # {name: op, ...} + self._valid_images = None + self._valid_labels = None + self._valid_images_in = None + self._valid_labels_in = None + self._valid_op = None + self._valid_ofs = 0 + + def init_validation_set(self, D_gpus, training_set): + assert self._valid_images is None + images, labels = training_set.load_validation_set_np() + if images.shape[0] == 0: + return + self._valid_images = images + self._valid_labels = labels + + # Build validation graph. + with tflib.absolute_name_scope('Validation'), tf.control_dependencies(None): + with tf.device('/cpu:0'): + self._valid_images_in = tf.placeholder(training_set.dtype, name='valid_images_in', shape=[None]+training_set.shape) + self._valid_labels_in = tf.placeholder(training_set.label_dtype, name='valid_labels_in', shape=[None,training_set.label_size]) + images_in_gpus = tf.split(self._valid_images_in, len(D_gpus)) + labels_in_gpus = tf.split(self._valid_labels_in, len(D_gpus)) + ops = [] + for gpu, (D_gpu, images_in_gpu, labels_in_gpu) in enumerate(zip(D_gpus, images_in_gpus, labels_in_gpus)): + with tf.device(f'/gpu:{gpu}'): + images_expr = tf.cast(images_in_gpu, tf.float32) * (2 / 255) - 1 + D_valid = loss.eval_D(D_gpu, self, images_expr, labels_in_gpu, report='valid') + ops += [D_valid.scores] + self._valid_op = tf.group(*ops) + + def apply(self, images, labels, enable=True): + if not enable or self.apply_func is None or (self.strength == 0 and self.tune_heuristic is None): + return images, labels + with tf.name_scope('Augment'): + images, labels = self.apply_func(images, labels, strength=self.get_strength_var(), **self.apply_args) + return images, labels + + def get_strength_var(self): + if self._strength_var is None: + with tflib.absolute_name_scope('Augment'), tf.control_dependencies(None): + self._strength_var = tf.Variable(np.float32(self.strength), name='strength', trainable=False) + return self._strength_var + + def report_stat(self, name, expr): + if name in self._tune_stats: + expr = self._increment_acc(name, expr) + return expr + + def tune(self, nimg_delta): + acc = {name: self._read_and_decay_acc(name, nimg_delta) for name in self._tune_stats} + nimg_ratio = nimg_delta / (self.tune_kimg * 1000) + strength = self.strength + + if self.tune_heuristic == 'rt': + assert self.tune_target is not None + rt = acc['Loss/signs/real'] + strength += nimg_ratio * np.sign(rt - self.tune_target) + + if self.tune_heuristic == 'rv': + assert self.tune_target is not None + assert self._valid_images is not None + rv = (acc['Loss/scores/real'] - acc['Loss/scores/valid']) / max(acc['Loss/scores/real'] - acc['Loss/scores/fake'], 1e-8) + strength += nimg_ratio * np.sign(rv - self.tune_target) + + self._set_strength(strength) + + def 
run_validation(self, minibatch_size): + if self._valid_images is not None: + indices = [(self._valid_ofs + i) % self._valid_images.shape[0] for i in range(minibatch_size)] + tflib.run(self._valid_op, {self._valid_images_in: self._valid_images[indices], self._valid_labels_in: self._valid_labels[indices]}) + self._valid_ofs += len(indices) + + def _set_strength(self, strength): + strength = max(strength, 0) + if self._strength_var is not None and strength != self.strength: + tflib.set_vars({self._strength_var: strength}) + self.strength = strength + + def _increment_acc(self, name, expr): + with tf.name_scope('acc_' + name): + with tf.control_dependencies(None): + acc_var = tf.Variable(tf.zeros(2), name=name, trainable=False) # [acc_num, acc_sum] + if name not in self._acc_vars: + self._acc_vars[name] = [] + self._acc_vars[name].append(acc_var) + expr_num = tf.shape(tf.reshape(expr, [-1]))[0] + expr_sum = tf.reduce_sum(expr) + acc_op = tf.assign_add(acc_var, [expr_num, expr_sum]) + with tf.control_dependencies([acc_op]): + return tf.identity(expr) + + def _read_and_decay_acc(self, name, nimg_delta): + acc_vars = self._acc_vars[name] + acc_num, acc_sum = tuple(np.sum(tflib.run(acc_vars), axis=0)) + if nimg_delta > 0: + with tflib.absolute_name_scope('Augment'), tf.control_dependencies(None): + if self._acc_decay_in is None: + self._acc_decay_in = tf.placeholder(tf.float32, name='acc_decay_in', shape=[]) + if name not in self._acc_decay_ops: + with tf.name_scope('acc_' + name): + ops = [tf.assign(var, var * self._acc_decay_in) for var in acc_vars] + self._acc_decay_ops[name] = tf.group(*ops) + acc_decay = 0.5 ** (nimg_delta / (self.stat_decay_kimg * 1000)) if self.stat_decay_kimg > 0 else 0 + tflib.run(self._acc_decay_ops[name], {self._acc_decay_in: acc_decay}) + return acc_sum / acc_num if acc_num > 0 else 0 + +#---------------------------------------------------------------------------- +# Helper for randomly gating augmentation parameters based on the given probability. + +def gate_augment_params(probability, params, disabled_val): + shape = tf.shape(params) + cond = (tf.random_uniform(shape[:1], 0, 1) < probability) + disabled_val = tf.broadcast_to(tf.convert_to_tensor(disabled_val, dtype=params.dtype), shape) + return tf.where(cond, params, disabled_val) + +#---------------------------------------------------------------------------- +# Helpers for constructing batched transformation matrices. 
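+ # Each helper below returns a batch of homogeneous transformation matrices ([batch, 3, 3] in 2D, [batch, 4, 4] in 3D); construct_batch_of_matrices() broadcasts scalar entries across the batch dimension.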
+ +def construct_batch_of_matrices(*rows): + rows = [[tf.convert_to_tensor(x, dtype=tf.float32) for x in r] for r in rows] + batch_elems = [x for r in rows for x in r if x.shape.rank != 0] + assert all(x.shape.rank == 1 for x in batch_elems) + batch_size = tf.shape(batch_elems[0])[0] if len(batch_elems) else 1 + rows = [[tf.broadcast_to(x, [batch_size]) for x in r] for r in rows] + return tf.transpose(rows, [2, 0, 1]) + +def translate_2d(tx, ty): + return construct_batch_of_matrices( + [1, 0, tx], + [0, 1, ty], + [0, 0, 1]) + +def translate_3d(tx, ty, tz): + return construct_batch_of_matrices( + [1, 0, 0, tx], + [0, 1, 0, ty], + [0, 0, 1, tz], + [0, 0, 0, 1]) + +def scale_2d(sx, sy): + return construct_batch_of_matrices( + [sx, 0, 0], + [0, sy, 0], + [0, 0, 1]) + +def scale_3d(sx, sy, sz): + return construct_batch_of_matrices( + [sx, 0, 0, 0], + [0, sy, 0, 0], + [0, 0, sz, 0], + [0, 0, 0, 1]) + +def rotate_2d(theta): + return construct_batch_of_matrices( + [tf.cos(theta), tf.sin(-theta), 0], + [tf.sin(theta), tf.cos(theta), 0], + [0, 0, 1]) + +def rotate_3d(v, theta): + vx = v[..., 0]; vy = v[..., 1]; vz = v[..., 2] + s = tf.sin(theta); c = tf.cos(theta); cc = 1 - c + return construct_batch_of_matrices( + [vx*vx*cc+c, vx*vy*cc-vz*s, vx*vz*cc+vy*s, 0], + [vy*vx*cc+vz*s, vy*vy*cc+c, vy*vz*cc-vx*s, 0], + [vz*vx*cc-vy*s, vz*vy*cc+vx*s, vz*vz*cc+c, 0], + [0, 0, 0, 1]) + +def translate_2d_inv(tx, ty): + return translate_2d(-tx, -ty) + +def scale_2d_inv(sx, sy): + return scale_2d(1/sx, 1/sy) + +def rotate_2d_inv(theta): + return rotate_2d(-theta) + +#---------------------------------------------------------------------------- +# Coefficients of various wavelet decomposition low-pass filters. + +wavelets = { + 'haar': [0.7071067811865476, 0.7071067811865476], + 'db1': [0.7071067811865476, 0.7071067811865476], + 'db2': [-0.12940952255092145, 0.22414386804185735, 0.836516303737469, 0.48296291314469025], + 'db3': [0.035226291882100656, -0.08544127388224149, -0.13501102001039084, 0.4598775021193313, 0.8068915093133388, 0.3326705529509569], + 'db4': [-0.010597401784997278, 0.032883011666982945, 0.030841381835986965, -0.18703481171888114, -0.02798376941698385, 0.6308807679295904, 0.7148465705525415, 0.23037781330885523], + 'db5': [0.003335725285001549, -0.012580751999015526, -0.006241490213011705, 0.07757149384006515, -0.03224486958502952, -0.24229488706619015, 0.13842814590110342, 0.7243085284385744, 0.6038292697974729, 0.160102397974125], + 'db6': [-0.00107730108499558, 0.004777257511010651, 0.0005538422009938016, -0.031582039318031156, 0.02752286553001629, 0.09750160558707936, -0.12976686756709563, -0.22626469396516913, 0.3152503517092432, 0.7511339080215775, 0.4946238903983854, 0.11154074335008017], + 'db7': [0.0003537138000010399, -0.0018016407039998328, 0.00042957797300470274, 0.012550998556013784, -0.01657454163101562, -0.03802993693503463, 0.0806126091510659, 0.07130921926705004, -0.22403618499416572, -0.14390600392910627, 0.4697822874053586, 0.7291320908465551, 0.39653931948230575, 0.07785205408506236], + 'db8': [-0.00011747678400228192, 0.0006754494059985568, -0.0003917403729959771, -0.00487035299301066, 0.008746094047015655, 0.013981027917015516, -0.04408825393106472, -0.01736930100202211, 0.128747426620186, 0.00047248457399797254, -0.2840155429624281, -0.015829105256023893, 0.5853546836548691, 0.6756307362980128, 0.3128715909144659, 0.05441584224308161], + 'sym2': [-0.12940952255092145, 0.22414386804185735, 0.836516303737469, 0.48296291314469025], + 'sym3': [0.035226291882100656, 
-0.08544127388224149, -0.13501102001039084, 0.4598775021193313, 0.8068915093133388, 0.3326705529509569], + 'sym4': [-0.07576571478927333, -0.02963552764599851, 0.49761866763201545, 0.8037387518059161, 0.29785779560527736, -0.09921954357684722, -0.012603967262037833, 0.0322231006040427], + 'sym5': [0.027333068345077982, 0.029519490925774643, -0.039134249302383094, 0.1993975339773936, 0.7234076904024206, 0.6339789634582119, 0.01660210576452232, -0.17532808990845047, -0.021101834024758855, 0.019538882735286728], + 'sym6': [0.015404109327027373, 0.0034907120842174702, -0.11799011114819057, -0.048311742585633, 0.4910559419267466, 0.787641141030194, 0.3379294217276218, -0.07263752278646252, -0.021060292512300564, 0.04472490177066578, 0.0017677118642428036, -0.007800708325034148], + 'sym7': [0.002681814568257878, -0.0010473848886829163, -0.01263630340325193, 0.03051551316596357, 0.0678926935013727, -0.049552834937127255, 0.017441255086855827, 0.5361019170917628, 0.767764317003164, 0.2886296317515146, -0.14004724044296152, -0.10780823770381774, 0.004010244871533663, 0.010268176708511255], + 'sym8': [-0.0033824159510061256, -0.0005421323317911481, 0.03169508781149298, 0.007607487324917605, -0.1432942383508097, -0.061273359067658524, 0.4813596512583722, 0.7771857517005235, 0.3644418948353314, -0.05194583810770904, -0.027219029917056003, 0.049137179673607506, 0.003808752013890615, -0.01495225833704823, -0.0003029205147213668, 0.0018899503327594609], +} + +#---------------------------------------------------------------------------- +# Versatile image augmentation pipeline from the paper +# "Training Generative Adversarial Networks with Limited Data". +# +# All augmentations are disabled by default; individual augmentations can +# be enabled by setting their probability multipliers to 1. + +def augment_pipeline( + images, # Input images: NCHW, float32, dynamic range [-1,+1]. + labels, # Input labels. + strength = 1, # Overall multiplier for augmentation probability; can be a Tensor. + debug_percentile = None, # Percentile value for visualizing parameter ranges; None = normal operation. + + # Pixel blitting. + xflip = 0, # Probability multiplier for x-flip. + rotate90 = 0, # Probability multiplier for 90 degree rotations. + xint = 0, # Probability multiplier for integer translation. + xint_max = 0.125, # Range of integer translation, relative to image dimensions. + + # General geometric transformations. + scale = 0, # Probability multiplier for isotropic scaling. + rotate = 0, # Probability multiplier for arbitrary rotation. + aniso = 0, # Probability multiplier for anisotropic scaling. + xfrac = 0, # Probability multiplier for fractional translation. + scale_std = 0.2, # Log2 standard deviation of isotropic scaling. + rotate_max = 1, # Range of arbitrary rotation, 1 = full circle. + aniso_std = 0.2, # Log2 standard deviation of anisotropic scaling. + xfrac_std = 0.125, # Standard deviation of frational translation, relative to image dimensions. + + # Color transformations. + brightness = 0, # Probability multiplier for brightness. + contrast = 0, # Probability multiplier for contrast. + lumaflip = 0, # Probability multiplier for luma flip. + hue = 0, # Probability multiplier for hue rotation. + saturation = 0, # Probability multiplier for saturation. + brightness_std = 0.2, # Standard deviation of brightness. + contrast_std = 0.5, # Log2 standard deviation of contrast. + hue_max = 1, # Range of hue rotation, 1 = full circle. + saturation_std = 1, # Log2 standard deviation of saturation. 
+ + # Image-space filtering. + imgfilter = 0, # Probability multiplier for image-space filtering. + imgfilter_bands = [1,1,1,1], # Probability multipliers for individual frequency bands. + imgfilter_std = 1, # Log2 standard deviation of image-space filter amplification. + + # Image-space corruptions. + noise = 0, # Probability multiplier for additive RGB noise. + cutout = 0, # Probability multiplier for cutout. + noise_std = 0.1, # Standard deviation of additive RGB noise. + cutout_size = 0.5, # Size of the cutout rectangle, relative to image dimensions. +): + # Determine input shape. + batch, channels, height, width = images.shape.as_list() + if batch is None: + batch = tf.shape(images)[0] + + # ------------------------------------- + # Select parameters for pixel blitting. + # ------------------------------------- + + # Initialize inverse homogeneous 2D transform: G_inv @ pixel_out ==> pixel_in + I_3 = tf.eye(3, batch_shape=[batch]) + G_inv = I_3 + + # Apply x-flip with probability (xflip * strength). + if xflip > 0: + i = tf.floor(tf.random_uniform([batch], 0, 2)) + i = gate_augment_params(xflip * strength, i, 0) + if debug_percentile is not None: + i = tf.floor(tf.broadcast_to(debug_percentile, [batch]) * 2) + G_inv @= scale_2d_inv(1 - 2 * i, 1) + + # Apply 90 degree rotations with probability (rotate90 * strength). + if rotate90 > 0: + i = tf.floor(tf.random_uniform([batch], 0, 4)) + i = gate_augment_params(rotate90 * strength, i, 0) + if debug_percentile is not None: + i = tf.floor(tf.broadcast_to(debug_percentile, [batch]) * 4) + G_inv @= rotate_2d_inv(-np.pi / 2 * i) + + # Apply integer translation with probability (xint * strength). + if xint > 0: + t = tf.random_uniform([batch, 2], -xint_max, xint_max) + t = gate_augment_params(xint * strength, t, 0) + if debug_percentile is not None: + t = (tf.broadcast_to(debug_percentile, [batch, 2]) * 2 - 1) * xint_max + G_inv @= translate_2d_inv(tf.rint(t[:,0] * width), tf.rint(t[:,1] * height)) + + # -------------------------------------------------------- + # Select parameters for general geometric transformations. + # -------------------------------------------------------- + + # Apply isotropic scaling with probability (scale * strength). + if scale > 0: + s = 2 ** tf.random_normal([batch], 0, scale_std) + s = gate_augment_params(scale * strength, s, 1) + if debug_percentile is not None: + s = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * scale_std) + G_inv @= scale_2d_inv(s, s) + + # Apply pre-rotation with probability p_rot. + p_rot = 1 - tf.sqrt(tf.cast(tf.maximum(1 - rotate * strength, 0), tf.float32)) # P(pre OR post) = p + if rotate > 0: + theta = tf.random_uniform([batch], -np.pi * rotate_max, np.pi * rotate_max) + theta = gate_augment_params(p_rot, theta, 0) + if debug_percentile is not None: + theta = (tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * np.pi * rotate_max + G_inv @= rotate_2d_inv(-theta) # Before anisotropic scaling. + + # Apply anisotropic scaling with probability (aniso * strength). + if aniso > 0: + s = 2 ** tf.random_normal([batch], 0, aniso_std) + s = gate_augment_params(aniso * strength, s, 1) + if debug_percentile is not None: + s = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * aniso_std) + G_inv @= scale_2d_inv(s, 1 / s) + + # Apply post-rotation with probability p_rot. 
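+ # The rotation is split into a pre- and a post-rotation around the anisotropic scaling, each applied with probability p_rot, so that P(pre OR post) = 1 - (1 - p_rot)^2 = rotate * strength.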
+ if rotate > 0: + theta = tf.random_uniform([batch], -np.pi * rotate_max, np.pi * rotate_max) + theta = gate_augment_params(p_rot, theta, 0) + if debug_percentile is not None: + theta = tf.zeros([batch]) + G_inv @= rotate_2d_inv(-theta) # After anisotropic scaling. + + # Apply fractional translation with probability (xfrac * strength). + if xfrac > 0: + t = tf.random_normal([batch, 2], 0, xfrac_std) + t = gate_augment_params(xfrac * strength, t, 0) + if debug_percentile is not None: + t = tflib.erfinv(tf.broadcast_to(debug_percentile, [batch, 2]) * 2 - 1) * xfrac_std + G_inv @= translate_2d_inv(t[:,0] * width, t[:,1] * height) + + # ---------------------------------- + # Execute geometric transformations. + # ---------------------------------- + + # Execute if the transform is not identity. + if G_inv is not I_3: + + # Setup orthogonal lowpass filter. + Hz = wavelets['sym6'] + Hz = np.asarray(Hz, dtype=np.float32) + Hz = np.reshape(Hz, [-1, 1, 1]).repeat(channels, axis=1) # [tap, channel, 1] + Hz_pad = Hz.shape[0] // 4 + + # Calculate padding. + cx = (width - 1) / 2 + cy = (height - 1) / 2 + cp = np.transpose([[-cx, -cy, 1], [cx, -cy, 1], [cx, cy, 1], [-cx, cy, 1]]) # [xyz, idx] + cp = G_inv @ cp[np.newaxis] # [batch, xyz, idx] + cp = cp[:, :2, :] # [batch, xy, idx] + m_lo = tf.ceil(tf.reduce_max(-cp, axis=[0,2]) - [cx, cy] + Hz_pad * 2) + m_hi = tf.ceil(tf.reduce_max( cp, axis=[0,2]) - [cx, cy] + Hz_pad * 2) + m_lo = tf.clip_by_value(m_lo, [0, 0], [width-1, height-1]) + m_hi = tf.clip_by_value(m_hi, [0, 0], [width-1, height-1]) + + # Pad image and adjust origin. + images = tf.transpose(images, [0, 2, 3, 1]) # NCHW => NHWC + pad = [[0, 0], [m_lo[1], m_hi[1]], [m_lo[0], m_hi[0]], [0, 0]] + images = tf.pad(tensor=images, paddings=pad, mode='REFLECT') + T_in = translate_2d(cx + m_lo[0], cy + m_lo[1]) + T_out = translate_2d_inv(cx + Hz_pad, cy + Hz_pad) + G_inv = T_in @ G_inv @ T_out + + # Upsample. + shape = [batch, tf.shape(images)[1] * 2, tf.shape(images)[2] * 2, channels] + images = tf.nn.depthwise_conv2d_backprop_input(input_sizes=shape, filter=Hz[np.newaxis, :], out_backprop=images, strides=[1,2,2,1], padding='SAME', data_format='NHWC') + images = tf.nn.depthwise_conv2d_backprop_input(input_sizes=shape, filter=Hz[:, np.newaxis], out_backprop=images, strides=[1,1,1,1], padding='SAME', data_format='NHWC') + G_inv = scale_2d(2, 2) @ G_inv @ scale_2d_inv(2, 2) # Account for the increased resolution. + + # Execute transformation. + transforms = tf.reshape(G_inv, [-1, 9])[:, :8] + shape = [(height + Hz_pad * 2) * 2, (width + Hz_pad * 2) * 2] + images = tf.contrib.image.transform(images=images, transforms=transforms, output_shape=shape, interpolation='BILINEAR') + + # Downsample and crop. + images = tf.nn.depthwise_conv2d(input=images, filter=Hz[np.newaxis,:], strides=[1,1,1,1], padding='SAME', data_format='NHWC') + images = tf.nn.depthwise_conv2d(input=images, filter=Hz[:,np.newaxis], strides=[1,2,2,1], padding='SAME', data_format='NHWC') + images = images[:, Hz_pad : height + Hz_pad, Hz_pad : width + Hz_pad, :] + images = tf.transpose(images, [0, 3, 1, 2]) # NHWC => NCHW + + # -------------------------------------------- + # Select parameters for color transformations. + # -------------------------------------------- + + # Initialize homogeneous 3D transformation matrix: C @ color_in ==> color_out + I_4 = tf.eye(4, batch_shape=[batch]) + C = I_4 + + # Apply brightness with probability (brightness * strength). 
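+ # Each color operation below composes an additional 4x4 matrix into C; the combined transform is applied to the flattened pixels in a single matrix product further down.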
+ if brightness > 0: + b = tf.random_normal([batch], 0, brightness_std) + b = gate_augment_params(brightness * strength, b, 0) + if debug_percentile is not None: + b = tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * brightness_std + C = translate_3d(b, b, b) @ C + + # Apply contrast with probability (contrast * strength). + if contrast > 0: + c = 2 ** tf.random_normal([batch], 0, contrast_std) + c = gate_augment_params(contrast * strength, c, 1) + if debug_percentile is not None: + c = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * contrast_std) + C = scale_3d(c, c, c) @ C + + # Apply luma flip with probability (lumaflip * strength). + v = np.array([1, 1, 1, 0]) / np.sqrt(3) # Luma axis. + if lumaflip > 0: + i = tf.floor(tf.random_uniform([batch], 0, 2)) + i = gate_augment_params(lumaflip * strength, i, 0) + if debug_percentile is not None: + i = tf.floor(tf.broadcast_to(debug_percentile, [batch]) * 2) + i = tf.reshape(i, [batch, 1, 1]) + C = (I_4 - 2 * np.outer(v, v) * i) @ C # Householder reflection. + + # Apply hue rotation with probability (hue * strength). + if hue > 0 and channels > 1: + theta = tf.random_uniform([batch], -np.pi * hue_max, np.pi * hue_max) + theta = gate_augment_params(hue * strength, theta, 0) + if debug_percentile is not None: + theta = (tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * np.pi * hue_max + C = rotate_3d(v, theta) @ C # Rotate around v. + + # Apply saturation with probability (saturation * strength). + if saturation > 0 and channels > 1: + s = 2 ** tf.random_normal([batch], 0, saturation_std) + s = gate_augment_params(saturation * strength, s, 1) + if debug_percentile is not None: + s = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * saturation_std) + s = tf.reshape(s, [batch, 1, 1]) + C = (np.outer(v, v) + (I_4 - np.outer(v, v)) * s) @ C + + # ------------------------------ + # Execute color transformations. + # ------------------------------ + + # Execute if the transform is not identity. + if C is not I_4: + images = tf.reshape(images, [batch, channels, height * width]) + if channels == 3: + images = C[:, :3, :3] @ images + C[:, :3, 3:] + elif channels == 1: + C = tf.reduce_mean(C[:, :3, :], axis=1, keepdims=True) + images = images * tf.reduce_sum(C[:, :, :3], axis=2, keepdims=True) + C[:, :, 3:] + else: + raise ValueError('Image must be RGB (3 channels) or L (1 channel)') + images = tf.reshape(images, [batch, channels, height, width]) + + # ---------------------- + # Image-space filtering. + # ---------------------- + + if imgfilter > 0: + num_bands = 4 + assert len(imgfilter_bands) == num_bands + expected_power = np.array([10, 1, 1, 1]) / 13 # Expected power spectrum (1/f). + + # Apply amplification for each band with probability (imgfilter * strength * band_strength). + g = tf.ones([batch, num_bands]) # Global gain vector (identity). + for i, band_strength in enumerate(imgfilter_bands): + t_i = 2 ** tf.random_normal([batch], 0, imgfilter_std) + t_i = gate_augment_params(imgfilter * strength * band_strength, t_i, 1) + if debug_percentile is not None: + t_i = 2 ** (tflib.erfinv(tf.broadcast_to(debug_percentile, [batch]) * 2 - 1) * imgfilter_std) if band_strength > 0 else tf.ones([batch]) + t = tf.ones([batch, num_bands]) # Temporary gain vector. + t = tf.concat([t[:, :i], t_i[:, np.newaxis], t[:, i+1:]], axis=-1) # Replace i'th element. + t /= tf.sqrt(tf.reduce_sum(expected_power * tf.square(t), axis=-1, keepdims=True)) # Normalize power. 
+ g *= t # Accumulate into global gain. + + # Construct filter bank. + Hz_lo = wavelets['sym2'] + Hz_lo = np.asarray(Hz_lo, dtype=np.float32) # H(z) + Hz_hi = Hz_lo * ((-1) ** np.arange(Hz_lo.size)) # H(-z) + Hz_lo2 = np.convolve(Hz_lo, Hz_lo[::-1]) / 2 # H(z) * H(z^-1) / 2 + Hz_hi2 = np.convolve(Hz_hi, Hz_hi[::-1]) / 2 # H(-z) * H(-z^-1) / 2 + Hz_bands = np.eye(num_bands, 1) # Bandpass(H(z), b_i) + for i in range(1, num_bands): + Hz_bands = np.dstack([Hz_bands, np.zeros_like(Hz_bands)]).reshape(num_bands, -1)[:, :-1] + Hz_bands = scipy.signal.convolve(Hz_bands, [Hz_lo2]) + Hz_bands[i, (Hz_bands.shape[1] - Hz_hi2.size) // 2 : (Hz_bands.shape[1] + Hz_hi2.size) // 2] += Hz_hi2 + + # Construct combined amplification filter. + Hz_prime = g @ Hz_bands # [batch, tap] + Hz_prime = tf.transpose(Hz_prime) # [tap, batch] + Hz_prime = tf.tile(Hz_prime[:, :, np.newaxis], [1, 1, channels]) # [tap, batch, channels] + Hz_prime = tf.reshape(Hz_prime, [-1, batch * channels, 1]) # [tap, batch * channels, 1] + + # Apply filter. + images = tf.reshape(images, [1, -1, height, width]) + pad = Hz_bands.shape[1] // 2 + pad = [[0,0], [0,0], [pad, pad], [pad, pad]] + images = tf.pad(tensor=images, paddings=pad, mode='REFLECT') + images = tf.nn.depthwise_conv2d(input=images, filter=Hz_prime[np.newaxis,:], strides=[1,1,1,1], padding='VALID', data_format='NCHW') + images = tf.nn.depthwise_conv2d(input=images, filter=Hz_prime[:,np.newaxis], strides=[1,1,1,1], padding='VALID', data_format='NCHW') + images = tf.reshape(images, [-1, channels, height, width]) + + # ------------------------ + # Image-space corruptions. + # ------------------------ + + # Apply additive RGB noise with probability (noise * strength). + if noise > 0: + sigma = tf.abs(tf.random_normal([batch], 0, noise_std)) + sigma = gate_augment_params(noise * strength, sigma, 0) + if debug_percentile is not None: + sigma = tflib.erfinv(tf.broadcast_to(debug_percentile, [batch])) * noise_std + sigma = tf.reshape(sigma, [-1, 1, 1, 1]) + images += tf.random_normal([batch, channels, height, width]) * sigma + + # Apply cutout with probability (cutout * strength). + if cutout > 0: + size = tf.fill([batch, 2], cutout_size) + size = gate_augment_params(cutout * strength, size, 0) + center = tf.random_uniform([batch, 2], 0, 1) + if debug_percentile is not None: + size = tf.fill([batch, 2], cutout_size) + center = tf.broadcast_to(debug_percentile, [batch, 2]) + size = tf.reshape(size, [batch, 2, 1, 1, 1]) + center = tf.reshape(center, [batch, 2, 1, 1, 1]) + coord_x = tf.reshape(tf.range(width, dtype=tf.float32), [1, 1, 1, width]) + coord_y = tf.reshape(tf.range(height, dtype=tf.float32), [1, 1, height, 1]) + mask_x = (tf.abs((coord_x + 0.5) / width - center[:, 0]) >= size[:, 0] / 2) + mask_y = (tf.abs((coord_y + 0.5) / height - center[:, 1]) >= size[:, 1] / 2) + mask = tf.cast(tf.logical_or(mask_x, mask_y), tf.float32) + images *= mask + + return images, labels + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/dataset.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..b96876ed2bd1b015ea00b43c26e79bf797a5dd66 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/dataset.py @@ -0,0 +1,233 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
+# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Streaming images and labels from dataset created with dataset_tool.py.""" + +import os +import glob +import numpy as np +import tensorflow as tf +import dnnlib.tflib as tflib + +#---------------------------------------------------------------------------- +# Dataset class that loads images from tfrecords files. + +class TFRecordDataset: + def __init__(self, + tfrecord_dir, # Directory containing a collection of tfrecords files. + resolution = None, # Dataset resolution, None = autodetect. + label_file = None, # Relative path of the labels file, None = autodetect. + max_label_size = 0, # 0 = no labels, 'full' = full labels, = N first label components. + max_images = None, # Maximum number of images to use, None = use all images. + max_validation = 10000, # Maximum size of the validation set, None = use all available images. + mirror_augment = False, # Apply mirror augment? + repeat = True, # Repeat dataset indefinitely? + shuffle = True, # Shuffle images? + shuffle_mb = 4096, # Shuffle data within specified window (megabytes), 0 = disable shuffling. + prefetch_mb = 2048, # Amount of data to prefetch (megabytes), 0 = disable prefetching. + buffer_mb = 256, # Read buffer size (megabytes). + num_threads = 2, # Number of concurrent threads. + _is_validation = False, +): + self.tfrecord_dir = tfrecord_dir + self.resolution = None + self.resolution_log2 = None + self.shape = [] # [channels, height, width] + self.dtype = 'uint8' + self.label_file = label_file + self.label_size = None # components + self.label_dtype = None + self.has_validation_set = None + self.mirror_augment = mirror_augment + self.repeat = repeat + self.shuffle = shuffle + self._max_validation = max_validation + self._np_labels = None + self._tf_minibatch_in = None + self._tf_labels_var = None + self._tf_labels_dataset = None + self._tf_datasets = dict() + self._tf_iterator = None + self._tf_init_ops = dict() + self._tf_minibatch_np = None + self._cur_minibatch = -1 + self._cur_lod = -1 + + # List files in the dataset directory. + assert os.path.isdir(self.tfrecord_dir) + all_files = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*'))) + self.has_validation_set = (self._max_validation > 0) and any(os.path.basename(f).startswith('validation-') for f in all_files) + all_files = [f for f in all_files if os.path.basename(f).startswith('validation-') == _is_validation] + + # Inspect tfrecords files. + tfr_files = [f for f in all_files if f.endswith('.tfrecords')] + assert len(tfr_files) >= 1 + tfr_shapes = [] + for tfr_file in tfr_files: + tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE) + for record in tf.python_io.tf_record_iterator(tfr_file, tfr_opt): + tfr_shapes.append(self.parse_tfrecord_np(record).shape) + break + + # Autodetect label filename. + if self.label_file is None: + guess = [f for f in all_files if f.endswith('.labels')] + if len(guess): + self.label_file = guess[0] + elif not os.path.isfile(self.label_file): + guess = os.path.join(self.tfrecord_dir, self.label_file) + if os.path.isfile(guess): + self.label_file = guess + + # Determine shape and resolution. 
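#----------------------------------------------------------------------------
# Illustrative sketch, not part of the original file: a minimal view of the
# level-of-detail (lod) bookkeeping performed just below. Each tfrecords file
# stores the dataset at one power-of-two resolution, and its lod is the number
# of 2x downsampling steps relative to the full resolution.

import numpy as np

def lod_for_resolution(res, full_res):
    """lod 0 = full resolution, lod 1 = half resolution, and so on."""
    return int(np.log2(full_res)) - int(np.log2(res))

# Example: a 1024x1024 dataset typically ships lods 0..8 (1024 down to 4).
assert [lod_for_resolution(1024 // 2**i, 1024) for i in range(3)] == [0, 1, 2]
#----------------------------------------------------------------------------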
+ max_shape = max(tfr_shapes, key=np.prod) + self.resolution = resolution if resolution is not None else max_shape[1] + self.resolution_log2 = int(np.log2(self.resolution)) + self.shape = [max_shape[0], self.resolution, self.resolution] + tfr_lods = [self.resolution_log2 - int(np.log2(shape[1])) for shape in tfr_shapes] + assert all(shape[0] == max_shape[0] for shape in tfr_shapes) + assert all(shape[1] == shape[2] for shape in tfr_shapes) + assert all(shape[1] == self.resolution // (2**lod) for shape, lod in zip(tfr_shapes, tfr_lods)) + assert all(lod in tfr_lods for lod in range(self.resolution_log2 - 1)) + + # Load labels. + assert max_label_size == 'full' or max_label_size >= 0 + self._np_labels = np.zeros([1<<30, 0], dtype=np.float32) + if self.label_file is not None and max_label_size != 0: + self._np_labels = np.load(self.label_file) + assert self._np_labels.ndim == 2 + if max_label_size != 'full' and self._np_labels.shape[1] > max_label_size: + self._np_labels = self._np_labels[:, :max_label_size] + if max_images is not None and self._np_labels.shape[0] > max_images: + self._np_labels = self._np_labels[:max_images] + self.label_size = self._np_labels.shape[1] + self.label_dtype = self._np_labels.dtype.name + + # Build TF expressions. + with tf.name_scope('Dataset'), tf.device('/cpu:0'), tf.control_dependencies(None): + self._tf_minibatch_in = tf.placeholder(tf.int64, name='minibatch_in', shape=[]) + self._tf_labels_var = tflib.create_var_with_large_initial_value(self._np_labels, name='labels_var') + self._tf_labels_dataset = tf.data.Dataset.from_tensor_slices(self._tf_labels_var) + for tfr_file, tfr_shape, tfr_lod in zip(tfr_files, tfr_shapes, tfr_lods): + if tfr_lod < 0: + continue + dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_mb<<20) + if max_images is not None: + dset = dset.take(max_images) + dset = dset.map(self.parse_tfrecord_tf, num_parallel_calls=num_threads) + dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset)) + bytes_per_item = np.prod(tfr_shape) * np.dtype(self.dtype).itemsize + if self.shuffle and shuffle_mb > 0: + dset = dset.shuffle(((shuffle_mb << 20) - 1) // bytes_per_item + 1) + if self.repeat: + dset = dset.repeat() + if prefetch_mb > 0: + dset = dset.prefetch(((prefetch_mb << 20) - 1) // bytes_per_item + 1) + dset = dset.batch(self._tf_minibatch_in) + self._tf_datasets[tfr_lod] = dset + self._tf_iterator = tf.data.Iterator.from_structure(self._tf_datasets[0].output_types, self._tf_datasets[0].output_shapes) + self._tf_init_ops = {lod: self._tf_iterator.make_initializer(dset) for lod, dset in self._tf_datasets.items()} + + def close(self): + pass + + # Use the given minibatch size and level-of-detail for the data returned by get_minibatch_tf(). + def configure(self, minibatch_size, lod=0): + lod = int(np.floor(lod)) + assert minibatch_size >= 1 and lod in self._tf_datasets + if self._cur_minibatch != minibatch_size or self._cur_lod != lod: + self._tf_init_ops[lod].run({self._tf_minibatch_in: minibatch_size}) + self._cur_minibatch = minibatch_size + self._cur_lod = lod + + # Get next minibatch as TensorFlow expressions. + def get_minibatch_tf(self): + images, labels = self._tf_iterator.get_next() + if self.mirror_augment: + images = tf.cast(images, tf.float32) + images = tf.where(tf.random_uniform([tf.shape(images)[0]]) < 0.5, images, tf.reverse(images, [3])) + images = tf.cast(images, self.dtype) + return images, labels + + # Get next minibatch as NumPy arrays. 
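#----------------------------------------------------------------------------
# Illustrative sketch, not part of the original file: the shuffle and prefetch
# buffer sizes above convert a megabyte budget into an item count via a ceiling
# division. The same arithmetic in plain Python:

def items_for_budget(budget_mb, bytes_per_item):
    """Smallest number of items whose total size is at least budget_mb megabytes."""
    return ((budget_mb << 20) - 1) // bytes_per_item + 1

# Example: a 4096 MB shuffle window over 256x256 RGB uint8 images
# (3 * 256 * 256 = 196608 bytes each) holds 21846 items.
assert items_for_budget(4096, 3 * 256 * 256) == 21846
#----------------------------------------------------------------------------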
+ def get_minibatch_np(self, minibatch_size, lod=0): # => (images, labels) or (None, None) + self.configure(minibatch_size, lod) + if self._tf_minibatch_np is None: + with tf.name_scope('Dataset'): + self._tf_minibatch_np = self.get_minibatch_tf() + try: + return tflib.run(self._tf_minibatch_np) + except tf.errors.OutOfRangeError: + return None, None + + # Get random labels as TensorFlow expression. + def get_random_labels_tf(self, minibatch_size): # => labels + with tf.name_scope('Dataset'): + if self.label_size > 0: + with tf.device('/cpu:0'): + return tf.gather(self._tf_labels_var, tf.random_uniform([minibatch_size], 0, self._np_labels.shape[0], dtype=tf.int32)) + return tf.zeros([minibatch_size, 0], self.label_dtype) + + # Get random labels as NumPy array. + def get_random_labels_np(self, minibatch_size): # => labels + if self.label_size > 0: + return self._np_labels[np.random.randint(self._np_labels.shape[0], size=[minibatch_size])] + return np.zeros([minibatch_size, 0], self.label_dtype) + + # Load validation set as NumPy array. + def load_validation_set_np(self): + images = [] + labels = [] + if self.has_validation_set: + validation_set = TFRecordDataset( + tfrecord_dir=self.tfrecord_dir, resolution=self.shape[2], max_label_size=self.label_size, + max_images=self._max_validation, repeat=False, shuffle=False, prefetch_mb=0, _is_validation=True) + validation_set.configure(1) + while True: + image, label = validation_set.get_minibatch_np(1) + if image is None: + break + images.append(image) + labels.append(label) + images = np.concatenate(images, axis=0) if len(images) else np.zeros([0] + self.shape, dtype=self.dtype) + labels = np.concatenate(labels, axis=0) if len(labels) else np.zeros([0, self.label_size], self.label_dtype) + assert list(images.shape[1:]) == self.shape + assert labels.shape[1] == self.label_size + assert images.shape[0] <= self._max_validation + return images, labels + + # Parse individual image from a tfrecords file into TensorFlow expression. + @staticmethod + def parse_tfrecord_tf(record): + features = tf.parse_single_example(record, features={ + 'shape': tf.FixedLenFeature([3], tf.int64), + 'data': tf.FixedLenFeature([], tf.string)}) + data = tf.decode_raw(features['data'], tf.uint8) + return tf.reshape(data, features['shape']) + + # Parse individual image from a tfrecords file into NumPy array. + @staticmethod + def parse_tfrecord_np(record): + ex = tf.train.Example() + ex.ParseFromString(record) + shape = ex.features.feature['shape'].int64_list.value # pylint: disable=no-member + data = ex.features.feature['data'].bytes_list.value[0] # pylint: disable=no-member + return np.fromstring(data, np.uint8).reshape(shape) + +#---------------------------------------------------------------------------- +# Construct a dataset object using the given options. 
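# Hedged usage sketch, not part of the original file: the loader constructed by
# load_dataset() below is typically configured once with a minibatch size and
# then queried for NumPy minibatches. The dataset path is hypothetical and
# assumes a directory prepared with dataset_tool.py.
#
#   training_set = load_dataset(path='datasets/example_tfrecords', max_label_size='full')
#   training_set.configure(minibatch_size=8)
#   images, labels = training_set.get_minibatch_np(8)   # uint8 images, NCHW layout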
+ +def load_dataset(path=None, resolution=None, max_images=None, max_label_size=0, mirror_augment=False, repeat=True, shuffle=True, seed=None): + _ = seed + assert os.path.isdir(path) + return TFRecordDataset( + tfrecord_dir=path, + resolution=resolution, max_images=max_images, max_label_size=max_label_size, + mirror_augment=mirror_augment, repeat=repeat, shuffle=shuffle) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/loss.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..19a5cfcede4f2a605336a72a7657d8385a35ac10 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/loss.py @@ -0,0 +1,307 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Loss functions.""" + +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib +from dnnlib.tflib.autosummary import autosummary + +#---------------------------------------------------------------------------- +# Report statistic for all interested parties (AdaptiveAugment and tfevents). + +def report_stat(aug, name, value): + if aug is not None: + value = aug.report_stat(name, value) + value = autosummary(name, value) + return value + +#---------------------------------------------------------------------------- +# Report loss terms and collect them into EasyDict. + +def report_loss(aug, G_loss, D_loss, G_reg=None, D_reg=None): + assert G_loss is not None and D_loss is not None + terms = dnnlib.EasyDict(G_reg=None, D_reg=None) + terms.G_loss = report_stat(aug, 'Loss/G/loss', G_loss) + terms.D_loss = report_stat(aug, 'Loss/D/loss', D_loss) + if G_reg is not None: terms.G_reg = report_stat(aug, 'Loss/G/reg', G_reg) + if D_reg is not None: terms.D_reg = report_stat(aug, 'Loss/D/reg', D_reg) + return terms + +#---------------------------------------------------------------------------- +# Evaluate G and return results as EasyDict. + +def eval_G(G, latents, labels, return_dlatents=False): + r = dnnlib.EasyDict() + r.args = dnnlib.EasyDict() + r.args.is_training = True + if return_dlatents: + r.args.return_dlatents = True + r.images = G.get_output_for(latents, labels, **r.args) + + r.dlatents = None + if return_dlatents: + r.images, r.dlatents = r.images + return r + +#---------------------------------------------------------------------------- +# Evaluate D and return results as EasyDict. 
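#----------------------------------------------------------------------------
# Illustrative sketch, not part of the original file: the GAN losses further
# below (stylegan2, cmethods) write the non-saturating logistic loss with
# tf.nn.softplus. A runnable NumPy check of the identity they rely on,
# softplus(-s) == -log(sigmoid(s)):

import numpy as np

def softplus(s):
    # Numerically stable log(1 + exp(s)).
    return np.maximum(s, 0) + np.log1p(np.exp(-np.abs(s)))

_s = np.linspace(-5, 5, 11)
assert np.allclose(softplus(-_s), -np.log(1.0 / (1.0 + np.exp(-_s))))
#----------------------------------------------------------------------------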
+ +def eval_D(D, aug, images, labels, report=None, augment_inputs=True, return_aux=0): + r = dnnlib.EasyDict() + r.images_aug = images + r.labels_aug = labels + if augment_inputs and aug is not None: + r.images_aug, r.labels_aug = aug.apply(r.images_aug, r.labels_aug) + + r.args = dnnlib.EasyDict() + r.args.is_training = True + if aug is not None: + r.args.augment_strength = aug.get_strength_var() + if return_aux > 0: + r.args.score_size = return_aux + 1 + r.scores = D.get_output_for(r.images_aug, r.labels_aug, **r.args) + + r.aux = None + if return_aux: + r.aux = r.scores[:, 1:] + r.scores = r.scores[:, :1] + + if report is not None: + report_ops = [ + report_stat(aug, 'Loss/scores/' + report, r.scores), + report_stat(aug, 'Loss/signs/' + report, tf.sign(r.scores)), + report_stat(aug, 'Loss/squares/' + report, tf.square(r.scores)), + ] + with tf.control_dependencies(report_ops): + r.scores = tf.identity(r.scores) + return r + +#---------------------------------------------------------------------------- +# Non-saturating logistic loss with R1 and path length regularizers, used +# in the paper "Analyzing and Improving the Image Quality of StyleGAN". + +def stylegan2(G, D, aug, fake_labels, real_images, real_labels, r1_gamma=10, pl_minibatch_shrink=2, pl_decay=0.01, pl_weight=2, **_kwargs): + # Evaluate networks for the main loss. + minibatch_size = tf.shape(fake_labels)[0] + fake_latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + G_fake = eval_G(G, fake_latents, fake_labels, return_dlatents=True) + D_fake = eval_D(D, aug, G_fake.images, fake_labels, report='fake') + D_real = eval_D(D, aug, real_images, real_labels, report='real') + + # Non-saturating logistic loss from "Generative Adversarial Nets". + with tf.name_scope('Loss_main'): + G_loss = tf.nn.softplus(-D_fake.scores) # -log(sigmoid(D_fake.scores)), pylint: disable=invalid-unary-operand-type + D_loss = tf.nn.softplus(D_fake.scores) # -log(1 - sigmoid(D_fake.scores)) + D_loss += tf.nn.softplus(-D_real.scores) # -log(sigmoid(D_real.scores)), pylint: disable=invalid-unary-operand-type + G_reg = 0 + D_reg = 0 + + # R1 regularizer from "Which Training Methods for GANs do actually Converge?". + if r1_gamma != 0: + with tf.name_scope('Loss_R1'): + r1_grads = tf.gradients(tf.reduce_sum(D_real.scores), [real_images])[0] + r1_penalty = tf.reduce_sum(tf.square(r1_grads), axis=[1,2,3]) + r1_penalty = report_stat(aug, 'Loss/r1_penalty', r1_penalty) + D_reg += r1_penalty * (r1_gamma * 0.5) + + # Path length regularizer from "Analyzing and Improving the Image Quality of StyleGAN". + if pl_weight != 0: + with tf.name_scope('Loss_PL'): + + # Evaluate the regularization term using a smaller minibatch to conserve memory. + G_pl = G_fake + if pl_minibatch_shrink > 1: + pl_minibatch_size = minibatch_size // pl_minibatch_shrink + pl_latents = fake_latents[:pl_minibatch_size] + pl_labels = fake_labels[:pl_minibatch_size] + G_pl = eval_G(G, pl_latents, pl_labels, return_dlatents=True) + + # Compute |J*y|. + pl_noise = tf.random_normal(tf.shape(G_pl.images)) / np.sqrt(np.prod(G.output_shape[2:])) + pl_grads = tf.gradients(tf.reduce_sum(G_pl.images * pl_noise), [G_pl.dlatents])[0] + pl_lengths = tf.sqrt(tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2), axis=1)) + + # Track exponential moving average of |J*y|. 
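#----------------------------------------------------------------------------
# Illustrative NumPy sketch, not part of the original file, of the moving
# average and penalty logic implemented in TF just below: pl_mean tracks an
# exponential moving average of the observed path lengths, and the penalty
# pulls every per-sample length towards that running mean.

import numpy as np

def path_length_penalty_step(pl_lengths, pl_mean, pl_decay=0.01):
    pl_mean = pl_mean + pl_decay * (np.mean(pl_lengths) - pl_mean)
    return np.square(pl_lengths - pl_mean), pl_mean

# Example: lengths close to the running mean are penalized only weakly.
_pen, _mean = path_length_penalty_step(np.array([1.0, 1.1]), pl_mean=1.0)
assert np.isclose(_mean, 1.0005)
#----------------------------------------------------------------------------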
+ with tf.control_dependencies(None): + pl_mean_var = tf.Variable(name='pl_mean', trainable=False, initial_value=0, dtype=tf.float32) + pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) - pl_mean_var) + pl_update = tf.assign(pl_mean_var, pl_mean) + + # Calculate (|J*y|-a)^2. + with tf.control_dependencies([pl_update]): + pl_penalty = tf.square(pl_lengths - pl_mean) + pl_penalty = report_stat(aug, 'Loss/pl_penalty', pl_penalty) + + # Apply weight. + # + # Note: The division in pl_noise decreases the weight by num_pixels, and the reduce_mean + # in pl_lengths decreases it by num_affine_layers. The effective weight then becomes: + # + # gamma_pl = pl_weight / num_pixels / num_affine_layers + # = 2 / (r^2) / (log2(r) * 2 - 2) + # = 1 / (r^2 * (log2(r) - 1)) + # = ln(2) / (r^2 * (ln(r) - ln(2)) + # + G_reg += tf.tile(pl_penalty, [pl_minibatch_shrink]) * pl_weight + + return report_loss(aug, G_loss, D_loss, G_reg, D_reg) + +#---------------------------------------------------------------------------- +# Hybrid loss used for comparison methods used in the paper +# "Training Generative Adversarial Networks with Limited Data". + +def cmethods(G, D, aug, fake_labels, real_images, real_labels, + r1_gamma=10, r2_gamma=0, + pl_minibatch_shrink=2, pl_decay=0.01, pl_weight=2, + bcr_real_weight=0, bcr_fake_weight=0, bcr_augment=None, + zcr_gen_weight=0, zcr_dis_weight=0, zcr_noise_std=0.1, + auxrot_alpha=0, auxrot_beta=0, + **_kwargs, +): + # Evaluate networks for the main loss. + minibatch_size = tf.shape(fake_labels)[0] + fake_latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + G_fake = eval_G(G, fake_latents, fake_labels) + D_fake = eval_D(D, aug, G_fake.images, fake_labels, report='fake') + D_real = eval_D(D, aug, real_images, real_labels, report='real') + + # Non-saturating logistic loss from "Generative Adversarial Nets". + with tf.name_scope('Loss_main'): + G_loss = tf.nn.softplus(-D_fake.scores) # -log(sigmoid(D_fake.scores)), pylint: disable=invalid-unary-operand-type + D_loss = tf.nn.softplus(D_fake.scores) # -log(1 - sigmoid(D_fake.scores)) + D_loss += tf.nn.softplus(-D_real.scores) # -log(sigmoid(D_real.scores)), pylint: disable=invalid-unary-operand-type + G_reg = 0 + D_reg = 0 + + # R1 and R2 regularizers from "Which Training Methods for GANs do actually Converge?". + if r1_gamma != 0 or r2_gamma != 0: + with tf.name_scope('Loss_R1R2'): + if r1_gamma != 0: + r1_grads = tf.gradients(tf.reduce_sum(D_real.scores), [real_images])[0] + r1_penalty = tf.reduce_sum(tf.square(r1_grads), axis=[1,2,3]) + r1_penalty = report_stat(aug, 'Loss/r1_penalty', r1_penalty) + D_reg += r1_penalty * (r1_gamma * 0.5) + if r2_gamma != 0: + r2_grads = tf.gradients(tf.reduce_sum(D_fake.scores), [G_fake.images])[0] + r2_penalty = tf.reduce_sum(tf.square(r2_grads), axis=[1,2,3]) + r2_penalty = report_stat(aug, 'Loss/r2_penalty', r2_penalty) + D_reg += r2_penalty * (r2_gamma * 0.5) + + # Path length regularizer from "Analyzing and Improving the Image Quality of StyleGAN". 
+ if pl_weight != 0: + with tf.name_scope('Loss_PL'): + pl_minibatch_size = minibatch_size // pl_minibatch_shrink + pl_latents = fake_latents[:pl_minibatch_size] + pl_labels = fake_labels[:pl_minibatch_size] + G_pl = eval_G(G, pl_latents, pl_labels, return_dlatents=True) + pl_noise = tf.random_normal(tf.shape(G_pl.images)) / np.sqrt(np.prod(G.output_shape[2:])) + pl_grads = tf.gradients(tf.reduce_sum(G_pl.images * pl_noise), [G_pl.dlatents])[0] + pl_lengths = tf.sqrt(tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2), axis=1)) + with tf.control_dependencies(None): + pl_mean_var = tf.Variable(name='pl_mean', trainable=False, initial_value=0, dtype=tf.float32) + pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) - pl_mean_var) + pl_update = tf.assign(pl_mean_var, pl_mean) + with tf.control_dependencies([pl_update]): + pl_penalty = tf.square(pl_lengths - pl_mean) + pl_penalty = report_stat(aug, 'Loss/pl_penalty', pl_penalty) + G_reg += tf.tile(pl_penalty, [pl_minibatch_shrink]) * pl_weight + + # bCR regularizer from "Improved consistency regularization for GANs". + if (bcr_real_weight != 0 or bcr_fake_weight != 0) and bcr_augment is not None: + with tf.name_scope('Loss_bCR'): + if bcr_real_weight != 0: + bcr_real_images, bcr_real_labels = dnnlib.util.call_func_by_name(D_real.images_aug, D_real.labels_aug, **bcr_augment) + D_bcr_real = eval_D(D, aug, bcr_real_images, bcr_real_labels, report='real_bcr', augment_inputs=False) + bcr_real_penalty = tf.square(D_bcr_real.scores - D_real.scores) + bcr_real_penalty = report_stat(aug, 'Loss/bcr_penalty/real', bcr_real_penalty) + D_loss += bcr_real_penalty * bcr_real_weight # NOTE: Must not use lazy regularization for this term. + if bcr_fake_weight != 0: + bcr_fake_images, bcr_fake_labels = dnnlib.util.call_func_by_name(D_fake.images_aug, D_fake.labels_aug, **bcr_augment) + D_bcr_fake = eval_D(D, aug, bcr_fake_images, bcr_fake_labels, report='fake_bcr', augment_inputs=False) + bcr_fake_penalty = tf.square(D_bcr_fake.scores - D_fake.scores) + bcr_fake_penalty = report_stat(aug, 'Loss/bcr_penalty/fake', bcr_fake_penalty) + D_loss += bcr_fake_penalty * bcr_fake_weight # NOTE: Must not use lazy regularization for this term. + + # zCR regularizer from "Improved consistency regularization for GANs". + if zcr_gen_weight != 0 or zcr_dis_weight != 0: + with tf.name_scope('Loss_zCR'): + zcr_fake_latents = fake_latents + tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) * zcr_noise_std + G_zcr = eval_G(G, zcr_fake_latents, fake_labels) + if zcr_gen_weight > 0: + zcr_gen_penalty = -tf.reduce_mean(tf.square(G_fake.images - G_zcr.images), axis=[1,2,3]) + zcr_gen_penalty = report_stat(aug, 'Loss/zcr_gen_penalty', zcr_gen_penalty) + G_loss += zcr_gen_penalty * zcr_gen_weight + if zcr_dis_weight > 0: + D_zcr = eval_D(D, aug, G_zcr.images, fake_labels, report='fake_zcr', augment_inputs=False) + zcr_dis_penalty = tf.square(D_fake.scores - D_zcr.scores) + zcr_dis_penalty = report_stat(aug, 'Loss/zcr_dis_penalty', zcr_dis_penalty) + D_loss += zcr_dis_penalty * zcr_dis_weight + + # Auxiliary rotation loss from "Self-supervised GANs via auxiliary rotation loss". 
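#----------------------------------------------------------------------------
# Illustrative NumPy sketch, not part of the original file, of the rotation
# scheme used below: the minibatch is tiled 4 times and replica idx is rotated
# by idx * 90 degrees using only axis flips and one transpose; the auxiliary
# head of D is then trained to classify idx.

import numpy as np

def rotate_replica(x, idx):
    """x: [N, C, H, W] with H == W; idx in {0, 1, 2, 3}."""
    if idx in (2, 3):
        x = x[:, :, ::-1, :]                   # flip height (where b0 is False)
    if idx in (1, 2):
        x = x[:, :, :, ::-1]                   # flip width  (where b1 is False)
    if idx in (1, 3):
        x = np.transpose(x, (0, 1, 3, 2))      # swap H and W (where b2 is False)
    return x

_x = np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4)
assert np.array_equal(rotate_replica(_x, 1), np.rot90(_x, k=1, axes=(2, 3)))
assert np.array_equal(rotate_replica(_x, 2), np.rot90(_x, k=2, axes=(2, 3)))
#----------------------------------------------------------------------------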
+ if auxrot_alpha != 0 or auxrot_beta != 0: + with tf.name_scope('Loss_AuxRot'): + idx = tf.range(minibatch_size * 4, dtype=tf.int32) // minibatch_size + b0 = tf.logical_or(tf.equal(idx, 0), tf.equal(idx, 1)) + b1 = tf.logical_or(tf.equal(idx, 0), tf.equal(idx, 3)) + b2 = tf.logical_or(tf.equal(idx, 0), tf.equal(idx, 2)) + if auxrot_alpha != 0: + auxrot_fake = tf.tile(G_fake.images, [4, 1, 1, 1]) + auxrot_fake = tf.where(b0, auxrot_fake, tf.reverse(auxrot_fake, [2])) + auxrot_fake = tf.where(b1, auxrot_fake, tf.reverse(auxrot_fake, [3])) + auxrot_fake = tf.where(b2, auxrot_fake, tf.transpose(auxrot_fake, [0, 1, 3, 2])) + D_auxrot_fake = eval_D(D, aug, auxrot_fake, fake_labels, return_aux=4) + G_loss += tf.nn.sparse_softmax_cross_entropy_with_logits(labels=idx, logits=D_auxrot_fake.aux) * auxrot_alpha + if auxrot_beta != 0: + auxrot_real = tf.tile(real_images, [4, 1, 1, 1]) + auxrot_real = tf.where(b0, auxrot_real, tf.reverse(auxrot_real, [2])) + auxrot_real = tf.where(b1, auxrot_real, tf.reverse(auxrot_real, [3])) + auxrot_real = tf.where(b2, auxrot_real, tf.transpose(auxrot_real, [0, 1, 3, 2])) + D_auxrot_real = eval_D(D, aug, auxrot_real, real_labels, return_aux=4) + D_loss += tf.nn.sparse_softmax_cross_entropy_with_logits(labels=idx, logits=D_auxrot_real.aux) * auxrot_beta + + return report_loss(aug, G_loss, D_loss, G_reg, D_reg) + +#---------------------------------------------------------------------------- +# WGAN-GP loss with epsilon penalty, used in the paper +# "Progressive Growing of GANs for Improved Quality, Stability, and Variation". + +def wgangp(G, D, aug, fake_labels, real_images, real_labels, wgan_epsilon=0.001, wgan_lambda=10, wgan_target=1, **_kwargs): + minibatch_size = tf.shape(fake_labels)[0] + fake_latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + G_fake = eval_G(G, fake_latents, fake_labels) + D_fake = eval_D(D, aug, G_fake.images, fake_labels, report='fake') + D_real = eval_D(D, aug, real_images, real_labels, report='real') + + # WGAN loss from "Wasserstein Generative Adversarial Networks". + with tf.name_scope('Loss_main'): + G_loss = -D_fake.scores # pylint: disable=invalid-unary-operand-type + D_loss = D_fake.scores - D_real.scores + + # Epsilon penalty from "Progressive Growing of GANs for Improved Quality, Stability, and Variation" + with tf.name_scope('Loss_epsilon'): + epsilon_penalty = report_stat(aug, 'Loss/epsilon_penalty', tf.square(D_real.scores)) + D_loss += epsilon_penalty * wgan_epsilon + + # Gradient penalty from "Improved Training of Wasserstein GANs". + with tf.name_scope('Loss_GP'): + mix_factors = tf.random_uniform([minibatch_size, 1, 1, 1], 0, 1, dtype=G_fake.images.dtype) + mix_images = tflib.lerp(tf.cast(real_images, G_fake.images.dtype), G_fake.images, mix_factors) + mix_labels = real_labels # NOTE: Mixing is performed without respect to fake_labels. 
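#----------------------------------------------------------------------------
# Illustrative sketch, not part of the original file: the next few lines follow
# the WGAN-GP recipe. A NumPy sketch of the interpolation and of the penalty
# term, assuming the per-sample gradient norms are already available:

import numpy as np

def interpolate(real, fake, mix_factors):
    return real + (fake - real) * mix_factors   # lerp, as in tflib.lerp

def gradient_penalty(mix_norms, wgan_lambda=10.0, wgan_target=1.0):
    return np.square(mix_norms - wgan_target) * (wgan_lambda / wgan_target ** 2)

# A gradient norm of exactly wgan_target incurs zero penalty.
assert gradient_penalty(np.array([1.0]))[0] == 0.0
#----------------------------------------------------------------------------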
+ D_mix = eval_D(D, aug, mix_images, mix_labels, report='mix') + mix_grads = tf.gradients(tf.reduce_sum(D_mix.scores), [mix_images])[0] + mix_norms = tf.sqrt(tf.reduce_sum(tf.square(mix_grads), axis=[1,2,3])) + mix_norms = report_stat(aug, 'Loss/mix_norms', mix_norms) + gradient_penalty = tf.square(mix_norms - wgan_target) + D_reg = gradient_penalty * (wgan_lambda / (wgan_target**2)) + + return report_loss(aug, G_loss, D_loss, None, D_reg) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/networks.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/networks.py new file mode 100644 index 0000000000000000000000000000000000000000..6929444d3d7475ddf8de286c80024df4b129006f --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/networks.py @@ -0,0 +1,632 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Network architectures from the paper +"Training Generative Adversarial Networks with Limited Data".""" + +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib +from dnnlib.tflib.ops.upfirdn_2d import upsample_2d, downsample_2d, upsample_conv_2d, conv_downsample_2d +from dnnlib.tflib.ops.fused_bias_act import fused_bias_act + +# NOTE: Do not import any application-specific modules here! +# Specify all network parameters as kwargs. + +#---------------------------------------------------------------------------- +# Get/create weight tensor for convolution or fully-connected layer. + +def get_weight(shape, gain=1, equalized_lr=True, lrmul=1, weight_var='weight', trainable=True, use_spectral_norm=False): + fan_in = np.prod(shape[:-1]) # [kernel, kernel, fmaps_in, fmaps_out] for conv2d, [in, out] for fully-connected. + he_std = gain / np.sqrt(fan_in) # He init. + + # Apply equalized learning rate from the paper + # "Progressive Growing of GANs for Improved Quality, Stability, and Variation". + if equalized_lr: + init_std = 1.0 / lrmul + runtime_coef = he_std * lrmul + else: + init_std = he_std / lrmul + runtime_coef = lrmul + + # Create variable. + init = tf.initializers.random_normal(0, init_std) + w = tf.get_variable(weight_var, shape=shape, initializer=init, trainable=trainable) * runtime_coef + if use_spectral_norm: + w = apply_spectral_norm(w, state_var=weight_var+'_sn') + return w + +#---------------------------------------------------------------------------- +# Bias and activation function. + +def apply_bias_act(x, act='linear', gain=None, lrmul=1, clamp=None, bias_var='bias', trainable=True): + b = tf.get_variable(bias_var, shape=[x.shape[1]], initializer=tf.initializers.zeros(), trainable=trainable) * lrmul + return fused_bias_act(x, b=tf.cast(b, x.dtype), act=act, gain=gain, clamp=clamp) + +#---------------------------------------------------------------------------- +# Fully-connected layer. 
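#----------------------------------------------------------------------------
# Illustrative sketch, not part of the original file, of the equalized
# learning rate bookkeeping done by get_weight() above and relied on by the
# dense and convolution layers below: the stored variable is initialized with
# std 1/lrmul and multiplied by he_std * lrmul at runtime, so the effective
# weight always starts at He-initialization scale while lrmul only rescales
# the optimizer's effective step size.

import numpy as np

def equalized_lr_coeffs(shape, gain=1.0, lrmul=1.0):
    fan_in = np.prod(shape[:-1])
    he_std = gain / np.sqrt(fan_in)
    init_std = 1.0 / lrmul
    runtime_coef = he_std * lrmul
    return init_std, runtime_coef

_init_std, _coef = equalized_lr_coeffs([3, 3, 512, 512], lrmul=0.01)
assert np.isclose(_init_std * _coef, 1.0 / np.sqrt(3 * 3 * 512))
#----------------------------------------------------------------------------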
+ +def dense_layer(x, fmaps, lrmul=1, weight_var='weight', trainable=True, use_spectral_norm=False): + if len(x.shape) > 2: + x = tf.reshape(x, [-1, np.prod([d.value for d in x.shape[1:]])]) + w = get_weight([x.shape[1].value, fmaps], lrmul=lrmul, weight_var=weight_var, trainable=trainable, use_spectral_norm=use_spectral_norm) + w = tf.cast(w, x.dtype) + return tf.matmul(x, w) + +#---------------------------------------------------------------------------- +# 2D convolution op with optional upsampling, downsampling, and padding. + +def conv2d(x, w, up=False, down=False, resample_kernel=None, padding=0): + assert not (up and down) + kernel = w.shape[0].value + assert w.shape[1].value == kernel + assert kernel >= 1 and kernel % 2 == 1 + + w = tf.cast(w, x.dtype) + if up: + x = upsample_conv_2d(x, w, data_format='NCHW', k=resample_kernel, padding=padding) + elif down: + x = conv_downsample_2d(x, w, data_format='NCHW', k=resample_kernel, padding=padding) + else: + padding_mode = {0: 'SAME', -(kernel // 2): 'VALID'}[padding] + x = tf.nn.conv2d(x, w, data_format='NCHW', strides=[1,1,1,1], padding=padding_mode) + return x + +#---------------------------------------------------------------------------- +# 2D convolution layer. + +def conv2d_layer(x, fmaps, kernel, up=False, down=False, resample_kernel=None, lrmul=1, trainable=True, use_spectral_norm=False): + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], lrmul=lrmul, trainable=trainable, use_spectral_norm=use_spectral_norm) + return conv2d(x, tf.cast(w, x.dtype), up=up, down=down, resample_kernel=resample_kernel) + +#---------------------------------------------------------------------------- +# Modulated 2D convolution layer from the paper +# "Analyzing and Improving Image Quality of StyleGAN". + +def modulated_conv2d_layer(x, y, fmaps, kernel, up=False, down=False, demodulate=True, resample_kernel=None, lrmul=1, fused_modconv=False, trainable=True, use_spectral_norm=False): + assert not (up and down) + assert kernel >= 1 and kernel % 2 == 1 + + # Get weight. + wshape = [kernel, kernel, x.shape[1].value, fmaps] + w = get_weight(wshape, lrmul=lrmul, trainable=trainable, use_spectral_norm=use_spectral_norm) + if x.dtype.name == 'float16' and not fused_modconv and demodulate: + w *= np.sqrt(1 / np.prod(wshape[:-1])) / tf.reduce_max(tf.abs(w), axis=[0,1,2]) # Pre-normalize to avoid float16 overflow. + ww = w[np.newaxis] # [BkkIO] Introduce minibatch dimension. + + # Modulate. + s = dense_layer(y, fmaps=x.shape[1].value, weight_var='mod_weight', trainable=trainable, use_spectral_norm=use_spectral_norm) # [BI] Transform incoming W to style. + s = apply_bias_act(s, bias_var='mod_bias', trainable=trainable) + 1 # [BI] Add bias (initially 1). + if x.dtype.name == 'float16' and not fused_modconv and demodulate: + s *= 1 / tf.reduce_max(tf.abs(s)) # Pre-normalize to avoid float16 overflow. + ww *= tf.cast(s[:, np.newaxis, np.newaxis, :, np.newaxis], w.dtype) # [BkkIO] Scale input feature maps. + + # Demodulate. + if demodulate: + d = tf.rsqrt(tf.reduce_sum(tf.square(ww), axis=[1,2,3]) + 1e-8) # [BO] Scaling factor. + ww *= d[:, np.newaxis, np.newaxis, np.newaxis, :] # [BkkIO] Scale output feature maps. + + # Reshape/scale input. + if fused_modconv: + x = tf.reshape(x, [1, -1, x.shape[2], x.shape[3]]) # Fused => reshape minibatch to convolution groups. 
+ w = tf.reshape(tf.transpose(ww, [1, 2, 3, 0, 4]), [ww.shape[1], ww.shape[2], ww.shape[3], -1]) + else: + x *= tf.cast(s[:, :, np.newaxis, np.newaxis], x.dtype) # [BIhw] Not fused => scale input activations. + + # 2D convolution. + x = conv2d(x, tf.cast(w, x.dtype), up=up, down=down, resample_kernel=resample_kernel) + + # Reshape/scale output. + if fused_modconv: + x = tf.reshape(x, [-1, fmaps, x.shape[2], x.shape[3]]) # Fused => reshape convolution groups back to minibatch. + elif demodulate: + x *= tf.cast(d[:, :, np.newaxis, np.newaxis], x.dtype) # [BOhw] Not fused => scale output activations. + return x + +#---------------------------------------------------------------------------- +# Normalize 2nd raw moment of the given activation tensor along specified axes. + +def normalize_2nd_moment(x, axis=1, eps=1e-8): + return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=axis, keepdims=True) + eps) + +#---------------------------------------------------------------------------- +# Minibatch standard deviation layer from the paper +# "Progressive Growing of GANs for Improved Quality, Stability, and Variation". + +def minibatch_stddev_layer(x, group_size=None, num_new_features=1): + if group_size is None: + group_size = tf.shape(x)[0] + else: + group_size = tf.minimum(group_size, tf.shape(x)[0]) # Minibatch must be divisible by (or smaller than) group_size. + + G = group_size + F = num_new_features + _N, C, H, W = x.shape.as_list() + c = C // F + + y = tf.cast(x, tf.float32) # [NCHW] Cast to FP32. + y = tf.reshape(y, [G, -1, F, c, H, W]) # [GnFcHW] Split minibatch N into n groups of size G, and channels C into F groups of size c. + y -= tf.reduce_mean(y, axis=0) # [GnFcHW] Subtract mean over group. + y = tf.reduce_mean(tf.square(y), axis=0) # [nFcHW] Calc variance over group. + y = tf.sqrt(y + 1e-8) # [nFcHW] Calc stddev over group. + y = tf.reduce_mean(y, axis=[2,3,4]) # [nF] Take average over channels and pixels. + y = tf.cast(y, x.dtype) # [nF] Cast back to original data type. + y = tf.reshape(y, [-1, F, 1, 1]) # [nF11] Add missing dimensions. + y = tf.tile(y, [G, 1, H, W]) # [NFHW] Replicate over group and pixels. + return tf.concat([x, y], axis=1) # [NCHW] Append to input as new channels. + +#---------------------------------------------------------------------------- +# Spectral normalization from the paper +# "Spectral Normalization for Generative Adversarial Networks". + +def apply_spectral_norm(w, state_var='sn', iterations=1, eps=1e-8): + fmaps = w.shape[-1].value + w_mat = tf.reshape(w, [-1, fmaps]) + u_var = tf.get_variable(state_var, shape=[1,fmaps], initializer=tf.initializers.random_normal(), trainable=False) + + u = u_var + for _ in range(iterations): + v = tf.matmul(u, w_mat, transpose_b=True) + v *= tf.rsqrt(tf.reduce_sum(tf.square(v)) + eps) + u = tf.matmul(v, w_mat) + sigma_inv = tf.rsqrt(tf.reduce_sum(tf.square(u)) + eps) + u *= sigma_inv + + with tf.control_dependencies([tf.assign(u_var, u)]): + return w * sigma_inv + +#---------------------------------------------------------------------------- +# Main generator network. +# Composed of two sub-networks (mapping and synthesis) that are defined below. + +def G_main( + latents_in, # First input: Latent vectors (Z) [minibatch, latent_size]. + labels_in, # Second input: Conditioning labels [minibatch, label_size]. + + # Evaluation mode. + is_training = False, # Network is under training? Enables and disables specific features. + is_validation = False, # Network is under validation? 
Chooses which value to use for truncation_psi. + return_dlatents = False, # Return dlatents (W) in addition to the images? + + # Truncation & style mixing. + truncation_psi = 0.5, # Style strength multiplier for the truncation trick. None = disable. + truncation_cutoff = None, # Number of layers for which to apply the truncation trick. None = disable. + truncation_psi_val = None, # Value for truncation_psi to use during validation. + truncation_cutoff_val = None, # Value for truncation_cutoff to use during validation. + dlatent_avg_beta = 0.995, # Decay for tracking the moving average of W during training. None = disable. + style_mixing_prob = 0.9, # Probability of mixing styles during training. None = disable. + + # Sub-networks. + components = dnnlib.EasyDict(), # Container for sub-networks. Retained between calls. + mapping_func = 'G_mapping', # Build func name for the mapping network. + synthesis_func = 'G_synthesis', # Build func name for the synthesis network. + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + + **kwargs, # Arguments for sub-networks (mapping and synthesis). +): + # Validate arguments. + assert not is_training or not is_validation + assert isinstance(components, dnnlib.EasyDict) + if is_validation: + truncation_psi = truncation_psi_val + truncation_cutoff = truncation_cutoff_val + if is_training or (truncation_psi is not None and not tflib.is_tf_expression(truncation_psi) and truncation_psi == 1): + truncation_psi = None + if is_training: + truncation_cutoff = None + if not is_training or (dlatent_avg_beta is not None and not tflib.is_tf_expression(dlatent_avg_beta) and dlatent_avg_beta == 1): + dlatent_avg_beta = None + if not is_training or (style_mixing_prob is not None and not tflib.is_tf_expression(style_mixing_prob) and style_mixing_prob <= 0): + style_mixing_prob = None + + # Setup components. + if 'synthesis' not in components: + components.synthesis = tflib.Network('G_synthesis', func_name=globals()[synthesis_func], **kwargs) + num_layers = components.synthesis.input_shape[1] + dlatent_size = components.synthesis.input_shape[2] + if 'mapping' not in components: + components.mapping = tflib.Network('G_mapping', func_name=globals()[mapping_func], dlatent_broadcast=num_layers, **kwargs) + + # Evaluate mapping network. + dlatents = components.mapping.get_output_for(latents_in, labels_in, is_training=is_training, **kwargs) + dlatents = tf.cast(dlatents, tf.float32) + + # Update moving average of W. + dlatent_avg = tf.get_variable('dlatent_avg', shape=[dlatent_size], initializer=tf.initializers.zeros(), trainable=False) + if dlatent_avg_beta is not None: + with tf.variable_scope('DlatentAvg'): + batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0) + update_op = tf.assign(dlatent_avg, tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta)) + with tf.control_dependencies([update_op]): + dlatents = tf.identity(dlatents) + + # Perform style mixing regularization. 
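#----------------------------------------------------------------------------
# Illustrative NumPy sketch, not part of the original file, of the mixing step
# implemented below: a second latent is pushed through the same mapping
# network, a per-sample cutoff layer is chosen, and all layers at or above the
# cutoff take their style from the second dlatent.

import numpy as np

def mix_styles(dlatents1, dlatents2, mixing_cutoff):
    """dlatents*: [minibatch, num_layers, dlatent_size]; mixing_cutoff: layer index."""
    layer_idx = np.arange(dlatents1.shape[1])[np.newaxis, :, np.newaxis]
    return np.where(layer_idx < mixing_cutoff, dlatents1, dlatents2)

_a = np.zeros([1, 4, 2]); _b = np.ones([1, 4, 2])
assert mix_styles(_a, _b, mixing_cutoff=2)[0, :, 0].tolist() == [0.0, 0.0, 1.0, 1.0]
#----------------------------------------------------------------------------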
+ if style_mixing_prob is not None: + with tf.variable_scope('StyleMix'): + latents2 = tf.random_normal(tf.shape(latents_in)) + dlatents2 = components.mapping.get_output_for(latents2, labels_in, is_training=is_training, **kwargs) + dlatents2 = tf.cast(dlatents2, tf.float32) + layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis] + mixing_cutoff = tf.cond( + tf.random_uniform([], 0.0, 1.0) < style_mixing_prob, + lambda: tf.random_uniform([], 1, num_layers, dtype=tf.int32), + lambda: num_layers) + dlatents = tf.where(tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)), dlatents, dlatents2) + + # Apply truncation. + if truncation_psi is not None: + with tf.variable_scope('Truncation'): + layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis] + layer_psi = np.ones(layer_idx.shape, dtype=np.float32) + if truncation_cutoff is None: + layer_psi *= truncation_psi + else: + layer_psi = tf.where(layer_idx < truncation_cutoff, layer_psi * truncation_psi, layer_psi) + dlatents = tflib.lerp(dlatent_avg, dlatents, layer_psi) + + # Evaluate synthesis network. + images_out = components.synthesis.get_output_for(dlatents, is_training=is_training, force_clean_graph=is_template_graph, **kwargs) + images_out = tf.identity(images_out, name='images_out') + if return_dlatents: + return images_out, dlatents + return images_out + +#---------------------------------------------------------------------------- +# Generator mapping network. + +def G_mapping( + latents_in, # First input: Latent vectors (Z) [minibatch, latent_size]. + labels_in, # Second input: Conditioning labels [minibatch, label_size]. + + # Input & output dimensions. + latent_size = 512, # Latent vector (Z) dimensionality. + label_size = 0, # Label dimensionality, 0 if no labels. + dlatent_size = 512, # Disentangled latent (W) dimensionality. + dlatent_broadcast = None, # Output disentangled latent (W) as [minibatch, dlatent_size] or [minibatch, dlatent_broadcast, dlatent_size]. + + # Internal details. + mapping_layers = 8, # Number of mapping layers. + mapping_fmaps = None, # Number of activations in the mapping layers, None = same as dlatent_size. + mapping_lrmul = 0.01, # Learning rate multiplier for the mapping layers. + mapping_nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + normalize_latents = True, # Normalize latent vectors (Z) before feeding them to the mapping layers? + label_fmaps = None, # Label embedding dimensionality, None = same as latent_size. + dtype = 'float32', # Data type to use for intermediate activations and outputs. + + **_kwargs, # Ignore unrecognized keyword args. +): + # Inputs. + latents_in.set_shape([None, latent_size]) + labels_in.set_shape([None, label_size]) + latents_in = tf.cast(latents_in, dtype) + labels_in = tf.cast(labels_in, dtype) + x = latents_in + + # Normalize latents. + if normalize_latents: + with tf.variable_scope('Normalize'): + x = normalize_2nd_moment(x) + + # Embed labels, normalize, and concatenate with latents. + if label_size > 0: + with tf.variable_scope('LabelEmbed'): + fmaps = label_fmaps if label_fmaps is not None else latent_size + y = labels_in + y = apply_bias_act(dense_layer(y, fmaps=fmaps)) + y = normalize_2nd_moment(y) + x = tf.concat([x, y], axis=1) + + # Mapping layers. 
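#----------------------------------------------------------------------------
# Illustrative NumPy sketch, not part of the original file, of the truncation
# trick applied a few lines above: each per-layer dlatent is interpolated
# towards the running average dlatent_avg with strength psi, and with a cutoff
# only the first truncation_cutoff layers are truncated.

import numpy as np

def truncate(dlatents, dlatent_avg, psi=0.5, cutoff=None):
    """dlatents: [minibatch, num_layers, dlatent_size]; dlatent_avg: [dlatent_size]."""
    num_layers = dlatents.shape[1]
    layer_psi = np.ones([1, num_layers, 1], dtype=np.float64)
    if cutoff is None:
        layer_psi *= psi
    else:
        layer_psi[:, :cutoff] *= psi
    return dlatent_avg + (dlatents - dlatent_avg) * layer_psi   # lerp(avg, w, psi)

_w = np.full([1, 2, 3], 2.0)
assert np.allclose(truncate(_w, np.zeros(3), psi=0.5), 1.0)
#----------------------------------------------------------------------------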
+ for layer_idx in range(mapping_layers): + with tf.variable_scope(f'Dense{layer_idx}'): + fmaps = mapping_fmaps if mapping_fmaps is not None and layer_idx < mapping_layers - 1 else dlatent_size + x = apply_bias_act(dense_layer(x, fmaps=fmaps, lrmul=mapping_lrmul), act=mapping_nonlinearity, lrmul=mapping_lrmul) + + # Broadcast. + if dlatent_broadcast is not None: + with tf.variable_scope('Broadcast'): + x = tf.tile(x[:, np.newaxis], [1, dlatent_broadcast, 1]) + + # Output. + assert x.dtype == tf.as_dtype(dtype) + return tf.identity(x, name='dlatents_out') + +#---------------------------------------------------------------------------- +# Generator synthesis network. + +def G_synthesis( + dlatents_in, # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size]. + + # Input & output dimensions. + dlatent_size = 512, # Disentangled latent (W) dimensionality. + num_channels = 3, # Number of output color channels. + resolution = 1024, # Output resolution. + + # Capacity. + fmap_base = 16384, # Overall multiplier for the number of feature maps. + fmap_decay = 1, # Log2 feature map reduction when doubling the resolution. + fmap_min = 1, # Minimum number of feature maps in any layer. + fmap_max = 512, # Maximum number of feature maps in any layer. + fmap_const = None, # Number of feature maps in the constant input layer. None = default. + + # Internal details. + use_noise = True, # Enable noise inputs? + randomize_noise = True, # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables. + architecture = 'skip', # Architecture: 'orig', 'skip', 'resnet'. + nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + dtype = 'float32', # Data type to use for intermediate activations and outputs. + num_fp16_res = 0, # Use FP16 for the N highest resolutions, regardless of dtype. + conv_clamp = None, # Clamp the output of convolution layers to [-conv_clamp, +conv_clamp], None = disable clamping. + resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations, None = box filter. + fused_modconv = False, # Implement modulated_conv2d_layer() using grouped convolution? + + **_kwargs, # Ignore unrecognized keyword args. +): + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max) + assert architecture in ['orig', 'skip', 'resnet'] + act = nonlinearity + num_layers = resolution_log2 * 2 - 2 + + # Disentangled latent (W). + dlatents_in.set_shape([None, num_layers, dlatent_size]) + dlatents_in = tf.cast(dlatents_in, dtype) + + # Noise inputs. + noise_inputs = [] + if use_noise: + for layer_idx in range(num_layers - 1): + res = (layer_idx + 5) // 2 + shape = [1, 1, 2**res, 2**res] + noise_inputs.append(tf.get_variable(f'noise{layer_idx}', shape=shape, initializer=tf.initializers.random_normal(), trainable=False)) + + # Single convolution layer with all the bells and whistles. 
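#----------------------------------------------------------------------------
# Illustrative sketch, not part of the original file: the nf() helper defined
# above sets the per-resolution channel counts. With the default capacity
# settings (fmap_base=16384, fmap_decay=1, fmap_min=1, fmap_max=512):

import numpy as np

def nf_default(stage):
    return int(np.clip(int(16384 / (2.0 ** stage)), 1, 512))

# block(res) uses nf(res - 1), so a 1024x1024 generator uses, from 8x8 up to
# 1024x1024: [512, 512, 512, 512, 256, 128, 64, 32] feature maps.
assert [nf_default(res - 1) for res in range(3, 11)] == [512, 512, 512, 512, 256, 128, 64, 32]
#----------------------------------------------------------------------------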
+ def layer(x, layer_idx, fmaps, kernel, up=False): + x = modulated_conv2d_layer(x, dlatents_in[:, layer_idx], fmaps=fmaps, kernel=kernel, up=up, resample_kernel=resample_kernel, fused_modconv=fused_modconv) + if use_noise: + if randomize_noise: + noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype) + else: + noise = tf.cast(noise_inputs[layer_idx], x.dtype) + noise_strength = tf.get_variable('noise_strength', shape=[], initializer=tf.initializers.zeros()) + x += noise * tf.cast(noise_strength, x.dtype) + return apply_bias_act(x, act=act, clamp=conv_clamp) + + # Main block for one resolution. + def block(x, res): # res = 3..resolution_log2 + x = tf.cast(x, 'float16' if res > resolution_log2 - num_fp16_res else dtype) + t = x + with tf.variable_scope('Conv0_up'): + x = layer(x, layer_idx=res*2-5, fmaps=nf(res-1), kernel=3, up=True) + with tf.variable_scope('Conv1'): + x = layer(x, layer_idx=res*2-4, fmaps=nf(res-1), kernel=3) + if architecture == 'resnet': + with tf.variable_scope('Skip'): + t = conv2d_layer(t, fmaps=nf(res-1), kernel=1, up=True, resample_kernel=resample_kernel) + x = (x + t) * (1 / np.sqrt(2)) + return x + + # Upsampling block. + def upsample(y): + with tf.variable_scope('Upsample'): + return upsample_2d(y, k=resample_kernel) + + # ToRGB block. + def torgb(x, y, res): # res = 2..resolution_log2 + with tf.variable_scope('ToRGB'): + t = modulated_conv2d_layer(x, dlatents_in[:, res*2-3], fmaps=num_channels, kernel=1, demodulate=False, fused_modconv=fused_modconv) + t = apply_bias_act(t, clamp=conv_clamp) + t = tf.cast(t, dtype) + if y is not None: + t += tf.cast(y, t.dtype) + return t + + # Layers for 4x4 resolution. + y = None + with tf.variable_scope('4x4'): + with tf.variable_scope('Const'): + fmaps = fmap_const if fmap_const is not None else nf(1) + x = tf.get_variable('const', shape=[1, fmaps, 4, 4], initializer=tf.initializers.random_normal()) + x = tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1]) + with tf.variable_scope('Conv'): + x = layer(x, layer_idx=0, fmaps=nf(1), kernel=3) + if architecture == 'skip': + y = torgb(x, y, 2) + + # Layers for >=8x8 resolutions. + for res in range(3, resolution_log2 + 1): + with tf.variable_scope(f'{2**res}x{2**res}'): + x = block(x, res) + if architecture == 'skip': + y = upsample(y) + if architecture == 'skip' or res == resolution_log2: + y = torgb(x, y, res) + + images_out = y + assert images_out.dtype == tf.as_dtype(dtype) + return tf.identity(images_out, name='images_out') + +#---------------------------------------------------------------------------- +# Discriminator. + +def D_main( + images_in, # First input: Images [minibatch, channel, height, width]. + labels_in, # Second input: Conditioning labels [minibatch, label_size]. + + # Input dimensions. + num_channels = 3, # Number of input color channels. Overridden based on dataset. + resolution = 1024, # Input resolution. Overridden based on dataset. + label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset. + + # Capacity. + fmap_base = 16384, # Overall multiplier for the number of feature maps. + fmap_decay = 1, # Log2 feature map reduction when doubling the resolution. + fmap_min = 1, # Minimum number of feature maps in any layer. + fmap_max = 512, # Maximum number of feature maps in any layer. + + # Internal details. + mapping_layers = 0, # Number of additional mapping layers for the conditioning labels. + mapping_fmaps = None, # Number of activations in the mapping layers, None = default. 
+ mapping_lrmul = 0.1, # Learning rate multiplier for the mapping layers. + architecture = 'resnet', # Architecture: 'orig', 'skip', 'resnet'. + nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc. + mbstd_group_size = None, # Group size for the minibatch standard deviation layer, None = entire minibatch. + mbstd_num_features = 1, # Number of features for the minibatch standard deviation layer, 0 = disable. + dtype = 'float32', # Data type to use for intermediate activations and outputs. + num_fp16_res = 0, # Use FP16 for the N highest resolutions, regardless of dtype. + conv_clamp = None, # Clamp the output of convolution layers to [-conv_clamp, +conv_clamp], None = disable clamping. + resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations, None = box filter. + + # Comparison methods. + augment_strength = 0, # AdaptiveAugment.get_strength_var() for pagan & adropout. + use_pagan = False, # pagan: Enable? + pagan_num = 16, # pagan: Number of active bits with augment_strength=1. + pagan_fade = 0.5, # pagan: Relative duration of fading in new bits. + score_size = 1, # auxrot: Number of scalars to output. Can vary between evaluations. + score_max = 1, # auxrot: Maximum number of scalars to output. Must be set at construction time. + use_spectral_norm = False, # spectralnorm: Enable? + adaptive_dropout = 0, # adropout: Standard deviation to use with augment_strength=1, 0 = disable. + freeze_layers = 0, # Freeze-D: Number of layers to freeze. + + **_kwargs, # Ignore unrecognized keyword args. +): + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max) + assert architecture in ['orig', 'skip', 'resnet'] + if mapping_fmaps is None: + mapping_fmaps = nf(0) + act = nonlinearity + + # Inputs. + images_in.set_shape([None, num_channels, resolution, resolution]) + labels_in.set_shape([None, label_size]) + images_in = tf.cast(images_in, dtype) + labels_in = tf.cast(labels_in, dtype) + + # Label embedding and mapping. + if label_size > 0: + y = labels_in + with tf.variable_scope('LabelEmbed'): + y = apply_bias_act(dense_layer(y, fmaps=mapping_fmaps)) + y = normalize_2nd_moment(y) + for idx in range(mapping_layers): + with tf.variable_scope(f'Mapping{idx}'): + y = apply_bias_act(dense_layer(y, fmaps=mapping_fmaps, lrmul=mapping_lrmul), act=act, lrmul=mapping_lrmul) + labels_in = y + + # Adaptive multiplicative dropout. + def adrop(x): + if adaptive_dropout != 0: + s = [tf.shape(x)[0], x.shape[1]] + [1] * (x.shape.rank - 2) + x *= tf.cast(tf.exp(tf.random_normal(s) * (augment_strength * adaptive_dropout)), x.dtype) + return x + + # Freeze-D. + cur_layer_idx = 0 + def is_next_layer_trainable(): + nonlocal cur_layer_idx + trainable = (cur_layer_idx >= freeze_layers) + cur_layer_idx += 1 + return trainable + + # Construct PA-GAN bit vector. + pagan_bits = None + pagan_signs = None + if use_pagan: + with tf.variable_scope('PAGAN'): + idx = tf.range(pagan_num, dtype=tf.float32) + active = (augment_strength * pagan_num - idx - 1) / max(pagan_fade, 1e-8) + 1 + prob = tf.clip_by_value(active[np.newaxis, :], 0, 1) * 0.5 + rnd = tf.random_uniform([tf.shape(images_in)[0], pagan_num]) + pagan_bits = tf.cast(rnd < prob, dtype=tf.float32) + pagan_signs = tf.reduce_prod(1 - pagan_bits * 2, axis=1, keepdims=True) + + # FromRGB block. 
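#----------------------------------------------------------------------------
# Illustrative sketch, not part of the original file, of the Freeze-D counter
# defined above: layers are numbered in the order their weights are created
# (starting from the highest resolution), and the first freeze_layers of them
# are simply built with trainable=False.

def make_layer_gate(freeze_layers):
    state = {'next_idx': 0}
    def is_next_layer_trainable():
        trainable = state['next_idx'] >= freeze_layers
        state['next_idx'] += 1
        return trainable
    return is_next_layer_trainable

_gate = make_layer_gate(freeze_layers=2)
assert [_gate() for _ in range(4)] == [False, False, True, True]
#----------------------------------------------------------------------------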
+ def fromrgb(x, y, res): # res = 2..resolution_log2 + with tf.variable_scope('FromRGB'): + trainable = is_next_layer_trainable() + t = tf.cast(y, 'float16' if res > resolution_log2 - num_fp16_res else dtype) + t = adrop(conv2d_layer(t, fmaps=nf(res-1), kernel=1, trainable=trainable)) + if pagan_bits is not None: + with tf.variable_scope('PAGAN'): + t += dense_layer(tf.cast(pagan_bits, t.dtype), fmaps=nf(res-1), trainable=trainable)[:, :, np.newaxis, np.newaxis] + t = apply_bias_act(t, act=act, clamp=conv_clamp, trainable=trainable) + if x is not None: + t += tf.cast(x, t.dtype) + return t + + # Main block for one resolution. + def block(x, res): # res = 2..resolution_log2 + x = tf.cast(x, 'float16' if res > resolution_log2 - num_fp16_res else dtype) + t = x + with tf.variable_scope('Conv0'): + trainable = is_next_layer_trainable() + x = apply_bias_act(adrop(conv2d_layer(x, fmaps=nf(res-1), kernel=3, trainable=trainable, use_spectral_norm=use_spectral_norm)), act=act, clamp=conv_clamp, trainable=trainable) + with tf.variable_scope('Conv1_down'): + trainable = is_next_layer_trainable() + x = apply_bias_act(adrop(conv2d_layer(x, fmaps=nf(res-2), kernel=3, down=True, resample_kernel=resample_kernel, trainable=trainable, use_spectral_norm=use_spectral_norm)), act=act, clamp=conv_clamp, trainable=trainable) + if architecture == 'resnet': + with tf.variable_scope('Skip'): + trainable = is_next_layer_trainable() + t = adrop(conv2d_layer(t, fmaps=nf(res-2), kernel=1, down=True, resample_kernel=resample_kernel, trainable=trainable)) + x = (x + t) * (1 / np.sqrt(2)) + return x + + # Downsampling block. + def downsample(y): + with tf.variable_scope('Downsample'): + return downsample_2d(y, k=resample_kernel) + + # Layers for >=8x8 resolutions. + x = None + y = images_in + for res in range(resolution_log2, 2, -1): + with tf.variable_scope(f'{2**res}x{2**res}'): + if architecture == 'skip' or res == resolution_log2: + x = fromrgb(x, y, res) + x = block(x, res) + if architecture == 'skip': + y = downsample(y) + + # Layers for 4x4 resolution. + with tf.variable_scope('4x4'): + if architecture == 'skip': + x = fromrgb(x, y, 2) + x = tf.cast(x, dtype) + if mbstd_num_features > 0: + with tf.variable_scope('MinibatchStddev'): + x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features) + with tf.variable_scope('Conv'): + trainable = is_next_layer_trainable() + x = apply_bias_act(adrop(conv2d_layer(x, fmaps=nf(1), kernel=3, trainable=trainable, use_spectral_norm=use_spectral_norm)), act=act, clamp=conv_clamp, trainable=trainable) + with tf.variable_scope('Dense0'): + trainable = is_next_layer_trainable() + x = apply_bias_act(adrop(dense_layer(x, fmaps=nf(0), trainable=trainable)), act=act, trainable=trainable) + + # Output layer (always trainable). + with tf.variable_scope('Output'): + if label_size > 0: + assert score_max == 1 + x = apply_bias_act(dense_layer(x, fmaps=mapping_fmaps)) + x = tf.reduce_sum(x * labels_in, axis=1, keepdims=True) / np.sqrt(mapping_fmaps) + else: + x = apply_bias_act(dense_layer(x, fmaps=score_max)) + if pagan_signs is not None: + assert score_max == 1 + x *= pagan_signs + scores_out = x[:, :score_size] + + # Output. 
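#----------------------------------------------------------------------------
# Illustrative NumPy sketch, not part of the original file: when label
# conditioning is enabled, the output layer above scores an image by projecting
# its final feature vector onto the embedded label (a projection-discriminator
# style head). The scoring rule it implements:

import numpy as np

def conditional_score(features, label_embedding):
    """features, label_embedding: [minibatch, mapping_fmaps] -> [minibatch, 1]."""
    d = features.shape[-1]
    return np.sum(features * label_embedding, axis=-1, keepdims=True) / np.sqrt(d)

_f = np.ones([2, 4]); _e = np.ones([2, 4])
assert np.allclose(conditional_score(_f, _e), 2.0)   # 4 / sqrt(4)
#----------------------------------------------------------------------------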
+ assert scores_out.dtype == tf.as_dtype(dtype) + scores_out = tf.identity(scores_out, name='scores_out') + return scores_out + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/training_loop.py b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/training_loop.py new file mode 100644 index 0000000000000000000000000000000000000000..4a9dc2ec8a9ec4156d54e6b1efc80ef408ef7ca4 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan2ada_tf_official/training/training_loop.py @@ -0,0 +1,326 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +"""Main training loop.""" + +import os +import pickle +import time +import PIL.Image +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib +from dnnlib.tflib.autosummary import autosummary + +from training import dataset + +#---------------------------------------------------------------------------- +# Select size and contents of the image snapshot grids that are exported +# periodically during training. + +def setup_snapshot_image_grid(training_set): + gw = np.clip(7680 // training_set.shape[2], 7, 32) + gh = np.clip(4320 // training_set.shape[1], 4, 32) + + # Unconditional. + if training_set.label_size == 0: + reals, labels = training_set.get_minibatch_np(gw * gh) + return (gw, gh), reals, labels + + # Row per class. + cw, ch = (gw, 1) + nw = (gw - 1) // cw + 1 + nh = (gh - 1) // ch + 1 + + # Collect images. + blocks = [[] for _i in range(nw * nh)] + for _iter in range(1000000): + real, label = training_set.get_minibatch_np(1) + idx = np.argmax(label[0]) + while idx < len(blocks) and len(blocks[idx]) >= cw * ch: + idx += training_set.label_size + if idx < len(blocks): + blocks[idx].append((real, label)) + if all(len(block) >= cw * ch for block in blocks): + break + + # Layout grid. + reals = np.zeros([gw * gh] + training_set.shape, dtype=training_set.dtype) + labels = np.zeros([gw * gh, training_set.label_size], dtype=training_set.label_dtype) + for i, block in enumerate(blocks): + for j, (real, label) in enumerate(block): + x = (i % nw) * cw + j % cw + y = (i // nw) * ch + j // cw + if x < gw and y < gh: + reals[x + y * gw] = real[0] + labels[x + y * gw] = label[0] + return (gw, gh), reals, labels + +#---------------------------------------------------------------------------- + +def save_image_grid(images, filename, drange, grid_size): + lo, hi = drange + gw, gh = grid_size + images = np.asarray(images, dtype=np.float32) + images = (images - lo) * (255 / (hi - lo)) + images = np.rint(images).clip(0, 255).astype(np.uint8) + _N, C, H, W = images.shape + images = images.reshape(gh, gw, C, H, W) + images = images.transpose(0, 3, 1, 4, 2) + images = images.reshape(gh * H, gw * W, C) + PIL.Image.fromarray(images, {3: 'RGB', 1: 'L'}[C]).save(filename) + +#---------------------------------------------------------------------------- +# Main training script. + +def training_loop( + run_dir = '.', # Output directory. + G_args = {}, # Options for generator network. 
+ D_args = {}, # Options for discriminator network. + G_opt_args = {}, # Options for generator optimizer. + D_opt_args = {}, # Options for discriminator optimizer. + loss_args = {}, # Options for loss function. + train_dataset_args = {}, # Options for dataset to train with. + metric_dataset_args = {}, # Options for dataset to evaluate metrics against. + augment_args = {}, # Options for adaptive augmentations. + metric_arg_list = [], # Metrics to evaluate during training. + num_gpus = 1, # Number of GPUs to use. + minibatch_size = 32, # Global minibatch size. + minibatch_gpu = 4, # Number of samples processed at a time by one GPU. + G_smoothing_kimg = 10, # Half-life of the exponential moving average (EMA) of generator weights. + G_smoothing_rampup = None, # EMA ramp-up coefficient. + minibatch_repeats = 4, # Number of minibatches to run in the inner loop. + lazy_regularization = True, # Perform regularization as a separate training step? + G_reg_interval = 4, # How often the perform regularization for G? Ignored if lazy_regularization=False. + D_reg_interval = 16, # How often the perform regularization for D? Ignored if lazy_regularization=False. + total_kimg = 25000, # Total length of the training, measured in thousands of real images. + kimg_per_tick = 4, # Progress snapshot interval. + image_snapshot_ticks = 50, # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'. + network_snapshot_ticks = 50, # How often to save network snapshots? None = only save 'networks-final.pkl'. + resume_pkl = None, # Network pickle to resume training from. + abort_fn = None, # Callback function for determining whether to abort training. + progress_fn = None, # Callback function for updating training progress. +): + assert minibatch_size % (num_gpus * minibatch_gpu) == 0 + start_time = time.time() + + print('Loading training set...') + training_set = dataset.load_dataset(**train_dataset_args) + print('Image shape:', np.int32(training_set.shape).tolist()) + print('Label shape:', [training_set.label_size]) + print() + + print('Constructing networks...') + with tf.device('/gpu:0'): + G = tflib.Network('G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **G_args) + D = tflib.Network('D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **D_args) + Gs = G.clone('Gs') + if resume_pkl is not None: + print(f'Resuming from "{resume_pkl}"') + with dnnlib.util.open_url(resume_pkl) as f: + rG, rD, rGs = pickle.load(f) + G.copy_vars_from(rG) + D.copy_vars_from(rD) + Gs.copy_vars_from(rGs) + G.print_layers() + D.print_layers() + + print('Exporting sample images...') + grid_size, grid_reals, grid_labels = setup_snapshot_image_grid(training_set) + save_image_grid(grid_reals, os.path.join(run_dir, 'reals.png'), drange=[0,255], grid_size=grid_size) + grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:]) + grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=minibatch_gpu) + save_image_grid(grid_fakes, os.path.join(run_dir, 'fakes_init.png'), drange=[-1,1], grid_size=grid_size) + + print(f'Replicating networks across {num_gpus} GPUs...') + G_gpus = [G] + D_gpus = [D] + for gpu in range(1, num_gpus): + with tf.device(f'/gpu:{gpu}'): + G_gpus.append(G.clone(f'{G.name}_gpu{gpu}')) + D_gpus.append(D.clone(f'{D.name}_gpu{gpu}')) + + print('Initializing augmentations...') + aug = None + if augment_args.get('class_name', None) is not None: + 
aug = dnnlib.util.construct_class_by_name(**augment_args) + aug.init_validation_set(D_gpus=D_gpus, training_set=training_set) + + print('Setting up optimizers...') + G_opt_args = dict(G_opt_args) + D_opt_args = dict(D_opt_args) + for args, reg_interval in [(G_opt_args, G_reg_interval), (D_opt_args, D_reg_interval)]: + args['minibatch_multiplier'] = minibatch_size // num_gpus // minibatch_gpu + if lazy_regularization: + mb_ratio = reg_interval / (reg_interval + 1) + args['learning_rate'] *= mb_ratio + if 'beta1' in args: args['beta1'] **= mb_ratio + if 'beta2' in args: args['beta2'] **= mb_ratio + G_opt = tflib.Optimizer(name='TrainG', **G_opt_args) + D_opt = tflib.Optimizer(name='TrainD', **D_opt_args) + G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args) + D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args) + + print('Constructing training graph...') + data_fetch_ops = [] + training_set.configure(minibatch_gpu) + for gpu, (G_gpu, D_gpu) in enumerate(zip(G_gpus, D_gpus)): + with tf.name_scope(f'Train_gpu{gpu}'), tf.device(f'/gpu:{gpu}'): + + # Fetch training data via temporary variables. + with tf.name_scope('DataFetch'): + real_images_var = tf.Variable(name='images', trainable=False, initial_value=tf.zeros([minibatch_gpu] + training_set.shape)) + real_labels_var = tf.Variable(name='labels', trainable=False, initial_value=tf.zeros([minibatch_gpu, training_set.label_size])) + real_images_write, real_labels_write = training_set.get_minibatch_tf() + real_images_write = tflib.convert_images_from_uint8(real_images_write) + data_fetch_ops += [tf.assign(real_images_var, real_images_write)] + data_fetch_ops += [tf.assign(real_labels_var, real_labels_write)] + + # Evaluate loss function and register gradients. + fake_labels = training_set.get_random_labels_tf(minibatch_gpu) + terms = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, aug=aug, fake_labels=fake_labels, real_images=real_images_var, real_labels=real_labels_var, **loss_args) + if lazy_regularization: + if terms.G_reg is not None: G_reg_opt.register_gradients(tf.reduce_mean(terms.G_reg * G_reg_interval), G_gpu.trainables) + if terms.D_reg is not None: D_reg_opt.register_gradients(tf.reduce_mean(terms.D_reg * D_reg_interval), D_gpu.trainables) + else: + if terms.G_reg is not None: terms.G_loss += terms.G_reg + if terms.D_reg is not None: terms.D_loss += terms.D_reg + G_opt.register_gradients(tf.reduce_mean(terms.G_loss), G_gpu.trainables) + D_opt.register_gradients(tf.reduce_mean(terms.D_loss), D_gpu.trainables) + + print('Finalizing training ops...') + data_fetch_op = tf.group(*data_fetch_ops) + G_train_op = G_opt.apply_updates() + D_train_op = D_opt.apply_updates() + G_reg_op = G_reg_opt.apply_updates(allow_no_op=True) + D_reg_op = D_reg_opt.apply_updates(allow_no_op=True) + Gs_beta_in = tf.placeholder(tf.float32, name='Gs_beta_in', shape=[]) + Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta_in) + tflib.init_uninitialized_vars() + with tf.device('/gpu:0'): + peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse() + + print('Initializing metrics...') + summary_log = tf.summary.FileWriter(run_dir) + metrics = [] + for args in metric_arg_list: + metric = dnnlib.util.construct_class_by_name(**args) + metric.configure(dataset_args=metric_dataset_args, run_dir=run_dir) + metrics.append(metric) + + print(f'Training for {total_kimg} kimg...') + print() + if progress_fn is not None: + progress_fn(0, total_kimg) + tick_start_time = time.time() + maintenance_time = tick_start_time - start_time + cur_nimg 
= 0 + cur_tick = -1 + tick_start_nimg = cur_nimg + running_mb_counter = 0 + + done = False + while not done: + + # Compute EMA decay parameter. + Gs_nimg = G_smoothing_kimg * 1000.0 + if G_smoothing_rampup is not None: + Gs_nimg = min(Gs_nimg, cur_nimg * G_smoothing_rampup) + Gs_beta = 0.5 ** (minibatch_size / max(Gs_nimg, 1e-8)) + + # Run training ops. + for _repeat_idx in range(minibatch_repeats): + rounds = range(0, minibatch_size, minibatch_gpu * num_gpus) + run_G_reg = (lazy_regularization and running_mb_counter % G_reg_interval == 0) + run_D_reg = (lazy_regularization and running_mb_counter % D_reg_interval == 0) + cur_nimg += minibatch_size + running_mb_counter += 1 + + # Fast path without gradient accumulation. + if len(rounds) == 1: + tflib.run([G_train_op, data_fetch_op]) + if run_G_reg: + tflib.run(G_reg_op) + tflib.run([D_train_op, Gs_update_op], {Gs_beta_in: Gs_beta}) + if run_D_reg: + tflib.run(D_reg_op) + + # Slow path with gradient accumulation. + else: + for _round in rounds: + tflib.run(G_train_op) + if run_G_reg: + tflib.run(G_reg_op) + tflib.run(Gs_update_op, {Gs_beta_in: Gs_beta}) + for _round in rounds: + tflib.run(data_fetch_op) + tflib.run(D_train_op) + if run_D_reg: + tflib.run(D_reg_op) + + # Run validation. + if aug is not None: + aug.run_validation(minibatch_size=minibatch_size) + + # Tune augmentation parameters. + if aug is not None: + aug.tune(minibatch_size * minibatch_repeats) + + # Perform maintenance tasks once per tick. + done = (cur_nimg >= total_kimg * 1000) or (abort_fn is not None and abort_fn()) + if done or cur_tick < 0 or cur_nimg >= tick_start_nimg + kimg_per_tick * 1000: + cur_tick += 1 + tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0 + tick_start_nimg = cur_nimg + tick_end_time = time.time() + total_time = tick_end_time - start_time + tick_time = tick_end_time - tick_start_time + + # Report progress. + print(' '.join([ + f"tick {autosummary('Progress/tick', cur_tick):<5d}", + f"kimg {autosummary('Progress/kimg', cur_nimg / 1000.0):<8.1f}", + f"time {dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)):<12s}", + f"sec/tick {autosummary('Timing/sec_per_tick', tick_time):<7.1f}", + f"sec/kimg {autosummary('Timing/sec_per_kimg', tick_time / tick_kimg):<7.2f}", + f"maintenance {autosummary('Timing/maintenance_sec', maintenance_time):<6.1f}", + f"gpumem {autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2**30):<5.1f}", + f"augment {autosummary('Progress/augment', aug.strength if aug is not None else 0):.3f}", + ])) + autosummary('Timing/total_hours', total_time / (60.0 * 60.0)) + autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0)) + if progress_fn is not None: + progress_fn(cur_nimg // 1000, total_kimg) + + # Save snapshots. + if image_snapshot_ticks is not None and (done or cur_tick % image_snapshot_ticks == 0): + grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=minibatch_gpu) + save_image_grid(grid_fakes, os.path.join(run_dir, f'fakes{cur_nimg // 1000:06d}.png'), drange=[-1,1], grid_size=grid_size) + if network_snapshot_ticks is not None and (done or cur_tick % network_snapshot_ticks == 0): + pkl = os.path.join(run_dir, f'network-snapshot-{cur_nimg // 1000:06d}.pkl') + with open(pkl, 'wb') as f: + pickle.dump((G, D, Gs), f) + if len(metrics): + print('Evaluating metrics...') + for metric in metrics: + metric.run(pkl, num_gpus=num_gpus) + + # Update summaries. 
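+ # Flush the accumulated autosummaries (metric scores, timing, GPU memory) to the TensorBoard event log, then reset the tick timer.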
+ for metric in metrics: + metric.update_autosummaries() + tflib.autosummary.save_summaries(summary_log, cur_nimg) + tick_start_time = time.time() + maintenance_time = tick_start_time - tick_end_time + + print() + print('Exiting...') + summary_log.close() + training_set.close() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_converter.py b/ContraCLIP/models/genforce/converters/stylegan_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..75c5f365654139f8183af08359e58239d09c225e --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_converter.py @@ -0,0 +1,271 @@ +# python3.7 +"""Converts StyleGAN model weights from TensorFlow to PyTorch. + +The models can be trained through OR released by the repository: + +https://github.com/NVlabs/stylegan +""" + +import os +import sys +import pickle +import warnings +warnings.filterwarnings('ignore', category=FutureWarning) + +# pylint: disable=wrong-import-position +from tqdm import tqdm +import numpy as np +import tensorflow as tf +import torch +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +from models import build_model +from utils.visualizer import HtmlPageVisualizer +from utils.visualizer import postprocess_image +# pylint: enable=wrong-import-position + +__all__ = ['convert_stylegan_weight'] + +GAN_TPYE = 'stylegan' +OFFICIAL_CODE_DIR = 'stylegan_official' +BASE_DIR = os.path.dirname(os.path.relpath(__file__)) +CODE_PATH = os.path.join(BASE_DIR, OFFICIAL_CODE_DIR) + +TRUNC_PSI = 0.7 +TRUNC_LAYERS = 8 +RANDOMIZE_NOISE = False + + +def convert_stylegan_weight(tf_weight_path, + pth_weight_path, + test_num=10, + save_test_image=False, + verbose=False): + """Converts the pre-trained StyleGAN weights. + + Args: + tf_weight_path: Path to the TensorFlow model to load weights from. + pth_weight_path: Path to the PyTorch model to save converted weights. + test_num: Number of samples used to test the conversion. (default: 10) + save_test_image: Whether to save the test images. (default: False) + verbose: Whether to print verbose log message. 
(default: False) + """ + sess = tf.compat.v1.InteractiveSession() + + print(f'========================================') + print(f'Loading TensorFlow weights from `{tf_weight_path}` ...') + sys.path.insert(0, CODE_PATH) + with open(tf_weight_path, 'rb') as f: + G, D, Gs = pickle.load(f) + sys.path.pop(0) + print(f'Successfully loaded!') + print(f'--------------------') + + z_space_dim = G.input_shapes[0][1] + label_size = G.input_shapes[1][1] + w_space_dim = G.components.mapping.output_shape[2] + image_channels = G.output_shape[1] + resolution = G.output_shape[2] + repeat_w = True + + print(f'Converting TensorFlow weights (G) to PyTorch version ...') + G_vars = dict(G.__getstate__()['variables']) + G_vars.update(dict(G.components.mapping.__getstate__()['variables'])) + G_vars.update(dict(G.components.synthesis.__getstate__()['variables'])) + G_pth = build_model(gan_type=GAN_TPYE, + module='generator', + resolution=resolution, + z_space_dim=z_space_dim, + w_space_dim=w_space_dim, + label_size=label_size, + repeat_w=repeat_w, + image_channels=image_channels) + G_state_dict = G_pth.state_dict() + for pth_var_name, tf_var_name in G_pth.pth_to_tf_var_mapping.items(): + assert tf_var_name in G_vars + assert pth_var_name in G_state_dict + if verbose: + print(f' Converting `{tf_var_name}` to `{pth_var_name}`.') + var = torch.from_numpy(np.array(G_vars[tf_var_name])) + if 'weight' in tf_var_name: + if 'Dense/' in tf_var_name: + var = var.view(var.shape[0], -1, G_pth.init_res, G_pth.init_res) + var = var.permute(1, 0, 2, 3).flip(2, 3) + elif 'Dense' in tf_var_name: + var = var.permute(1, 0) + elif 'StyleMod' in tf_var_name: + var = var.permute(1, 0) + elif 'LabelConcat' in tf_var_name: + pass + elif 'Noise' in tf_var_name: + pass + else: + var = var.permute(3, 2, 0, 1) + G_state_dict[pth_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Converting TensorFlow weights (Gs) to PyTorch version ...') + Gs_vars = dict(Gs.__getstate__()['variables']) + Gs_vars.update(dict(Gs.components.mapping.__getstate__()['variables'])) + Gs_vars.update(dict(Gs.components.synthesis.__getstate__()['variables'])) + Gs_pth = build_model(gan_type=GAN_TPYE, + module='generator', + resolution=resolution, + z_space_dim=z_space_dim, + w_space_dim=w_space_dim, + label_size=label_size, + repeat_w=True, + image_channels=image_channels) + Gs_state_dict = Gs_pth.state_dict() + for pth_var_name, tf_var_name in Gs_pth.pth_to_tf_var_mapping.items(): + assert tf_var_name in Gs_vars + assert pth_var_name in Gs_state_dict + if verbose: + print(f' Converting `{tf_var_name}` to `{pth_var_name}`.') + var = torch.from_numpy(np.array(Gs_vars[tf_var_name])) + if 'weight' in tf_var_name: + if 'Dense/' in tf_var_name: + var = var.view( + var.shape[0], -1, Gs_pth.init_res, Gs_pth.init_res) + var = var.permute(1, 0, 2, 3).flip(2, 3) + elif 'Dense' in tf_var_name: + var = var.permute(1, 0) + elif 'StyleMod' in tf_var_name: + var = var.permute(1, 0) + elif 'LabelConcat' in tf_var_name: + pass + elif 'Noise' in tf_var_name: + pass + else: + var = var.permute(3, 2, 0, 1) + Gs_state_dict[pth_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Converting TensorFlow weights (D) to PyTorch version ...') + D_vars = dict(D.__getstate__()['variables']) + D_pth = build_model(gan_type=GAN_TPYE, + module='discriminator', + resolution=resolution, + label_size=label_size, + image_channels=image_channels) + D_state_dict = D_pth.state_dict() + for pth_var_name, tf_var_name in 
D_pth.pth_to_tf_var_mapping.items(): + assert tf_var_name in D_vars + assert pth_var_name in D_state_dict + if verbose: + print(f' Converting `{tf_var_name}` to `{pth_var_name}`.') + var = torch.from_numpy(np.array(D_vars[tf_var_name])) + if 'weight' in tf_var_name: + if 'Dense' in tf_var_name: + var = var.permute(1, 0) + else: + var = var.permute(3, 2, 0, 1) + D_state_dict[pth_var_name] = var + print(f'Successfully converted!') + print(f'--------------------') + + print(f'Saving PyTorch weights to `{pth_weight_path}` ...') + state_dict = { + 'generator': G_state_dict, + 'discriminator': D_state_dict, + 'generator_smooth': Gs_state_dict, + } + torch.save(state_dict, pth_weight_path) + print(f'Successfully saved!') + print(f'--------------------') + + # Start testing if needed. + if test_num <= 0 or not tf.test.is_built_with_cuda(): + warnings.warn(f'Skip testing the converted weights!') + sess.close() + return + + if save_test_image: + html = HtmlPageVisualizer(num_rows=test_num, num_cols=3) + html.set_headers(['Index', 'Before Conversion', 'After Conversion']) + for i in range(test_num): + html.set_cell(i, 0, text=f'{i}') + + print(f'Testing conversion results ...') + G_pth.load_state_dict(G_state_dict) + D_pth.load_state_dict(D_state_dict) + Gs_pth.load_state_dict(Gs_state_dict) + G_pth.eval().cuda() + D_pth.eval().cuda() + Gs_pth.eval().cuda() + + gs_distance = 0.0 + dg_distance = 0.0 + for i in tqdm(range(test_num)): + # Test Gs(z). + code = np.random.randn(1, z_space_dim) + pth_code = torch.from_numpy(code).type(torch.FloatTensor).cuda() + if label_size: + label_id = np.random.randint(label_size) + label = np.zeros((1, label_size), np.float32) + label[0, label_id] = 1.0 + pth_label = torch.from_numpy(label).type(torch.FloatTensor).cuda() + else: + label_id = 0 + label = None + pth_label = None + tf_output = Gs.run(code, + label, + truncation_psi=TRUNC_PSI, + truncation_cutoff=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE) + pth_output = Gs_pth(pth_code, + label=pth_label, + trunc_psi=TRUNC_PSI, + trunc_layers=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE)['image'] + pth_output = pth_output.detach().cpu().numpy() + distance = np.average(np.abs(tf_output - pth_output)) + if verbose: + print(f' Test {i:03d}: Gs distance {distance:.6e}.') + gs_distance += distance + + if save_test_image: + html.set_cell(i, 1, image=postprocess_image(tf_output)[0]) + html.set_cell(i, 2, image=postprocess_image(pth_output)[0]) + + # Test D(G(z)). 
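+ # Second test: sample a fresh latent, synthesize an image with each framework's generator, and compare the discriminator scores D(G(z)).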
+ code = np.random.randn(1, z_space_dim) + pth_code = torch.from_numpy(code).type(torch.FloatTensor).cuda() + if label_size: + label_id = np.random.randint(label_size) + label = np.zeros((1, label_size), np.float32) + label[0, label_id] = 1.0 + pth_label = torch.from_numpy(label).type(torch.FloatTensor).cuda() + else: + label_id = 0 + label = None + pth_label = None + tf_image = G.run(code, + label, + truncation_psi=TRUNC_PSI, + truncation_cutoff=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE) + tf_output = D.run(tf_image, label) + pth_image = G_pth(pth_code, + label=pth_label, + trunc_psi=TRUNC_PSI, + trunc_layers=TRUNC_LAYERS, + randomize_noise=RANDOMIZE_NOISE)['image'] + pth_output = D_pth(pth_image, pth_label) + pth_output = pth_output.detach().cpu().numpy() + distance = np.average(np.abs(tf_output - pth_output)) + if verbose: + print(f' Test {i:03d}: D(G) distance {distance:.6e}.') + dg_distance += distance + + print(f'Average Gs distance is {gs_distance / test_num:.6e}.') + print(f'Average D(G) distance is {dg_distance / test_num:.6e}.') + print(f'========================================') + + if save_test_image: + html.save(f'{pth_weight_path}.conversion_test.html') + + sess.close() diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/README.md b/ContraCLIP/models/genforce/converters/stylegan_official/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a86a64a60a14ccea6dc3c0a0048a243750fe98fe --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/README.md @@ -0,0 +1,232 @@ +## StyleGAN — Official TensorFlow Implementation +![Python 3.6](https://img.shields.io/badge/python-3.6-green.svg?style=plastic) +![TensorFlow 1.10](https://img.shields.io/badge/tensorflow-1.10-green.svg?style=plastic) +![cuDNN 7.3.1](https://img.shields.io/badge/cudnn-7.3.1-green.svg?style=plastic) +![License CC BY-NC](https://img.shields.io/badge/license-CC_BY--NC-green.svg?style=plastic) + +![Teaser image](./stylegan-teaser.png) +**Picture:** *These people are not real – they were produced by our generator that allows control over different aspects of the image.* + +This repository contains the official TensorFlow implementation of the following paper: + +> **A Style-Based Generator Architecture for Generative Adversarial Networks**
+> Tero Karras (NVIDIA), Samuli Laine (NVIDIA), Timo Aila (NVIDIA)
+> https://arxiv.org/abs/1812.04948 +> +> **Abstract:** *We propose an alternative generator architecture for generative adversarial networks, borrowing from style transfer literature. The new architecture leads to an automatically learned, unsupervised separation of high-level attributes (e.g., pose and identity when trained on human faces) and stochastic variation in the generated images (e.g., freckles, hair), and it enables intuitive, scale-specific control of the synthesis. The new generator improves the state-of-the-art in terms of traditional distribution quality metrics, leads to demonstrably better interpolation properties, and also better disentangles the latent factors of variation. To quantify interpolation quality and disentanglement, we propose two new, automated methods that are applicable to any generator architecture. Finally, we introduce a new, highly varied and high-quality dataset of human faces.* + +For business inquiries, please contact [researchinquiries@nvidia.com](mailto:researchinquiries@nvidia.com)
+For press and other inquiries, please contact Hector Marinez at [hmarinez@nvidia.com](mailto:hmarinez@nvidia.com)
+ +**★★★ NEW: StyleGAN2 is available at [https://github.com/NVlabs/stylegan2](https://github.com/NVlabs/stylegan2) ★★★** + +## Resources + +Material related to our paper is available via the following links: + +- Paper: https://arxiv.org/abs/1812.04948 +- Video: https://youtu.be/kSLJriaOumA +- Code: https://github.com/NVlabs/stylegan +- FFHQ: https://github.com/NVlabs/ffhq-dataset + +Additional material can be found on Google Drive: + +| Path | Description +| :--- | :---------- +| [StyleGAN](https://drive.google.com/open?id=1uka3a1noXHAydRPRbknqwKVGODvnmUBX) | Main folder. +| ├  [stylegan-paper.pdf](https://drive.google.com/open?id=1v-HkF3Ehrpon7wVIx4r5DLcko_U_V6Lt) | High-quality version of the paper PDF. +| ├  [stylegan-video.mp4](https://drive.google.com/open?id=1uzwkZHQX_9pYg1i0d1Nbe3D9xPO8-qBf) | High-quality version of the result video. +| ├  [images](https://drive.google.com/open?id=1-l46akONUWF6LCpDoeq63H53rD7MeiTd) | Example images produced using our generator. +| │  ├  [representative-images](https://drive.google.com/open?id=1ToY5P4Vvf5_c3TyUizQ8fckFFoFtBvD8) | High-quality images to be used in articles, blog posts, etc. +| │  └  [100k-generated-images](https://drive.google.com/open?id=100DJ0QXyG89HZzB4w2Cbyf4xjNK54cQ1) | 100,000 generated images for different amounts of truncation. +| │     ├  [ffhq-1024x1024](https://drive.google.com/open?id=14lm8VRN1pr4g_KVe6_LvyDX1PObst6d4) | Generated using Flickr-Faces-HQ dataset at 1024×1024. +| │     ├  [bedrooms-256x256](https://drive.google.com/open?id=1Vxz9fksw4kgjiHrvHkX4Hze4dyThFW6t) | Generated using LSUN Bedroom dataset at 256×256. +| │     ├  [cars-512x384](https://drive.google.com/open?id=1MFCvOMdLE2_mpeLPTiDw5dxc2CRuKkzS) | Generated using LSUN Car dataset at 512×384. +| │     └  [cats-256x256](https://drive.google.com/open?id=1gq-Gj3GRFiyghTPKhp8uDMA9HV_0ZFWQ) | Generated using LSUN Cat dataset at 256×256. +| ├  [videos](https://drive.google.com/open?id=1N8pOd_Bf8v89NGUaROdbD8-ayLPgyRRo) | Example videos produced using our generator. +| │  └  [high-quality-video-clips](https://drive.google.com/open?id=1NFO7_vH0t98J13ckJYFd7kuaTkyeRJ86) | Individual segments of the result video as high-quality MP4. +| ├  [ffhq-dataset](https://drive.google.com/open?id=1u2xu7bSrWxrbUxk-dT-UvEJq8IjdmNTP) | Raw data for the [Flickr-Faces-HQ dataset](https://github.com/NVlabs/ffhq-dataset). +| └  [networks](https://drive.google.com/open?id=1MASQyN5m0voPcx7-9K0r5gObhvvPups7) | Pre-trained networks as pickled instances of [dnnlib.tflib.Network](./dnnlib/tflib/network.py). +|    ├  [stylegan-ffhq-1024x1024.pkl](https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ) | StyleGAN trained with Flickr-Faces-HQ dataset at 1024×1024. +|    ├  [stylegan-celebahq-1024x1024.pkl](https://drive.google.com/uc?id=1MGqJl28pN4t7SAtSrPdSRJSQJqahkzUf) | StyleGAN trained with CelebA-HQ dataset at 1024×1024. +|    ├  [stylegan-bedrooms-256x256.pkl](https://drive.google.com/uc?id=1MOSKeGF0FJcivpBI7s63V9YHloUTORiF) | StyleGAN trained with LSUN Bedroom dataset at 256×256. +|    ├  [stylegan-cars-512x384.pkl](https://drive.google.com/uc?id=1MJ6iCfNtMIRicihwRorsM3b7mmtmK9c3) | StyleGAN trained with LSUN Car dataset at 512×384. +|    ├  [stylegan-cats-256x256.pkl](https://drive.google.com/uc?id=1MQywl0FNt6lHu8E_EUqnRbviagS7fbiJ) | StyleGAN trained with LSUN Cat dataset at 256×256. +|    └  [metrics](https://drive.google.com/open?id=1MvYdWCBuMfnoYGptRH-AgKLbPTsIQLhl) | Auxiliary networks for the quality and disentanglement metrics. 
+|       ├  [inception_v3_features.pkl](https://drive.google.com/uc?id=1MzTY44rLToO5APn8TZmfR7_ENSe5aZUn) | Standard [Inception-v3](https://arxiv.org/abs/1512.00567) classifier that outputs a raw feature vector. +|       ├  [vgg16_zhang_perceptual.pkl](https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2) | Standard [LPIPS](https://arxiv.org/abs/1801.03924) metric to estimate perceptual similarity. +|       ├  [celebahq-classifier-00-male.pkl](https://drive.google.com/uc?id=1Q5-AI6TwWhCVM7Muu4tBM7rp5nG_gmCX) | Binary classifier trained to detect a single attribute of CelebA-HQ. +|       └ ⋯ | Please see the file listing for remaining networks. + +## Licenses + +All material, excluding the Flickr-Faces-HQ dataset, is made available under [Creative Commons BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/) license by NVIDIA Corporation. You can **use, redistribute, and adapt** the material for **non-commercial purposes**, as long as you give appropriate credit by **citing our paper** and **indicating any changes** that you've made. + +For license information regarding the FFHQ dataset, please refer to the [Flickr-Faces-HQ repository](https://github.com/NVlabs/ffhq-dataset). + +`inception_v3_features.pkl` and `inception_v3_softmax.pkl` are derived from the pre-trained [Inception-v3](https://arxiv.org/abs/1512.00567) network by Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, and Zbigniew Wojna. The network was originally shared under [Apache 2.0](https://github.com/tensorflow/models/blob/master/LICENSE) license on the [TensorFlow Models](https://github.com/tensorflow/models) repository. + +`vgg16.pkl` and `vgg16_zhang_perceptual.pkl` are derived from the pre-trained [VGG-16](https://arxiv.org/abs/1409.1556) network by Karen Simonyan and Andrew Zisserman. The network was originally shared under [Creative Commons BY 4.0](https://creativecommons.org/licenses/by/4.0/) license on the [Very Deep Convolutional Networks for Large-Scale Visual Recognition](http://www.robots.ox.ac.uk/~vgg/research/very_deep/) project page. + +`vgg16_zhang_perceptual.pkl` is further derived from the pre-trained [LPIPS](https://arxiv.org/abs/1801.03924) weights by Richard Zhang, Phillip Isola, Alexei A. Efros, Eli Shechtman, and Oliver Wang. The weights were originally shared under [BSD 2-Clause "Simplified" License](https://github.com/richzhang/PerceptualSimilarity/blob/master/LICENSE) on the [PerceptualSimilarity](https://github.com/richzhang/PerceptualSimilarity) repository. + +## System requirements + +* Both Linux and Windows are supported, but we strongly recommend Linux for performance and compatibility reasons. +* 64-bit Python 3.6 installation. We recommend Anaconda3 with numpy 1.14.3 or newer. +* TensorFlow 1.10.0 or newer with GPU support. +* One or more high-end NVIDIA GPUs with at least 11GB of DRAM. We recommend NVIDIA DGX-1 with 8 Tesla V100 GPUs. +* NVIDIA driver 391.35 or newer, CUDA toolkit 9.0 or newer, cuDNN 7.3.1 or newer. + +## Using pre-trained networks + +A minimal example of using a pre-trained StyleGAN generator is given in [pretrained_example.py](./pretrained_example.py). When executed, the script downloads a pre-trained StyleGAN generator from Google Drive and uses it to generate an image: + +``` +> python pretrained_example.py +Downloading https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ .... done + +Gs Params OutputShape WeightShape +--- --- --- --- +latents_in - (?, 512) - +... 
+images_out - (?, 3, 1024, 1024) - +--- --- --- --- +Total 26219627 + +> ls results +example.png # https://drive.google.com/uc?id=1UDLT_zb-rof9kKH0GwiJW_bS9MoZi8oP +``` + +A more advanced example is given in [generate_figures.py](./generate_figures.py). The script reproduces the figures from our paper in order to illustrate style mixing, noise inputs, and truncation: +``` +> python generate_figures.py +results/figure02-uncurated-ffhq.png # https://drive.google.com/uc?id=1U3r1xgcD7o-Fd0SBRpq8PXYajm7_30cu +results/figure03-style-mixing.png # https://drive.google.com/uc?id=1U-nlMDtpnf1RcYkaFQtbh5oxnhA97hy6 +results/figure04-noise-detail.png # https://drive.google.com/uc?id=1UX3m39u_DTU6eLnEW6MqGzbwPFt2R9cG +results/figure05-noise-components.png # https://drive.google.com/uc?id=1UQKPcvYVeWMRccGMbs2pPD9PVv1QDyp_ +results/figure08-truncation-trick.png # https://drive.google.com/uc?id=1ULea0C12zGlxdDQFNLXOWZCHi3QNfk_v +results/figure10-uncurated-bedrooms.png # https://drive.google.com/uc?id=1UEBnms1XMfj78OHj3_cx80mUf_m9DUJr +results/figure11-uncurated-cars.png # https://drive.google.com/uc?id=1UO-4JtAs64Kun5vIj10UXqAJ1d5Ir1Ke +results/figure12-uncurated-cats.png # https://drive.google.com/uc?id=1USnJc14prlu3QAYxstrtlfXC9sDWPA-W +``` + +The pre-trained networks are stored as standard pickle files on Google Drive: + +``` +# Load pre-trained network. +url = 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ' # karras2019stylegan-ffhq-1024x1024.pkl +with dnnlib.util.open_url(url, cache_dir=config.cache_dir) as f: + _G, _D, Gs = pickle.load(f) + # _G = Instantaneous snapshot of the generator. Mainly useful for resuming a previous training run. + # _D = Instantaneous snapshot of the discriminator. Mainly useful for resuming a previous training run. + # Gs = Long-term average of the generator. Yields higher-quality results than the instantaneous snapshot. +``` + +The above code downloads the file and unpickles it to yield 3 instances of [dnnlib.tflib.Network](./dnnlib/tflib/network.py). To generate images, you will typically want to use `Gs` – the other two networks are provided for completeness. In order for `pickle.load()` to work, you will need to have the `dnnlib` source directory in your PYTHONPATH and a `tf.Session` set as default. The session can be initialized by calling `dnnlib.tflib.init_tf()`. + +There are three ways to use the pre-trained generator: + +1. Use `Gs.run()` for immediate-mode operation where the inputs and outputs are numpy arrays: + ``` + # Pick latent vector. + rnd = np.random.RandomState(5) + latents = rnd.randn(1, Gs.input_shape[1]) + + # Generate image. + fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True) + images = Gs.run(latents, None, truncation_psi=0.7, randomize_noise=True, output_transform=fmt) + ``` + The first argument is a batch of latent vectors of shape `[num, 512]`. The second argument is reserved for class labels (not used by StyleGAN). The remaining keyword arguments are optional and can be used to further modify the operation (see below). The output is a batch of images, whose format is dictated by the `output_transform` argument. + +2. 
Use `Gs.get_output_for()` to incorporate the generator as a part of a larger TensorFlow expression: + ``` + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + images = Gs_clone.get_output_for(latents, None, is_validation=True, randomize_noise=True) + images = tflib.convert_images_to_uint8(images) + result_expr.append(inception_clone.get_output_for(images)) + ``` + The above code is from [metrics/frechet_inception_distance.py](./metrics/frechet_inception_distance.py). It generates a batch of random images and feeds them directly to the [Inception-v3](https://arxiv.org/abs/1512.00567) network without having to convert the data to numpy arrays in between. + +3. Look up `Gs.components.mapping` and `Gs.components.synthesis` to access individual sub-networks of the generator. Similar to `Gs`, the sub-networks are represented as independent instances of [dnnlib.tflib.Network](./dnnlib/tflib/network.py): + ``` + src_latents = np.stack(np.random.RandomState(seed).randn(Gs.input_shape[1]) for seed in src_seeds) + src_dlatents = Gs.components.mapping.run(src_latents, None) # [seed, layer, component] + src_images = Gs.components.synthesis.run(src_dlatents, randomize_noise=False, **synthesis_kwargs) + ``` + The above code is from [generate_figures.py](./generate_figures.py). It first transforms a batch of latent vectors into the intermediate *W* space using the mapping network and then turns these vectors into a batch of images using the synthesis network. The `dlatents` array stores a separate copy of the same *w* vector for each layer of the synthesis network to facilitate style mixing. + +The exact details of the generator are defined in [training/networks_stylegan.py](./training/networks_stylegan.py) (see `G_style`, `G_mapping`, and `G_synthesis`). The following keyword arguments can be specified to modify the behavior when calling `run()` and `get_output_for()`: + +* `truncation_psi` and `truncation_cutoff` control the truncation trick that is performed by default when using `Gs` (ψ=0.7, cutoff=8). It can be disabled by setting `truncation_psi=1` or `is_validation=True`, and the image quality can be further improved at the cost of variation by setting e.g. `truncation_psi=0.5`. Note that truncation is always disabled when using the sub-networks directly. The average *w* needed to manually perform the truncation trick can be looked up using `Gs.get_var('dlatent_avg')`. + +* `randomize_noise` determines whether to re-randomize the noise inputs for each generated image (`True`, default) or whether to use specific noise values for the entire minibatch (`False`). The specific values can be accessed via the `tf.Variable` instances that are found using `[var for name, var in Gs.components.synthesis.vars.items() if name.startswith('noise')]`. + +* When using the mapping network directly, you can specify `dlatent_broadcast=None` to disable the automatic duplication of `dlatents` over the layers of the synthesis network. + +* Runtime performance can be fine-tuned via `structure='fixed'` and `dtype='float16'`. The former disables support for progressive growing, which is not needed for a fully-trained generator, and the latter performs all computation using half-precision floating point arithmetic. + +## Preparing datasets for training + +The training and evaluation scripts operate on datasets stored as multi-resolution TFRecords. Each dataset is represented by a directory containing the same image data in several resolutions to enable efficient streaming. 
There is a separate *.tfrecords file for each resolution, and if the dataset contains labels, they are stored in a separate file as well. By default, the scripts expect to find the datasets at `datasets/<NAME>/<NAME>-<RESOLUTION>.tfrecords`. The directory can be changed by editing [config.py](./config.py): + +``` +result_dir = 'results' +data_dir = 'datasets' +cache_dir = 'cache' +``` + +To obtain the FFHQ dataset (`datasets/ffhq`), please refer to the [Flickr-Faces-HQ repository](https://github.com/NVlabs/ffhq-dataset). + +To obtain the CelebA-HQ dataset (`datasets/celebahq`), please refer to the [Progressive GAN repository](https://github.com/tkarras/progressive_growing_of_gans). + +To obtain other datasets, including LSUN, please consult their corresponding project pages. The datasets can be converted to multi-resolution TFRecords using the provided [dataset_tool.py](./dataset_tool.py): + +``` +> python dataset_tool.py create_lsun datasets/lsun-bedroom-full ~/lsun/bedroom_lmdb --resolution 256 +> python dataset_tool.py create_lsun_wide datasets/lsun-car-512x384 ~/lsun/car_lmdb --width 512 --height 384 +> python dataset_tool.py create_lsun datasets/lsun-cat-full ~/lsun/cat_lmdb --resolution 256 +> python dataset_tool.py create_cifar10 datasets/cifar10 ~/cifar10 +> python dataset_tool.py create_from_images datasets/custom-dataset ~/custom-images +``` + +## Training networks + +Once the datasets are set up, you can train your own StyleGAN networks as follows: + +1. Edit [train.py](./train.py) to specify the dataset and training configuration by uncommenting or editing specific lines. +2. Run the training script with `python train.py`. +3. The results are written to a newly created directory `results/<ID>-<DESCRIPTION>`. +4. The training may take several days (or weeks) to complete, depending on the configuration. + +By default, `train.py` is configured to train the highest-quality StyleGAN (configuration F in Table 1) for the FFHQ dataset at 1024×1024 resolution using 8 GPUs. Please note that we have used 8 GPUs in all of our experiments. Training with fewer GPUs may not produce identical results – if you wish to compare against our technique, we strongly recommend using the same number of GPUs. + +Expected training times for the default configuration using Tesla V100 GPUs: + +| GPUs | 1024×1024 | 512×512 | 256×256 | +| :--- | :-------------- | :------------ | :------------ | +| 1 | 41 days 4 hours | 24 days 21 hours | 14 days 22 hours | +| 2 | 21 days 22 hours | 13 days 7 hours | 9 days 5 hours | +| 4 | 11 days 8 hours | 7 days 0 hours | 4 days 21 hours | +| 8 | 6 days 14 hours | 4 days 10 hours | 3 days 8 hours | + +## Evaluating quality and disentanglement + +The quality and disentanglement metrics used in our paper can be evaluated using [run_metrics.py](./run_metrics.py). By default, the script will evaluate the Fréchet Inception Distance (`fid50k`) for the pre-trained FFHQ generator and write the results into a newly created directory under `results`. The exact behavior can be changed by uncommenting or editing specific lines in [run_metrics.py](./run_metrics.py). + +Expected evaluation time and results for the pre-trained FFHQ generator using one Tesla V100 GPU: + +| Metric | Time | Result | Description +| :----- | :--- | :----- | :---------- +| fid50k | 16 min | 4.4159 | Fréchet Inception Distance using 50,000 images. +| ppl_zfull | 55 min | 664.8854 | Perceptual Path Length for full paths in *Z*. +| ppl_wfull | 55 min | 233.3059 | Perceptual Path Length for full paths in *W*. 
+| ppl_zend | 55 min | 666.1057 | Perceptual Path Length for path endpoints in *Z*. +| ppl_wend | 55 min | 197.2266 | Perceptual Path Length for path endpoints in *W*. +| ls | 10 hours | z: 165.0106
w: 3.7447 | Linear Separability in *Z* and *W*. + +Please note that the exact results may vary from run to run due to the non-deterministic nature of TensorFlow. + +## Acknowledgements + +We thank Jaakko Lehtinen, David Luebke, and Tuomas Kynkäänniemi for in-depth discussions and helpful comments; Janne Hellsten, Tero Kuosmanen, and Pekka Jänis for compute infrastructure and help with the code release. diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/config.py b/ContraCLIP/models/genforce/converters/stylegan_official/config.py new file mode 100644 index 0000000000000000000000000000000000000000..dcf45253e888806dc58d8dfc994d2dad96527172 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/config.py @@ -0,0 +1,18 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Global configuration.""" + +#---------------------------------------------------------------------------- +# Paths. + +result_dir = 'results' +data_dir = 'datasets' +cache_dir = 'cache' +run_dir_ignore = ['results', 'datasets', 'cache'] + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/dataset_tool.py b/ContraCLIP/models/genforce/converters/stylegan_official/dataset_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..4ddfe448e2ccaa30e04ad4b49761d406846c962f --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/dataset_tool.py @@ -0,0 +1,645 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 
+ +"""Tool for creating multi-resolution TFRecords datasets for StyleGAN and ProGAN.""" + +# pylint: disable=too-many-lines +import os +import sys +import glob +import argparse +import threading +import six.moves.queue as Queue # pylint: disable=import-error +import traceback +import numpy as np +import tensorflow as tf +import PIL.Image +import dnnlib.tflib as tflib + +from training import dataset + +#---------------------------------------------------------------------------- + +def error(msg): + print('Error: ' + msg) + exit(1) + +#---------------------------------------------------------------------------- + +class TFRecordExporter: + def __init__(self, tfrecord_dir, expected_images, print_progress=True, progress_interval=10): + self.tfrecord_dir = tfrecord_dir + self.tfr_prefix = os.path.join(self.tfrecord_dir, os.path.basename(self.tfrecord_dir)) + self.expected_images = expected_images + self.cur_images = 0 + self.shape = None + self.resolution_log2 = None + self.tfr_writers = [] + self.print_progress = print_progress + self.progress_interval = progress_interval + + if self.print_progress: + print('Creating dataset "%s"' % tfrecord_dir) + if not os.path.isdir(self.tfrecord_dir): + os.makedirs(self.tfrecord_dir) + assert os.path.isdir(self.tfrecord_dir) + + def close(self): + if self.print_progress: + print('%-40s\r' % 'Flushing data...', end='', flush=True) + for tfr_writer in self.tfr_writers: + tfr_writer.close() + self.tfr_writers = [] + if self.print_progress: + print('%-40s\r' % '', end='', flush=True) + print('Added %d images.' % self.cur_images) + + def choose_shuffled_order(self): # Note: Images and labels must be added in shuffled order. + order = np.arange(self.expected_images) + np.random.RandomState(123).shuffle(order) + return order + + def add_image(self, img): + if self.print_progress and self.cur_images % self.progress_interval == 0: + print('%d / %d\r' % (self.cur_images, self.expected_images), end='', flush=True) + if self.shape is None: + self.shape = img.shape + self.resolution_log2 = int(np.log2(self.shape[1])) + assert self.shape[0] in [1, 3] + assert self.shape[1] == self.shape[2] + assert self.shape[1] == 2**self.resolution_log2 + tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE) + for lod in range(self.resolution_log2 - 1): + tfr_file = self.tfr_prefix + '-r%02d.tfrecords' % (self.resolution_log2 - lod) + self.tfr_writers.append(tf.python_io.TFRecordWriter(tfr_file, tfr_opt)) + assert img.shape == self.shape + for lod, tfr_writer in enumerate(self.tfr_writers): + if lod: + img = img.astype(np.float32) + img = (img[:, 0::2, 0::2] + img[:, 0::2, 1::2] + img[:, 1::2, 0::2] + img[:, 1::2, 1::2]) * 0.25 + quant = np.rint(img).clip(0, 255).astype(np.uint8) + ex = tf.train.Example(features=tf.train.Features(feature={ + 'shape': tf.train.Feature(int64_list=tf.train.Int64List(value=quant.shape)), + 'data': tf.train.Feature(bytes_list=tf.train.BytesList(value=[quant.tostring()]))})) + tfr_writer.write(ex.SerializeToString()) + self.cur_images += 1 + + def add_labels(self, labels): + if self.print_progress: + print('%-40s\r' % 'Saving labels...', end='', flush=True) + assert labels.shape[0] == self.cur_images + with open(self.tfr_prefix + '-rxx.labels', 'wb') as f: + np.save(f, labels.astype(np.float32)) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + +#---------------------------------------------------------------------------- + +class ExceptionInfo(object): + def __init__(self): + self.value = 
sys.exc_info()[1] + self.traceback = traceback.format_exc() + +#---------------------------------------------------------------------------- + +class WorkerThread(threading.Thread): + def __init__(self, task_queue): + threading.Thread.__init__(self) + self.task_queue = task_queue + + def run(self): + while True: + func, args, result_queue = self.task_queue.get() + if func is None: + break + try: + result = func(*args) + except: + result = ExceptionInfo() + result_queue.put((result, args)) + +#---------------------------------------------------------------------------- + +class ThreadPool(object): + def __init__(self, num_threads): + assert num_threads >= 1 + self.task_queue = Queue.Queue() + self.result_queues = dict() + self.num_threads = num_threads + for _idx in range(self.num_threads): + thread = WorkerThread(self.task_queue) + thread.daemon = True + thread.start() + + def add_task(self, func, args=()): + assert hasattr(func, '__call__') # must be a function + if func not in self.result_queues: + self.result_queues[func] = Queue.Queue() + self.task_queue.put((func, args, self.result_queues[func])) + + def get_result(self, func): # returns (result, args) + result, args = self.result_queues[func].get() + if isinstance(result, ExceptionInfo): + print('\n\nWorker thread caught an exception:\n' + result.traceback) + raise result.value + return result, args + + def finish(self): + for _idx in range(self.num_threads): + self.task_queue.put((None, (), None)) + + def __enter__(self): # for 'with' statement + return self + + def __exit__(self, *excinfo): + self.finish() + + def process_items_concurrently(self, item_iterator, process_func=lambda x: x, pre_func=lambda x: x, post_func=lambda x: x, max_items_in_flight=None): + if max_items_in_flight is None: max_items_in_flight = self.num_threads * 4 + assert max_items_in_flight >= 1 + results = [] + retire_idx = [0] + + def task_func(prepared, _idx): + return process_func(prepared) + + def retire_result(): + processed, (_prepared, idx) = self.get_result(task_func) + results[idx] = processed + while retire_idx[0] < len(results) and results[retire_idx[0]] is not None: + yield post_func(results[retire_idx[0]]) + results[retire_idx[0]] = None + retire_idx[0] += 1 + + for idx, item in enumerate(item_iterator): + prepared = pre_func(item) + results.append(None) + self.add_task(func=task_func, args=(prepared, idx)) + while retire_idx[0] < idx - max_items_in_flight + 2: + for res in retire_result(): yield res + while retire_idx[0] < len(results): + for res in retire_result(): yield res + +#---------------------------------------------------------------------------- + +def display(tfrecord_dir): + print('Loading dataset "%s"' % tfrecord_dir) + tflib.init_tf({'gpu_options.allow_growth': True}) + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle_mb=0) + tflib.init_uninitialized_vars() + import cv2 # pip install opencv-python + + idx = 0 + while True: + try: + images, labels = dset.get_minibatch_np(1) + except tf.errors.OutOfRangeError: + break + if idx == 0: + print('Displaying images') + cv2.namedWindow('dataset_tool') + print('Press SPACE or ENTER to advance, ESC to exit') + print('\nidx = %-8d\nlabel = %s' % (idx, labels[0].tolist())) + cv2.imshow('dataset_tool', images[0].transpose(1, 2, 0)[:, :, ::-1]) # CHW => HWC, RGB => BGR + idx += 1 + if cv2.waitKey() == 27: + break + print('\nDisplayed %d images.' 
% idx) + +#---------------------------------------------------------------------------- + +def extract(tfrecord_dir, output_dir): + print('Loading dataset "%s"' % tfrecord_dir) + tflib.init_tf({'gpu_options.allow_growth': True}) + dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size=0, repeat=False, shuffle_mb=0) + tflib.init_uninitialized_vars() + + print('Extracting images to "%s"' % output_dir) + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + idx = 0 + while True: + if idx % 10 == 0: + print('%d\r' % idx, end='', flush=True) + try: + images, _labels = dset.get_minibatch_np(1) + except tf.errors.OutOfRangeError: + break + if images.shape[1] == 1: + img = PIL.Image.fromarray(images[0][0], 'L') + else: + img = PIL.Image.fromarray(images[0].transpose(1, 2, 0), 'RGB') + img.save(os.path.join(output_dir, 'img%08d.png' % idx)) + idx += 1 + print('Extracted %d images.' % idx) + +#---------------------------------------------------------------------------- + +def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels): + max_label_size = 0 if ignore_labels else 'full' + print('Loading dataset "%s"' % tfrecord_dir_a) + tflib.init_tf({'gpu_options.allow_growth': True}) + dset_a = dataset.TFRecordDataset(tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle_mb=0) + print('Loading dataset "%s"' % tfrecord_dir_b) + dset_b = dataset.TFRecordDataset(tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle_mb=0) + tflib.init_uninitialized_vars() + + print('Comparing datasets') + idx = 0 + identical_images = 0 + identical_labels = 0 + while True: + if idx % 100 == 0: + print('%d\r' % idx, end='', flush=True) + try: + images_a, labels_a = dset_a.get_minibatch_np(1) + except tf.errors.OutOfRangeError: + images_a, labels_a = None, None + try: + images_b, labels_b = dset_b.get_minibatch_np(1) + except tf.errors.OutOfRangeError: + images_b, labels_b = None, None + if images_a is None or images_b is None: + if images_a is not None or images_b is not None: + print('Datasets contain different number of images') + break + if images_a.shape == images_b.shape and np.all(images_a == images_b): + identical_images += 1 + else: + print('Image %d is different' % idx) + if labels_a.shape == labels_b.shape and np.all(labels_a == labels_b): + identical_labels += 1 + else: + print('Label %d is different' % idx) + idx += 1 + print('Identical images: %d / %d' % (identical_images, idx)) + if not ignore_labels: + print('Identical labels: %d / %d' % (identical_labels, idx)) + +#---------------------------------------------------------------------------- + +def create_mnist(tfrecord_dir, mnist_dir): + print('Loading MNIST from "%s"' % mnist_dir) + import gzip + with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: + images = np.frombuffer(file.read(), np.uint8, offset=16) + with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file: + labels = np.frombuffer(file.read(), np.uint8, offset=8) + images = images.reshape(-1, 1, 28, 28) + images = np.pad(images, [(0,0), (0,0), (2,2), (2,2)], 'constant', constant_values=0) + assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (60000,) and labels.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) 
as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123): + print('Loading MNIST from "%s"' % mnist_dir) + import gzip + with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: + images = np.frombuffer(file.read(), np.uint8, offset=16) + images = images.reshape(-1, 28, 28) + images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0) + assert images.shape == (60000, 32, 32) and images.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + + with TFRecordExporter(tfrecord_dir, num_images) as tfr: + rnd = np.random.RandomState(random_seed) + for _idx in range(num_images): + tfr.add_image(images[rnd.randint(images.shape[0], size=3)]) + +#---------------------------------------------------------------------------- + +def create_cifar10(tfrecord_dir, cifar10_dir): + print('Loading CIFAR-10 from "%s"' % cifar10_dir) + import pickle + images = [] + labels = [] + for batch in range(1, 6): + with open(os.path.join(cifar10_dir, 'data_batch_%d' % batch), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images.append(data['data'].reshape(-1, 3, 32, 32)) + labels.append(data['labels']) + images = np.concatenate(images) + labels = np.concatenate(labels) + assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (50000,) and labels.dtype == np.int32 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_cifar100(tfrecord_dir, cifar100_dir): + print('Loading CIFAR-100 from "%s"' % cifar100_dir) + import pickle + with open(os.path.join(cifar100_dir, 'train'), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images = data['data'].reshape(-1, 3, 32, 32) + labels = np.array(data['fine_labels']) + assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 + assert labels.shape == (50000,) and labels.dtype == np.int32 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 99 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_svhn(tfrecord_dir, svhn_dir): + print('Loading SVHN from "%s"' % svhn_dir) + import pickle + images = [] + labels = [] + for batch in range(1, 4): + with open(os.path.join(svhn_dir, 'train_%d.pkl' % batch), 'rb') as file: + data = pickle.load(file, encoding='latin1') + images.append(data[0]) + labels.append(data[1]) + images = np.concatenate(images) + labels = np.concatenate(labels) + assert images.shape == (73257, 3, 32, 32) 
and images.dtype == np.uint8 + assert labels.shape == (73257,) and labels.dtype == np.uint8 + assert np.min(images) == 0 and np.max(images) == 255 + assert np.min(labels) == 0 and np.max(labels) == 9 + onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) + onehot[np.arange(labels.size), labels] = 1.0 + + with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + tfr.add_image(images[order[idx]]) + tfr.add_labels(onehot[order]) + +#---------------------------------------------------------------------------- + +def create_lsun(tfrecord_dir, lmdb_dir, resolution=256, max_images=None): + print('Loading LSUN dataset from "%s"' % lmdb_dir) + import lmdb # pip install lmdb # pylint: disable=import-error + import cv2 # pip install opencv-python + import io + with lmdb.open(lmdb_dir, readonly=True).begin(write=False) as txn: + total_images = txn.stat()['entries'] # pylint: disable=no-value-for-parameter + if max_images is None: + max_images = total_images + with TFRecordExporter(tfrecord_dir, max_images) as tfr: + for _idx, (_key, value) in enumerate(txn.cursor()): + try: + try: + img = cv2.imdecode(np.fromstring(value, dtype=np.uint8), 1) + if img is None: + raise IOError('cv2.imdecode failed') + img = img[:, :, ::-1] # BGR => RGB + except IOError: + img = np.asarray(PIL.Image.open(io.BytesIO(value))) + crop = np.min(img.shape[:2]) + img = img[(img.shape[0] - crop) // 2 : (img.shape[0] + crop) // 2, (img.shape[1] - crop) // 2 : (img.shape[1] + crop) // 2] + img = PIL.Image.fromarray(img, 'RGB') + img = img.resize((resolution, resolution), PIL.Image.ANTIALIAS) + img = np.asarray(img) + img = img.transpose([2, 0, 1]) # HWC => CHW + tfr.add_image(img) + except: + print(sys.exc_info()[1]) + if tfr.cur_images == max_images: + break + +#---------------------------------------------------------------------------- + +def create_lsun_wide(tfrecord_dir, lmdb_dir, width=512, height=384, max_images=None): + assert width == 2 ** int(np.round(np.log2(width))) + assert height <= width + print('Loading LSUN dataset from "%s"' % lmdb_dir) + import lmdb # pip install lmdb # pylint: disable=import-error + import cv2 # pip install opencv-python + import io + with lmdb.open(lmdb_dir, readonly=True).begin(write=False) as txn: + total_images = txn.stat()['entries'] # pylint: disable=no-value-for-parameter + if max_images is None: + max_images = total_images + with TFRecordExporter(tfrecord_dir, max_images, print_progress=False) as tfr: + for idx, (_key, value) in enumerate(txn.cursor()): + try: + try: + img = cv2.imdecode(np.fromstring(value, dtype=np.uint8), 1) + if img is None: + raise IOError('cv2.imdecode failed') + img = img[:, :, ::-1] # BGR => RGB + except IOError: + img = np.asarray(PIL.Image.open(io.BytesIO(value))) + + ch = int(np.round(width * img.shape[0] / img.shape[1])) + if img.shape[1] < width or ch < height: + continue + + img = img[(img.shape[0] - ch) // 2 : (img.shape[0] + ch) // 2] + img = PIL.Image.fromarray(img, 'RGB') + img = img.resize((width, height), PIL.Image.ANTIALIAS) + img = np.asarray(img) + img = img.transpose([2, 0, 1]) # HWC => CHW + + canvas = np.zeros([3, width, width], dtype=np.uint8) + canvas[:, (width - height) // 2 : (width + height) // 2] = img + tfr.add_image(canvas) + print('\r%d / %d => %d ' % (idx + 1, total_images, tfr.cur_images), end='') + + except: + print(sys.exc_info()[1]) + if tfr.cur_images == max_images: + break + print() + 
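+# Note: create_lsun_wide() center-crops each image to the target aspect ratio, resizes it to width x height, and letterboxes it onto a square width-by-width canvas so the exported TFRecords keep power-of-two resolutions.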
+#---------------------------------------------------------------------------- + +def create_celeba(tfrecord_dir, celeba_dir, cx=89, cy=121): + print('Loading CelebA from "%s"' % celeba_dir) + glob_pattern = os.path.join(celeba_dir, 'img_align_celeba_png', '*.png') + image_filenames = sorted(glob.glob(glob_pattern)) + expected_images = 202599 + if len(image_filenames) != expected_images: + error('Expected to find %d images' % expected_images) + + with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: + order = tfr.choose_shuffled_order() + for idx in range(order.size): + img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) + assert img.shape == (218, 178, 3) + img = img[cy - 64 : cy + 64, cx - 64 : cx + 64] + img = img.transpose(2, 0, 1) # HWC => CHW + tfr.add_image(img) + +#---------------------------------------------------------------------------- + +def create_from_images(tfrecord_dir, image_dir, shuffle): + print('Loading images from "%s"' % image_dir) + image_filenames = sorted(glob.glob(os.path.join(image_dir, '*'))) + if len(image_filenames) == 0: + error('No input images found') + + img = np.asarray(PIL.Image.open(image_filenames[0])) + resolution = img.shape[0] + channels = img.shape[2] if img.ndim == 3 else 1 + if img.shape[1] != resolution: + error('Input images must have the same width and height') + if resolution != 2 ** int(np.floor(np.log2(resolution))): + error('Input image resolution must be a power-of-two') + if channels not in [1, 3]: + error('Input images must be stored as RGB or grayscale') + + with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: + order = tfr.choose_shuffled_order() if shuffle else np.arange(len(image_filenames)) + for idx in range(order.size): + img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) + if channels == 1: + img = img[np.newaxis, :, :] # HW => CHW + else: + img = img.transpose([2, 0, 1]) # HWC => CHW + tfr.add_image(img) + +#---------------------------------------------------------------------------- + +def create_from_hdf5(tfrecord_dir, hdf5_filename, shuffle): + print('Loading HDF5 archive from "%s"' % hdf5_filename) + import h5py # conda install h5py + with h5py.File(hdf5_filename, 'r') as hdf5_file: + hdf5_data = max([value for key, value in hdf5_file.items() if key.startswith('data')], key=lambda lod: lod.shape[3]) + with TFRecordExporter(tfrecord_dir, hdf5_data.shape[0]) as tfr: + order = tfr.choose_shuffled_order() if shuffle else np.arange(hdf5_data.shape[0]) + for idx in range(order.size): + tfr.add_image(hdf5_data[order[idx]]) + npy_filename = os.path.splitext(hdf5_filename)[0] + '-labels.npy' + if os.path.isfile(npy_filename): + tfr.add_labels(np.load(npy_filename)[order]) + +#---------------------------------------------------------------------------- + +def execute_cmdline(argv): + prog = argv[0] + parser = argparse.ArgumentParser( + prog = prog, + description = 'Tool for creating multi-resolution TFRecords datasets for StyleGAN and ProGAN.', + epilog = 'Type "%s -h" for more information.' 
% prog) + + subparsers = parser.add_subparsers(dest='command') + subparsers.required = True + def add_command(cmd, desc, example=None): + epilog = 'Example: %s %s' % (prog, example) if example is not None else None + return subparsers.add_parser(cmd, description=desc, help=desc, epilog=epilog) + + p = add_command( 'display', 'Display images in dataset.', + 'display datasets/mnist') + p.add_argument( 'tfrecord_dir', help='Directory containing dataset') + + p = add_command( 'extract', 'Extract images from dataset.', + 'extract datasets/mnist mnist-images') + p.add_argument( 'tfrecord_dir', help='Directory containing dataset') + p.add_argument( 'output_dir', help='Directory to extract the images into') + + p = add_command( 'compare', 'Compare two datasets.', + 'compare datasets/mydataset datasets/mnist') + p.add_argument( 'tfrecord_dir_a', help='Directory containing first dataset') + p.add_argument( 'tfrecord_dir_b', help='Directory containing second dataset') + p.add_argument( '--ignore_labels', help='Ignore labels (default: 0)', type=int, default=0) + + p = add_command( 'create_mnist', 'Create dataset for MNIST.', + 'create_mnist datasets/mnist ~/downloads/mnist') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'mnist_dir', help='Directory containing MNIST') + + p = add_command( 'create_mnistrgb', 'Create dataset for MNIST-RGB.', + 'create_mnistrgb datasets/mnistrgb ~/downloads/mnist') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'mnist_dir', help='Directory containing MNIST') + p.add_argument( '--num_images', help='Number of composite images to create (default: 1000000)', type=int, default=1000000) + p.add_argument( '--random_seed', help='Random seed (default: 123)', type=int, default=123) + + p = add_command( 'create_cifar10', 'Create dataset for CIFAR-10.', + 'create_cifar10 datasets/cifar10 ~/downloads/cifar10') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'cifar10_dir', help='Directory containing CIFAR-10') + + p = add_command( 'create_cifar100', 'Create dataset for CIFAR-100.', + 'create_cifar100 datasets/cifar100 ~/downloads/cifar100') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'cifar100_dir', help='Directory containing CIFAR-100') + + p = add_command( 'create_svhn', 'Create dataset for SVHN.', + 'create_svhn datasets/svhn ~/downloads/svhn') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'svhn_dir', help='Directory containing SVHN') + + p = add_command( 'create_lsun', 'Create dataset for single LSUN category.', + 'create_lsun datasets/lsun-car-100k ~/downloads/lsun/car_lmdb --resolution 256 --max_images 100000') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'lmdb_dir', help='Directory containing LMDB database') + p.add_argument( '--resolution', help='Output resolution (default: 256)', type=int, default=256) + p.add_argument( '--max_images', help='Maximum number of images (default: none)', type=int, default=None) + + p = add_command( 'create_lsun_wide', 'Create LSUN dataset with non-square aspect ratio.', + 'create_lsun_wide datasets/lsun-car-512x384 ~/downloads/lsun/car_lmdb --width 512 --height 384') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'lmdb_dir', help='Directory containing LMDB database') + p.add_argument( '--width', 
help='Output width (default: 512)', type=int, default=512) + p.add_argument( '--height', help='Output height (default: 384)', type=int, default=384) + p.add_argument( '--max_images', help='Maximum number of images (default: none)', type=int, default=None) + + p = add_command( 'create_celeba', 'Create dataset for CelebA.', + 'create_celeba datasets/celeba ~/downloads/celeba') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'celeba_dir', help='Directory containing CelebA') + p.add_argument( '--cx', help='Center X coordinate (default: 89)', type=int, default=89) + p.add_argument( '--cy', help='Center Y coordinate (default: 121)', type=int, default=121) + + p = add_command( 'create_from_images', 'Create dataset from a directory full of images.', + 'create_from_images datasets/mydataset myimagedir') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'image_dir', help='Directory containing the images') + p.add_argument( '--shuffle', help='Randomize image order (default: 1)', type=int, default=1) + + p = add_command( 'create_from_hdf5', 'Create dataset from legacy HDF5 archive.', + 'create_from_hdf5 datasets/celebahq ~/downloads/celeba-hq-1024x1024.h5') + p.add_argument( 'tfrecord_dir', help='New dataset directory to be created') + p.add_argument( 'hdf5_filename', help='HDF5 archive containing the images') + p.add_argument( '--shuffle', help='Randomize image order (default: 1)', type=int, default=1) + + args = parser.parse_args(argv[1:] if len(argv) > 1 else ['-h']) + func = globals()[args.command] + del args.command + func(**vars(args)) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + execute_cmdline(sys.argv) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/__init__.py b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..24f79eaf5828b921836b7961d0a41755c3018ecc --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +from . import submission + +from .submission.run_context import RunContext + +from .submission.submit import SubmitTarget +from .submission.submit import PathType +from .submission.submit import SubmitConfig +from .submission.submit import get_path_from_template +from .submission.submit import submit_run + +from .util import EasyDict + +submit_config: SubmitConfig = None # Package level variable for SubmitConfig which is only valid when inside the run function. diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/submission/__init__.py b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/submission/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..613ee846008e201564ee46fa5b5726c96302fef1 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/submission/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. 
All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +from . import run_context +from . import submit diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/submission/_internal/run.py b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/submission/_internal/run.py new file mode 100644 index 0000000000000000000000000000000000000000..fd1dc53bfbcceb85c8baf0a7f6402d62a9ece674 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/submission/_internal/run.py @@ -0,0 +1,45 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Helper for launching run functions in computing clusters. + +During the submit process, this file is copied to the appropriate run dir. +When the job is launched in the cluster, this module is the first thing that +is run inside the docker container. +""" + +import os +import pickle +import sys + +# PYTHONPATH should have been set so that the run_dir/src is in it +import dnnlib + +def main(): + if not len(sys.argv) >= 4: + raise RuntimeError("This script needs three arguments: run_dir, task_name and host_name!") + + run_dir = str(sys.argv[1]) + task_name = str(sys.argv[2]) + host_name = str(sys.argv[3]) + + submit_config_path = os.path.join(run_dir, "submit_config.pkl") + + # SubmitConfig should have been pickled to the run dir + if not os.path.exists(submit_config_path): + raise RuntimeError("SubmitConfig pickle file does not exist!") + + submit_config: dnnlib.SubmitConfig = pickle.load(open(submit_config_path, "rb")) + dnnlib.submission.submit.set_user_name_override(submit_config.user_name) + + submit_config.task_name = task_name + submit_config.host_name = host_name + + dnnlib.submission.submit.run_wrapper(submit_config) + +if __name__ == "__main__": + main() diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/submission/run_context.py b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/submission/run_context.py new file mode 100644 index 0000000000000000000000000000000000000000..0a5587ea789289f726d0f9c6215a62b6754168ee --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/submission/run_context.py @@ -0,0 +1,99 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Helpers for managing the run/training loop.""" + +import datetime +import json +import os +import pprint +import time +import types + +from typing import Any + +from . import submit + + +class RunContext(object): + """Helper class for managing the run/training loop. + + The context will hide the implementation details of a basic run/training loop. + It will set things up properly, tell if run should be stopped, and then cleans up. 
+ User should call update periodically and use should_stop to determine if run should be stopped. + + Args: + submit_config: The SubmitConfig that is used for the current run. + config_module: The whole config module that is used for the current run. + max_epoch: Optional cached value for the max_epoch variable used in update. + """ + + def __init__(self, submit_config: submit.SubmitConfig, config_module: types.ModuleType = None, max_epoch: Any = None): + self.submit_config = submit_config + self.should_stop_flag = False + self.has_closed = False + self.start_time = time.time() + self.last_update_time = time.time() + self.last_update_interval = 0.0 + self.max_epoch = max_epoch + + # pretty print the all the relevant content of the config module to a text file + if config_module is not None: + with open(os.path.join(submit_config.run_dir, "config.txt"), "w") as f: + filtered_dict = {k: v for k, v in config_module.__dict__.items() if not k.startswith("_") and not isinstance(v, (types.ModuleType, types.FunctionType, types.LambdaType, submit.SubmitConfig, type))} + pprint.pprint(filtered_dict, stream=f, indent=4, width=200, compact=False) + + # write out details about the run to a text file + self.run_txt_data = {"task_name": submit_config.task_name, "host_name": submit_config.host_name, "start_time": datetime.datetime.now().isoformat(sep=" ")} + with open(os.path.join(submit_config.run_dir, "run.txt"), "w") as f: + pprint.pprint(self.run_txt_data, stream=f, indent=4, width=200, compact=False) + + def __enter__(self) -> "RunContext": + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.close() + + def update(self, loss: Any = 0, cur_epoch: Any = 0, max_epoch: Any = None) -> None: + """Do general housekeeping and keep the state of the context up-to-date. + Should be called often enough but not in a tight loop.""" + assert not self.has_closed + + self.last_update_interval = time.time() - self.last_update_time + self.last_update_time = time.time() + + if os.path.exists(os.path.join(self.submit_config.run_dir, "abort.txt")): + self.should_stop_flag = True + + max_epoch_val = self.max_epoch if max_epoch is None else max_epoch + + def should_stop(self) -> bool: + """Tell whether a stopping condition has been triggered one way or another.""" + return self.should_stop_flag + + def get_time_since_start(self) -> float: + """How much time has passed since the creation of the context.""" + return time.time() - self.start_time + + def get_time_since_last_update(self) -> float: + """How much time has passed since the last call to update.""" + return time.time() - self.last_update_time + + def get_last_update_interval(self) -> float: + """How much time passed between the previous two calls to update.""" + return self.last_update_interval + + def close(self) -> None: + """Close the context and clean up. 
+ Should only be called once.""" + if not self.has_closed: + # update the run.txt with stopping time + self.run_txt_data["stop_time"] = datetime.datetime.now().isoformat(sep=" ") + with open(os.path.join(self.submit_config.run_dir, "run.txt"), "w") as f: + pprint.pprint(self.run_txt_data, stream=f, indent=4, width=200, compact=False) + + self.has_closed = True diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/submission/submit.py b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/submission/submit.py new file mode 100644 index 0000000000000000000000000000000000000000..60ff428717c13896bb78625b3eaf651d9fb9695d --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/submission/submit.py @@ -0,0 +1,290 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Submit a function to be run either locally or in a computing cluster.""" + +import copy +import io +import os +import pathlib +import pickle +import platform +import pprint +import re +import shutil +import time +import traceback + +import zipfile + +from enum import Enum + +from .. import util +from ..util import EasyDict + + +class SubmitTarget(Enum): + """The target where the function should be run. + + LOCAL: Run it locally. + """ + LOCAL = 1 + + +class PathType(Enum): + """Determines in which format should a path be formatted. + + WINDOWS: Format with Windows style. + LINUX: Format with Linux/Posix style. + AUTO: Use current OS type to select either WINDOWS or LINUX. + """ + WINDOWS = 1 + LINUX = 2 + AUTO = 3 + + +_user_name_override = None + + +class SubmitConfig(util.EasyDict): + """Strongly typed config dict needed to submit runs. + + Attributes: + run_dir_root: Path to the run dir root. Can be optionally templated with tags. Needs to always be run through get_path_from_template. + run_desc: Description of the run. Will be used in the run dir and task name. + run_dir_ignore: List of file patterns used to ignore files when copying files to the run dir. + run_dir_extra_files: List of (abs_path, rel_path) tuples of file paths. rel_path root will be the src directory inside the run dir. + submit_target: Submit target enum value. Used to select where the run is actually launched. + num_gpus: Number of GPUs used/requested for the run. + print_info: Whether to print debug information when submitting. + ask_confirmation: Whether to ask a confirmation before submitting. + run_id: Automatically populated value during submit. + run_name: Automatically populated value during submit. + run_dir: Automatically populated value during submit. + run_func_name: Automatically populated value during submit. + run_func_kwargs: Automatically populated value during submit. + user_name: Automatically populated value during submit. Can be set by the user which will then override the automatic value. + task_name: Automatically populated value during submit. + host_name: Automatically populated value during submit. 
+ """ + + def __init__(self): + super().__init__() + + # run (set these) + self.run_dir_root = "" # should always be passed through get_path_from_template + self.run_desc = "" + self.run_dir_ignore = ["__pycache__", "*.pyproj", "*.sln", "*.suo", ".cache", ".idea", ".vs", ".vscode"] + self.run_dir_extra_files = None + + # submit (set these) + self.submit_target = SubmitTarget.LOCAL + self.num_gpus = 1 + self.print_info = False + self.ask_confirmation = False + + # (automatically populated) + self.run_id = None + self.run_name = None + self.run_dir = None + self.run_func_name = None + self.run_func_kwargs = None + self.user_name = None + self.task_name = None + self.host_name = "localhost" + + +def get_path_from_template(path_template: str, path_type: PathType = PathType.AUTO) -> str: + """Replace tags in the given path template and return either Windows or Linux formatted path.""" + # automatically select path type depending on running OS + if path_type == PathType.AUTO: + if platform.system() == "Windows": + path_type = PathType.WINDOWS + elif platform.system() == "Linux": + path_type = PathType.LINUX + else: + raise RuntimeError("Unknown platform") + + path_template = path_template.replace("", get_user_name()) + + # return correctly formatted path + if path_type == PathType.WINDOWS: + return str(pathlib.PureWindowsPath(path_template)) + elif path_type == PathType.LINUX: + return str(pathlib.PurePosixPath(path_template)) + else: + raise RuntimeError("Unknown platform") + + +def get_template_from_path(path: str) -> str: + """Convert a normal path back to its template representation.""" + # replace all path parts with the template tags + path = path.replace("\\", "/") + return path + + +def convert_path(path: str, path_type: PathType = PathType.AUTO) -> str: + """Convert a normal path to template and the convert it back to a normal path with given path type.""" + path_template = get_template_from_path(path) + path = get_path_from_template(path_template, path_type) + return path + + +def set_user_name_override(name: str) -> None: + """Set the global username override value.""" + global _user_name_override + _user_name_override = name + + +def get_user_name(): + """Get the current user name.""" + if _user_name_override is not None: + return _user_name_override + elif platform.system() == "Windows": + return os.getlogin() + elif platform.system() == "Linux": + try: + import pwd # pylint: disable=import-error + return pwd.getpwuid(os.geteuid()).pw_name # pylint: disable=no-member + except: + return "unknown" + else: + raise RuntimeError("Unknown platform") + + +def _create_run_dir_local(submit_config: SubmitConfig) -> str: + """Create a new run dir with increasing ID number at the start.""" + run_dir_root = get_path_from_template(submit_config.run_dir_root, PathType.AUTO) + + if not os.path.exists(run_dir_root): + print("Creating the run dir root: {}".format(run_dir_root)) + os.makedirs(run_dir_root) + + submit_config.run_id = _get_next_run_id_local(run_dir_root) + submit_config.run_name = "{0:05d}-{1}".format(submit_config.run_id, submit_config.run_desc) + run_dir = os.path.join(run_dir_root, submit_config.run_name) + + if os.path.exists(run_dir): + raise RuntimeError("The run dir already exists! ({0})".format(run_dir)) + + print("Creating the run dir: {}".format(run_dir)) + os.makedirs(run_dir) + + return run_dir + + +def _get_next_run_id_local(run_dir_root: str) -> int: + """Reads all directory names in a given directory (non-recursive) and returns the next (increasing) run id. 
Assumes IDs are numbers at the start of the directory names.""" + dir_names = [d for d in os.listdir(run_dir_root) if os.path.isdir(os.path.join(run_dir_root, d))] + r = re.compile("^\\d+") # match one or more digits at the start of the string + run_id = 0 + + for dir_name in dir_names: + m = r.match(dir_name) + + if m is not None: + i = int(m.group()) + run_id = max(run_id, i + 1) + + return run_id + + +def _populate_run_dir(run_dir: str, submit_config: SubmitConfig) -> None: + """Copy all necessary files into the run dir. Assumes that the dir exists, is local, and is writable.""" + print("Copying files to the run dir") + files = [] + + run_func_module_dir_path = util.get_module_dir_by_obj_name(submit_config.run_func_name) + assert '.' in submit_config.run_func_name + for _idx in range(submit_config.run_func_name.count('.') - 1): + run_func_module_dir_path = os.path.dirname(run_func_module_dir_path) + files += util.list_dir_recursively_with_ignore(run_func_module_dir_path, ignores=submit_config.run_dir_ignore, add_base_to_relative=False) + + dnnlib_module_dir_path = util.get_module_dir_by_obj_name("dnnlib") + files += util.list_dir_recursively_with_ignore(dnnlib_module_dir_path, ignores=submit_config.run_dir_ignore, add_base_to_relative=True) + + if submit_config.run_dir_extra_files is not None: + files += submit_config.run_dir_extra_files + + files = [(f[0], os.path.join(run_dir, "src", f[1])) for f in files] + files += [(os.path.join(dnnlib_module_dir_path, "submission", "_internal", "run.py"), os.path.join(run_dir, "run.py"))] + + util.copy_files_and_create_dirs(files) + + pickle.dump(submit_config, open(os.path.join(run_dir, "submit_config.pkl"), "wb")) + + with open(os.path.join(run_dir, "submit_config.txt"), "w") as f: + pprint.pprint(submit_config, stream=f, indent=4, width=200, compact=False) + + +def run_wrapper(submit_config: SubmitConfig) -> None: + """Wrap the actual run function call for handling logging, exceptions, typing, etc.""" + is_local = submit_config.submit_target == SubmitTarget.LOCAL + + checker = None + + # when running locally, redirect stderr to stdout, log stdout to a file, and force flushing + if is_local: + logger = util.Logger(file_name=os.path.join(submit_config.run_dir, "log.txt"), file_mode="w", should_flush=True) + else: # when running in a cluster, redirect stderr to stdout, and just force flushing (log writing is handled by run.sh) + logger = util.Logger(file_name=None, should_flush=True) + + import dnnlib + dnnlib.submit_config = submit_config + + try: + print("dnnlib: Running {0}() on {1}...".format(submit_config.run_func_name, submit_config.host_name)) + start_time = time.time() + util.call_func_by_name(func_name=submit_config.run_func_name, submit_config=submit_config, **submit_config.run_func_kwargs) + print("dnnlib: Finished {0}() in {1}.".format(submit_config.run_func_name, util.format_time(time.time() - start_time))) + except: + if is_local: + raise + else: + traceback.print_exc() + + log_src = os.path.join(submit_config.run_dir, "log.txt") + log_dst = os.path.join(get_path_from_template(submit_config.run_dir_root), "{0}-error.txt".format(submit_config.run_name)) + shutil.copyfile(log_src, log_dst) + finally: + open(os.path.join(submit_config.run_dir, "_finished.txt"), "w").close() + + dnnlib.submit_config = None + logger.close() + + if checker is not None: + checker.stop() + + +def submit_run(submit_config: SubmitConfig, run_func_name: str, **run_func_kwargs) -> None: + """Create a run dir, gather files related to the run, copy files to the 
run dir, and launch the run in appropriate place.""" + submit_config = copy.copy(submit_config) + + if submit_config.user_name is None: + submit_config.user_name = get_user_name() + + submit_config.run_func_name = run_func_name + submit_config.run_func_kwargs = run_func_kwargs + + assert submit_config.submit_target == SubmitTarget.LOCAL + if submit_config.submit_target in {SubmitTarget.LOCAL}: + run_dir = _create_run_dir_local(submit_config) + + submit_config.task_name = "{0}-{1:05d}-{2}".format(submit_config.user_name, submit_config.run_id, submit_config.run_desc) + submit_config.run_dir = run_dir + _populate_run_dir(run_dir, submit_config) + + if submit_config.print_info: + print("\nSubmit config:\n") + pprint.pprint(submit_config, indent=4, width=200, compact=False) + print() + + if submit_config.ask_confirmation: + if not util.ask_yes_no("Continue submitting the job?"): + return + + run_wrapper(submit_config) diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/__init__.py b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d46901757829443f6d704bbc13079cba58ca1c92 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +from . import autosummary +from . import network +from . import optimizer +from . import tfutil + +from .tfutil import * +from .network import Network + +from .optimizer import Optimizer diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/autosummary.py b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/autosummary.py new file mode 100644 index 0000000000000000000000000000000000000000..23023dd4fc4c6c25106fe3a4d7dfae08eb83a0f4 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/autosummary.py @@ -0,0 +1,184 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Helper for adding automatically tracked values to Tensorboard. + +Autosummary creates an identity op that internally keeps track of the input +values and automatically shows up in TensorBoard. The reported value +represents an average over input components. The average is accumulated +constantly over time and flushed when save_summaries() is called. + +Notes: +- The output tensor must be used as an input for something else in the + graph. Otherwise, the autosummary op will not get executed, and the average + value will not get accumulated. +- It is perfectly fine to include autosummaries with the same name in + several places throughout the graph, even if they are executed concurrently. +- It is ok to also pass in a python scalar or numpy array. In this case, it + is added to the average immediately. 
+""" + +from collections import OrderedDict +import numpy as np +import tensorflow as tf +from tensorboard import summary as summary_lib +from tensorboard.plugins.custom_scalar import layout_pb2 + +from . import tfutil +from .tfutil import TfExpression +from .tfutil import TfExpressionEx + +_dtype = tf.float64 +_vars = OrderedDict() # name => [var, ...] +_immediate = OrderedDict() # name => update_op, update_value +_finalized = False +_merge_op = None + + +def _create_var(name: str, value_expr: TfExpression) -> TfExpression: + """Internal helper for creating autosummary accumulators.""" + assert not _finalized + name_id = name.replace("/", "_") + v = tf.cast(value_expr, _dtype) + + if v.shape.is_fully_defined(): + size = np.prod(tfutil.shape_to_list(v.shape)) + size_expr = tf.constant(size, dtype=_dtype) + else: + size = None + size_expr = tf.reduce_prod(tf.cast(tf.shape(v), _dtype)) + + if size == 1: + if v.shape.ndims != 0: + v = tf.reshape(v, []) + v = [size_expr, v, tf.square(v)] + else: + v = [size_expr, tf.reduce_sum(v), tf.reduce_sum(tf.square(v))] + v = tf.cond(tf.is_finite(v[1]), lambda: tf.stack(v), lambda: tf.zeros(3, dtype=_dtype)) + + with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.control_dependencies(None): + var = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False) # [sum(1), sum(x), sum(x**2)] + update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v)) + + if name in _vars: + _vars[name].append(var) + else: + _vars[name] = [var] + return update_op + + +def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None) -> TfExpressionEx: + """Create a new autosummary. + + Args: + name: Name to use in TensorBoard + value: TensorFlow expression or python value to track + passthru: Optionally return this TF node without modifications but tack an autosummary update side-effect to this node. + + Example use of the passthru mechanism: + + n = autosummary('l2loss', loss, passthru=n) + + This is a shorthand for the following code: + + with tf.control_dependencies([autosummary('l2loss', loss)]): + n = tf.identity(n) + """ + tfutil.assert_tf_initialized() + name_id = name.replace("/", "_") + + if tfutil.is_tf_expression(value): + with tf.name_scope("summary_" + name_id), tf.device(value.device): + update_op = _create_var(name, value) + with tf.control_dependencies([update_op]): + return tf.identity(value if passthru is None else passthru) + + else: # python scalar or numpy array + if name not in _immediate: + with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.device(None), tf.control_dependencies(None): + update_value = tf.placeholder(_dtype) + update_op = _create_var(name, update_value) + _immediate[name] = update_op, update_value + + update_op, update_value = _immediate[name] + tfutil.run(update_op, {update_value: value}) + return value if passthru is None else passthru + + +def finalize_autosummaries() -> None: + """Create the necessary ops to include autosummaries in TensorBoard report. + Note: This should be done only once per graph. + """ + global _finalized + tfutil.assert_tf_initialized() + + if _finalized: + return None + + _finalized = True + tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list]) + + # Create summary ops. 
+ with tf.device(None), tf.control_dependencies(None): + for name, vars_list in _vars.items(): + name_id = name.replace("/", "_") + with tfutil.absolute_name_scope("Autosummary/" + name_id): + moments = tf.add_n(vars_list) + moments /= moments[0] + with tf.control_dependencies([moments]): # read before resetting + reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list] + with tf.name_scope(None), tf.control_dependencies(reset_ops): # reset before reporting + mean = moments[1] + std = tf.sqrt(moments[2] - tf.square(moments[1])) + tf.summary.scalar(name, mean) + tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std) + tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std) + + # Group by category and chart name. + cat_dict = OrderedDict() + for series_name in sorted(_vars.keys()): + p = series_name.split("/") + cat = p[0] if len(p) >= 2 else "" + chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1] + if cat not in cat_dict: + cat_dict[cat] = OrderedDict() + if chart not in cat_dict[cat]: + cat_dict[cat][chart] = [] + cat_dict[cat][chart].append(series_name) + + # Setup custom_scalar layout. + categories = [] + for cat_name, chart_dict in cat_dict.items(): + charts = [] + for chart_name, series_names in chart_dict.items(): + series = [] + for series_name in series_names: + series.append(layout_pb2.MarginChartContent.Series( + value=series_name, + lower="xCustomScalars/" + series_name + "/margin_lo", + upper="xCustomScalars/" + series_name + "/margin_hi")) + margin = layout_pb2.MarginChartContent(series=series) + charts.append(layout_pb2.Chart(title=chart_name, margin=margin)) + categories.append(layout_pb2.Category(title=cat_name, chart=charts)) + layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories)) + return layout + +def save_summaries(file_writer, global_step=None): + """Call FileWriter.add_summary() with all summaries in the default graph, + automatically finalizing and merging them on the first call. + """ + global _merge_op + tfutil.assert_tf_initialized() + + if _merge_op is None: + layout = finalize_autosummaries() + if layout is not None: + file_writer.add_summary(layout) + with tf.device(None), tf.control_dependencies(None): + _merge_op = tf.summary.merge_all() + + file_writer.add_summary(_merge_op.eval(), global_step) diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/network.py b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/network.py new file mode 100644 index 0000000000000000000000000000000000000000..7a998564de27cea67abb846d88b265a03e3e7640 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/network.py @@ -0,0 +1,591 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Helper for managing networks.""" + +import types +import inspect +import re +import uuid +import sys +import numpy as np +import tensorflow as tf + +from collections import OrderedDict +from typing import Any, List, Tuple, Union + +from . import tfutil +from .. import util + +from .tfutil import TfExpression, TfExpressionEx + +_import_handlers = [] # Custom import handlers for dealing with legacy data in pickle import. 
+_import_module_src = dict() # Source code for temporary modules created during pickle import. + + +def import_handler(handler_func): + """Function decorator for declaring custom import handlers.""" + _import_handlers.append(handler_func) + return handler_func + + +class Network: + """Generic network abstraction. + + Acts as a convenience wrapper for a parameterized network construction + function, providing several utility methods and convenient access to + the inputs/outputs/weights. + + Network objects can be safely pickled and unpickled for long-term + archival purposes. The pickling works reliably as long as the underlying + network construction function is defined in a standalone Python module + that has no side effects or application-specific imports. + + Args: + name: Network name. Used to select TensorFlow name and variable scopes. + func_name: Fully qualified name of the underlying network construction function, or a top-level function object. + static_kwargs: Keyword arguments to be passed in to the network construction function. + + Attributes: + name: User-specified name, defaults to build func name if None. + scope: Unique TensorFlow scope containing template graph and variables, derived from the user-specified name. + static_kwargs: Arguments passed to the user-supplied build func. + components: Container for sub-networks. Passed to the build func, and retained between calls. + num_inputs: Number of input tensors. + num_outputs: Number of output tensors. + input_shapes: Input tensor shapes (NC or NCHW), including minibatch dimension. + output_shapes: Output tensor shapes (NC or NCHW), including minibatch dimension. + input_shape: Short-hand for input_shapes[0]. + output_shape: Short-hand for output_shapes[0]. + input_templates: Input placeholders in the template graph. + output_templates: Output tensors in the template graph. + input_names: Name string for each input. + output_names: Name string for each output. + own_vars: Variables defined by this network (local_name => var), excluding sub-networks. + vars: All variables (local_name => var). + trainables: All trainable variables (local_name => var). + var_global_to_local: Mapping from variable global names to local names. + """ + + def __init__(self, name: str = None, func_name: Any = None, **static_kwargs): + tfutil.assert_tf_initialized() + assert isinstance(name, str) or name is None + assert func_name is not None + assert isinstance(func_name, str) or util.is_top_level_function(func_name) + assert util.is_pickleable(static_kwargs) + + self._init_fields() + self.name = name + self.static_kwargs = util.EasyDict(static_kwargs) + + # Locate the user-specified network build function. + if util.is_top_level_function(func_name): + func_name = util.get_top_level_function_name(func_name) + module, self._build_func_name = util.get_module_from_obj_name(func_name) + self._build_func = util.get_obj_from_module(module, self._build_func_name) + assert callable(self._build_func) + + # Dig up source code for the module containing the build function. + self._build_module_src = _import_module_src.get(module, None) + if self._build_module_src is None: + self._build_module_src = inspect.getsource(module) + + # Init TensorFlow graph. 
+ self._init_graph() + self.reset_own_vars() + + def _init_fields(self) -> None: + self.name = None + self.scope = None + self.static_kwargs = util.EasyDict() + self.components = util.EasyDict() + self.num_inputs = 0 + self.num_outputs = 0 + self.input_shapes = [[]] + self.output_shapes = [[]] + self.input_shape = [] + self.output_shape = [] + self.input_templates = [] + self.output_templates = [] + self.input_names = [] + self.output_names = [] + self.own_vars = OrderedDict() + self.vars = OrderedDict() + self.trainables = OrderedDict() + self.var_global_to_local = OrderedDict() + + self._build_func = None # User-supplied build function that constructs the network. + self._build_func_name = None # Name of the build function. + self._build_module_src = None # Full source code of the module containing the build function. + self._run_cache = dict() # Cached graph data for Network.run(). + + def _init_graph(self) -> None: + # Collect inputs. + self.input_names = [] + + for param in inspect.signature(self._build_func).parameters.values(): + if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty: + self.input_names.append(param.name) + + self.num_inputs = len(self.input_names) + assert self.num_inputs >= 1 + + # Choose name and scope. + if self.name is None: + self.name = self._build_func_name + assert re.match("^[A-Za-z0-9_.\\-]*$", self.name) + with tf.name_scope(None): + self.scope = tf.get_default_graph().unique_name(self.name, mark_as_used=True) + + # Finalize build func kwargs. + build_kwargs = dict(self.static_kwargs) + build_kwargs["is_template_graph"] = True + build_kwargs["components"] = self.components + + # Build template graph. + with tfutil.absolute_variable_scope(self.scope, reuse=tf.AUTO_REUSE), tfutil.absolute_name_scope(self.scope): # ignore surrounding scopes + assert tf.get_variable_scope().name == self.scope + assert tf.get_default_graph().get_name_scope() == self.scope + with tf.control_dependencies(None): # ignore surrounding control dependencies + self.input_templates = [tf.placeholder(tf.float32, name=name) for name in self.input_names] + out_expr = self._build_func(*self.input_templates, **build_kwargs) + + # Collect outputs. + assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) + self.output_templates = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) + self.num_outputs = len(self.output_templates) + assert self.num_outputs >= 1 + assert all(tfutil.is_tf_expression(t) for t in self.output_templates) + + # Perform sanity checks. + if any(t.shape.ndims is None for t in self.input_templates): + raise ValueError("Network input shapes not defined. Please call x.set_shape() for each input.") + if any(t.shape.ndims is None for t in self.output_templates): + raise ValueError("Network output shapes not defined. Please call x.set_shape() where applicable.") + if any(not isinstance(comp, Network) for comp in self.components.values()): + raise ValueError("Components of a Network must be Networks themselves.") + if len(self.components) != len(set(comp.name for comp in self.components.values())): + raise ValueError("Components of a Network must have unique names.") + + # List inputs and outputs. 
+ self.input_shapes = [tfutil.shape_to_list(t.shape) for t in self.input_templates] + self.output_shapes = [tfutil.shape_to_list(t.shape) for t in self.output_templates] + self.input_shape = self.input_shapes[0] + self.output_shape = self.output_shapes[0] + self.output_names = [t.name.split("/")[-1].split(":")[0] for t in self.output_templates] + + # List variables. + self.own_vars = OrderedDict((var.name[len(self.scope) + 1:].split(":")[0], var) for var in tf.global_variables(self.scope + "/")) + self.vars = OrderedDict(self.own_vars) + self.vars.update((comp.name + "/" + name, var) for comp in self.components.values() for name, var in comp.vars.items()) + self.trainables = OrderedDict((name, var) for name, var in self.vars.items() if var.trainable) + self.var_global_to_local = OrderedDict((var.name.split(":")[0], name) for name, var in self.vars.items()) + + def reset_own_vars(self) -> None: + """Re-initialize all variables of this network, excluding sub-networks.""" + tfutil.run([var.initializer for var in self.own_vars.values()]) + + def reset_vars(self) -> None: + """Re-initialize all variables of this network, including sub-networks.""" + tfutil.run([var.initializer for var in self.vars.values()]) + + def reset_trainables(self) -> None: + """Re-initialize all trainable variables of this network, including sub-networks.""" + tfutil.run([var.initializer for var in self.trainables.values()]) + + def get_output_for(self, *in_expr: TfExpression, return_as_list: bool = False, **dynamic_kwargs) -> Union[TfExpression, List[TfExpression]]: + """Construct TensorFlow expression(s) for the output(s) of this network, given the input expression(s).""" + assert len(in_expr) == self.num_inputs + assert not all(expr is None for expr in in_expr) + + # Finalize build func kwargs. + build_kwargs = dict(self.static_kwargs) + build_kwargs.update(dynamic_kwargs) + build_kwargs["is_template_graph"] = False + build_kwargs["components"] = self.components + + # Build TensorFlow graph to evaluate the network. + with tfutil.absolute_variable_scope(self.scope, reuse=True), tf.name_scope(self.name): + assert tf.get_variable_scope().name == self.scope + valid_inputs = [expr for expr in in_expr if expr is not None] + final_inputs = [] + for expr, name, shape in zip(in_expr, self.input_names, self.input_shapes): + if expr is not None: + expr = tf.identity(expr, name=name) + else: + expr = tf.zeros([tf.shape(valid_inputs[0])[0]] + shape[1:], name=name) + final_inputs.append(expr) + out_expr = self._build_func(*final_inputs, **build_kwargs) + + # Propagate input shapes back to the user-specified expressions. + for expr, final in zip(in_expr, final_inputs): + if isinstance(expr, tf.Tensor): + expr.set_shape(final.shape) + + # Express outputs in the desired format. 
+ assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) + if return_as_list: + out_expr = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) + return out_expr + + def get_var_local_name(self, var_or_global_name: Union[TfExpression, str]) -> str: + """Get the local name of a given variable, without any surrounding name scopes.""" + assert tfutil.is_tf_expression(var_or_global_name) or isinstance(var_or_global_name, str) + global_name = var_or_global_name if isinstance(var_or_global_name, str) else var_or_global_name.name + return self.var_global_to_local[global_name] + + def find_var(self, var_or_local_name: Union[TfExpression, str]) -> TfExpression: + """Find variable by local or global name.""" + assert tfutil.is_tf_expression(var_or_local_name) or isinstance(var_or_local_name, str) + return self.vars[var_or_local_name] if isinstance(var_or_local_name, str) else var_or_local_name + + def get_var(self, var_or_local_name: Union[TfExpression, str]) -> np.ndarray: + """Get the value of a given variable as NumPy array. + Note: This method is very inefficient -- prefer to use tflib.run(list_of_vars) whenever possible.""" + return self.find_var(var_or_local_name).eval() + + def set_var(self, var_or_local_name: Union[TfExpression, str], new_value: Union[int, float, np.ndarray]) -> None: + """Set the value of a given variable based on the given NumPy array. + Note: This method is very inefficient -- prefer to use tflib.set_vars() whenever possible.""" + tfutil.set_vars({self.find_var(var_or_local_name): new_value}) + + def __getstate__(self) -> dict: + """Pickle export.""" + state = dict() + state["version"] = 3 + state["name"] = self.name + state["static_kwargs"] = dict(self.static_kwargs) + state["components"] = dict(self.components) + state["build_module_src"] = self._build_module_src + state["build_func_name"] = self._build_func_name + state["variables"] = list(zip(self.own_vars.keys(), tfutil.run(list(self.own_vars.values())))) + return state + + def __setstate__(self, state: dict) -> None: + """Pickle import.""" + # pylint: disable=attribute-defined-outside-init + tfutil.assert_tf_initialized() + self._init_fields() + + # Execute custom import handlers. + for handler in _import_handlers: + state = handler(state) + + # Set basic fields. + assert state["version"] in [2, 3] + self.name = state["name"] + self.static_kwargs = util.EasyDict(state["static_kwargs"]) + self.components = util.EasyDict(state.get("components", {})) + self._build_module_src = state["build_module_src"] + self._build_func_name = state["build_func_name"] + + # Create temporary module from the imported source code. + module_name = "_tflib_network_import_" + uuid.uuid4().hex + module = types.ModuleType(module_name) + sys.modules[module_name] = module + _import_module_src[module] = self._build_module_src + exec(self._build_module_src, module.__dict__) # pylint: disable=exec-used + + # Locate network build function in the temporary module. + self._build_func = util.get_obj_from_module(module, self._build_func_name) + assert callable(self._build_func) + + # Init TensorFlow graph. 
+ self._init_graph() + self.reset_own_vars() + tfutil.set_vars({self.find_var(name): value for name, value in state["variables"]}) + + def clone(self, name: str = None, **new_static_kwargs) -> "Network": + """Create a clone of this network with its own copy of the variables.""" + # pylint: disable=protected-access + net = object.__new__(Network) + net._init_fields() + net.name = name if name is not None else self.name + net.static_kwargs = util.EasyDict(self.static_kwargs) + net.static_kwargs.update(new_static_kwargs) + net._build_module_src = self._build_module_src + net._build_func_name = self._build_func_name + net._build_func = self._build_func + net._init_graph() + net.copy_vars_from(self) + return net + + def copy_own_vars_from(self, src_net: "Network") -> None: + """Copy the values of all variables from the given network, excluding sub-networks.""" + names = [name for name in self.own_vars.keys() if name in src_net.own_vars] + tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) + + def copy_vars_from(self, src_net: "Network") -> None: + """Copy the values of all variables from the given network, including sub-networks.""" + names = [name for name in self.vars.keys() if name in src_net.vars] + tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) + + def copy_trainables_from(self, src_net: "Network") -> None: + """Copy the values of all trainable variables from the given network, including sub-networks.""" + names = [name for name in self.trainables.keys() if name in src_net.trainables] + tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) + + def convert(self, new_func_name: str, new_name: str = None, **new_static_kwargs) -> "Network": + """Create new network with the given parameters, and copy all variables from this network.""" + if new_name is None: + new_name = self.name + static_kwargs = dict(self.static_kwargs) + static_kwargs.update(new_static_kwargs) + net = Network(name=new_name, func_name=new_func_name, **static_kwargs) + net.copy_vars_from(self) + return net + + def setup_as_moving_average_of(self, src_net: "Network", beta: TfExpressionEx = 0.99, beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation: + """Construct a TensorFlow op that updates the variables of this network + to be slightly closer to those of the given network.""" + with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"): + ops = [] + for name, var in self.vars.items(): + if name in src_net.vars: + cur_beta = beta if name in self.trainables else beta_nontrainable + new_value = tfutil.lerp(src_net.vars[name], var, cur_beta) + ops.append(var.assign(new_value)) + return tf.group(*ops) + + def run(self, + *in_arrays: Tuple[Union[np.ndarray, None], ...], + input_transform: dict = None, + output_transform: dict = None, + return_as_list: bool = False, + print_progress: bool = False, + minibatch_size: int = None, + num_gpus: int = 1, + assume_frozen: bool = False, + **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]: + """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s). + + Args: + input_transform: A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network. + The dict must contain a 'func' field that points to a top-level function. The function is called with the input + TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. 
+ output_transform: A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network. + The dict must contain a 'func' field that points to a top-level function. The function is called with the output + TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. + return_as_list: True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs. + print_progress: Print progress to the console? Useful for very large input arrays. + minibatch_size: Maximum minibatch size to use, None = disable batching. + num_gpus: Number of GPUs to use. + assume_frozen: Improve multi-GPU performance by assuming that the trainable parameters will remain changed between calls. + dynamic_kwargs: Additional keyword arguments to be passed into the network build function. + """ + assert len(in_arrays) == self.num_inputs + assert not all(arr is None for arr in in_arrays) + assert input_transform is None or util.is_top_level_function(input_transform["func"]) + assert output_transform is None or util.is_top_level_function(output_transform["func"]) + output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs) + num_items = in_arrays[0].shape[0] + if minibatch_size is None: + minibatch_size = num_items + + # Construct unique hash key from all arguments that affect the TensorFlow graph. + key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs) + def unwind_key(obj): + if isinstance(obj, dict): + return [(key, unwind_key(value)) for key, value in sorted(obj.items())] + if callable(obj): + return util.get_top_level_function_name(obj) + return obj + key = repr(unwind_key(key)) + + # Build graph. + if key not in self._run_cache: + with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None): + with tf.device("/cpu:0"): + in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names] + in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr])) + + out_split = [] + for gpu in range(num_gpus): + with tf.device("/gpu:%d" % gpu): + net_gpu = self.clone() if assume_frozen else self + in_gpu = in_split[gpu] + + if input_transform is not None: + in_kwargs = dict(input_transform) + in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs) + in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu) + + assert len(in_gpu) == self.num_inputs + out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs) + + if output_transform is not None: + out_kwargs = dict(output_transform) + out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs) + out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu) + + assert len(out_gpu) == self.num_outputs + out_split.append(out_gpu) + + with tf.device("/cpu:0"): + out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)] + self._run_cache[key] = in_expr, out_expr + + # Run minibatches. 
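+        # Note: output arrays are preallocated for the full batch and filled one
+        # minibatch slice at a time; any input passed as None is replaced by a zero
+        # array of the matching shape before being fed to the graph.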
+ in_expr, out_expr = self._run_cache[key] + out_arrays = [np.empty([num_items] + tfutil.shape_to_list(expr.shape)[1:], expr.dtype.name) for expr in out_expr] + + for mb_begin in range(0, num_items, minibatch_size): + if print_progress: + print("\r%d / %d" % (mb_begin, num_items), end="") + + mb_end = min(mb_begin + minibatch_size, num_items) + mb_num = mb_end - mb_begin + mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)] + mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in))) + + for dst, src in zip(out_arrays, mb_out): + dst[mb_begin: mb_end] = src + + # Done. + if print_progress: + print("\r%d / %d" % (num_items, num_items)) + + if not return_as_list: + out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays) + return out_arrays + + def list_ops(self) -> List[TfExpression]: + include_prefix = self.scope + "/" + exclude_prefix = include_prefix + "_" + ops = tf.get_default_graph().get_operations() + ops = [op for op in ops if op.name.startswith(include_prefix)] + ops = [op for op in ops if not op.name.startswith(exclude_prefix)] + return ops + + def list_layers(self) -> List[Tuple[str, TfExpression, List[TfExpression]]]: + """Returns a list of (layer_name, output_expr, trainable_vars) tuples corresponding to + individual layers of the network. Mainly intended to be used for reporting.""" + layers = [] + + def recurse(scope, parent_ops, parent_vars, level): + # Ignore specific patterns. + if any(p in scope for p in ["/Shape", "/strided_slice", "/Cast", "/concat", "/Assign"]): + return + + # Filter ops and vars by scope. + global_prefix = scope + "/" + local_prefix = global_prefix[len(self.scope) + 1:] + cur_ops = [op for op in parent_ops if op.name.startswith(global_prefix) or op.name == global_prefix[:-1]] + cur_vars = [(name, var) for name, var in parent_vars if name.startswith(local_prefix) or name == local_prefix[:-1]] + if not cur_ops and not cur_vars: + return + + # Filter out all ops related to variables. + for var in [op for op in cur_ops if op.type.startswith("Variable")]: + var_prefix = var.name + "/" + cur_ops = [op for op in cur_ops if not op.name.startswith(var_prefix)] + + # Scope does not contain ops as immediate children => recurse deeper. + contains_direct_ops = any("/" not in op.name[len(global_prefix):] and op.type != "Identity" for op in cur_ops) + if (level == 0 or not contains_direct_ops) and (len(cur_ops) + len(cur_vars)) > 1: + visited = set() + for rel_name in [op.name[len(global_prefix):] for op in cur_ops] + [name[len(local_prefix):] for name, _var in cur_vars]: + token = rel_name.split("/")[0] + if token not in visited: + recurse(global_prefix + token, cur_ops, cur_vars, level + 1) + visited.add(token) + return + + # Report layer. 
+ layer_name = scope[len(self.scope) + 1:] + layer_output = cur_ops[-1].outputs[0] if cur_ops else cur_vars[-1][1] + layer_trainables = [var for _name, var in cur_vars if var.trainable] + layers.append((layer_name, layer_output, layer_trainables)) + + recurse(self.scope, self.list_ops(), list(self.vars.items()), 0) + return layers + + def print_layers(self, title: str = None, hide_layers_with_no_params: bool = False) -> None: + """Print a summary table of the network structure.""" + rows = [[title if title is not None else self.name, "Params", "OutputShape", "WeightShape"]] + rows += [["---"] * 4] + total_params = 0 + + for layer_name, layer_output, layer_trainables in self.list_layers(): + num_params = sum(np.prod(tfutil.shape_to_list(var.shape)) for var in layer_trainables) + weights = [var for var in layer_trainables if var.name.endswith("/weight:0")] + weights.sort(key=lambda x: len(x.name)) + if len(weights) == 0 and len(layer_trainables) == 1: + weights = layer_trainables + total_params += num_params + + if not hide_layers_with_no_params or num_params != 0: + num_params_str = str(num_params) if num_params > 0 else "-" + output_shape_str = str(layer_output.shape) + weight_shape_str = str(weights[0].shape) if len(weights) >= 1 else "-" + rows += [[layer_name, num_params_str, output_shape_str, weight_shape_str]] + + rows += [["---"] * 4] + rows += [["Total", str(total_params), "", ""]] + + widths = [max(len(cell) for cell in column) for column in zip(*rows)] + print() + for row in rows: + print(" ".join(cell + " " * (width - len(cell)) for cell, width in zip(row, widths))) + print() + + def setup_weight_histograms(self, title: str = None) -> None: + """Construct summary ops to include histograms of all trainable parameters in TensorBoard.""" + if title is None: + title = self.name + + with tf.name_scope(None), tf.device(None), tf.control_dependencies(None): + for local_name, var in self.trainables.items(): + if "/" in local_name: + p = local_name.split("/") + name = title + "_" + p[-1] + "/" + "_".join(p[:-1]) + else: + name = title + "_toplevel/" + local_name + + tf.summary.histogram(name, var) + +#---------------------------------------------------------------------------- +# Backwards-compatible emulation of legacy output transformation in Network.run(). 
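+# The helpers below translate the deprecated kwargs (out_mul, out_add, out_shrink, out_dtype)
+# into an equivalent output_transform dict. Sketch of the two call styles (added comment;
+# `net` is an assumed Network instance and `latents` an assumed NumPy array):
+#
+# net.run(latents, out_mul=127.5, out_add=127.5, out_dtype=np.uint8) # legacy
+# net.run(latents, output_transform=dict(func=tflib.convert_images_to_uint8)) # preferred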
+ +_print_legacy_warning = True + +def _handle_legacy_output_transforms(output_transform, dynamic_kwargs): + global _print_legacy_warning + legacy_kwargs = ["out_mul", "out_add", "out_shrink", "out_dtype"] + if not any(kwarg in dynamic_kwargs for kwarg in legacy_kwargs): + return output_transform, dynamic_kwargs + + if _print_legacy_warning: + _print_legacy_warning = False + print() + print("WARNING: Old-style output transformations in Network.run() are deprecated.") + print("Consider using 'output_transform=dict(func=tflib.convert_images_to_uint8)'") + print("instead of 'out_mul=127.5, out_add=127.5, out_dtype=np.uint8'.") + print() + assert output_transform is None + + new_kwargs = dict(dynamic_kwargs) + new_transform = {kwarg: new_kwargs.pop(kwarg) for kwarg in legacy_kwargs if kwarg in dynamic_kwargs} + new_transform["func"] = _legacy_output_transform_func + return new_transform, new_kwargs + +def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None): + if out_mul != 1.0: + expr = [x * out_mul for x in expr] + + if out_add != 0.0: + expr = [x + out_add for x in expr] + + if out_shrink > 1: + ksize = [1, 1, out_shrink, out_shrink] + expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr] + + if out_dtype is not None: + if tf.as_dtype(out_dtype).is_integer: + expr = [tf.round(x) for x in expr] + expr = [tf.saturate_cast(x, out_dtype) for x in expr] + return expr diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/optimizer.py b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..6ed88cb236365234597f8734299fbb315c56cc73 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/optimizer.py @@ -0,0 +1,214 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Helper wrapper for a Tensorflow optimizer.""" + +import numpy as np +import tensorflow as tf + +from collections import OrderedDict +from typing import List, Union + +from . import autosummary +from . import tfutil +from .. import util + +from .tfutil import TfExpression, TfExpressionEx + +try: + # TensorFlow 1.13 + from tensorflow.python.ops import nccl_ops +except: + # Older TensorFlow versions + import tensorflow.contrib.nccl as nccl_ops + +class Optimizer: + """A Wrapper for tf.train.Optimizer. + + Automatically takes care of: + - Gradient averaging for multi-GPU training. + - Dynamic loss scaling and typecasts for FP16 training. + - Ignoring corrupted gradients that contain NaNs/Infs. + - Reporting statistics. + - Well-chosen default settings. + """ + + def __init__(self, + name: str = "Train", + tf_optimizer: str = "tf.train.AdamOptimizer", + learning_rate: TfExpressionEx = 0.001, + use_loss_scaling: bool = False, + loss_scaling_init: float = 64.0, + loss_scaling_inc: float = 0.0005, + loss_scaling_dec: float = 1.0, + **kwargs): + + # Init fields. 
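+ # Typical usage sketch (added comment, not part of the original file):
+ #
+ # opt = Optimizer(learning_rate=0.001, beta1=0.0, beta2=0.99, epsilon=1e-8)
+ # opt.register_gradients(loss, net.trainables) # once per GPU
+ # train_op = opt.apply_updates()
+ #
+ # Any extra **kwargs (here beta1/beta2/epsilon) are kept in optimizer_kwargs below and
+ # forwarded to the underlying tf.train optimizer, which is constructed lazily per device
+ # in register_gradients().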
+ self.name = name + self.learning_rate = tf.convert_to_tensor(learning_rate) + self.id = self.name.replace("/", ".") + self.scope = tf.get_default_graph().unique_name(self.id) + self.optimizer_class = util.get_obj_by_name(tf_optimizer) + self.optimizer_kwargs = dict(kwargs) + self.use_loss_scaling = use_loss_scaling + self.loss_scaling_init = loss_scaling_init + self.loss_scaling_inc = loss_scaling_inc + self.loss_scaling_dec = loss_scaling_dec + self._grad_shapes = None # [shape, ...] + self._dev_opt = OrderedDict() # device => optimizer + self._dev_grads = OrderedDict() # device => [[(grad, var), ...], ...] + self._dev_ls_var = OrderedDict() # device => variable (log2 of loss scaling factor) + self._updates_applied = False + + def register_gradients(self, loss: TfExpression, trainable_vars: Union[List, dict]) -> None: + """Register the gradients of the given loss function with respect to the given variables. + Intended to be called once per GPU.""" + assert not self._updates_applied + + # Validate arguments. + if isinstance(trainable_vars, dict): + trainable_vars = list(trainable_vars.values()) # allow passing in Network.trainables as vars + + assert isinstance(trainable_vars, list) and len(trainable_vars) >= 1 + assert all(tfutil.is_tf_expression(expr) for expr in trainable_vars + [loss]) + + if self._grad_shapes is None: + self._grad_shapes = [tfutil.shape_to_list(var.shape) for var in trainable_vars] + + assert len(trainable_vars) == len(self._grad_shapes) + assert all(tfutil.shape_to_list(var.shape) == var_shape for var, var_shape in zip(trainable_vars, self._grad_shapes)) + + dev = loss.device + + assert all(var.device == dev for var in trainable_vars) + + # Register device and compute gradients. + with tf.name_scope(self.id + "_grad"), tf.device(dev): + if dev not in self._dev_opt: + opt_name = self.scope.replace("/", "_") + "_opt%d" % len(self._dev_opt) + assert callable(self.optimizer_class) + self._dev_opt[dev] = self.optimizer_class(name=opt_name, learning_rate=self.learning_rate, **self.optimizer_kwargs) + self._dev_grads[dev] = [] + + loss = self.apply_loss_scaling(tf.cast(loss, tf.float32)) + grads = self._dev_opt[dev].compute_gradients(loss, trainable_vars, gate_gradients=tf.train.Optimizer.GATE_NONE) # disable gating to reduce memory usage + grads = [(g, v) if g is not None else (tf.zeros_like(v), v) for g, v in grads] # replace disconnected gradients with zeros + self._dev_grads[dev].append(grads) + + def apply_updates(self) -> tf.Operation: + """Construct training op to update the registered variables based on their gradients.""" + tfutil.assert_tf_initialized() + assert not self._updates_applied + self._updates_applied = True + devices = list(self._dev_grads.keys()) + total_grads = sum(len(grads) for grads in self._dev_grads.values()) + assert len(devices) >= 1 and total_grads >= 1 + ops = [] + + with tfutil.absolute_name_scope(self.scope): + # Cast gradients to FP32 and calculate partial sum within each device. + dev_grads = OrderedDict() # device => [(grad, var), ...] + + for dev_idx, dev in enumerate(devices): + with tf.name_scope("ProcessGrads%d" % dev_idx), tf.device(dev): + sums = [] + + for gv in zip(*self._dev_grads[dev]): + assert all(v is gv[0][1] for g, v in gv) + g = [tf.cast(g, tf.float32) for g, v in gv] + g = g[0] if len(g) == 1 else tf.add_n(g) + sums.append((g, gv[0][1])) + + dev_grads[dev] = sums + + # Sum gradients across devices. 
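+ # nccl_ops.all_sum below performs an all-reduce: afterwards every device holds the same
+ # summed gradient for each variable, so identical updates can be applied on each device.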
+ if len(devices) > 1: + with tf.name_scope("SumAcrossGPUs"), tf.device(None): + for var_idx, grad_shape in enumerate(self._grad_shapes): + g = [dev_grads[dev][var_idx][0] for dev in devices] + + if np.prod(grad_shape): # nccl does not support zero-sized tensors + g = nccl_ops.all_sum(g) + + for dev, gg in zip(devices, g): + dev_grads[dev][var_idx] = (gg, dev_grads[dev][var_idx][1]) + + # Apply updates separately on each device. + for dev_idx, (dev, grads) in enumerate(dev_grads.items()): + with tf.name_scope("ApplyGrads%d" % dev_idx), tf.device(dev): + # Scale gradients as needed. + if self.use_loss_scaling or total_grads > 1: + with tf.name_scope("Scale"): + coef = tf.constant(np.float32(1.0 / total_grads), name="coef") + coef = self.undo_loss_scaling(coef) + grads = [(g * coef, v) for g, v in grads] + + # Check for overflows. + with tf.name_scope("CheckOverflow"): + grad_ok = tf.reduce_all(tf.stack([tf.reduce_all(tf.is_finite(g)) for g, v in grads])) + + # Update weights and adjust loss scaling. + with tf.name_scope("UpdateWeights"): + # pylint: disable=cell-var-from-loop + opt = self._dev_opt[dev] + ls_var = self.get_loss_scaling_var(dev) + + if not self.use_loss_scaling: + ops.append(tf.cond(grad_ok, lambda: opt.apply_gradients(grads), tf.no_op)) + else: + ops.append(tf.cond(grad_ok, + lambda: tf.group(tf.assign_add(ls_var, self.loss_scaling_inc), opt.apply_gradients(grads)), + lambda: tf.group(tf.assign_sub(ls_var, self.loss_scaling_dec)))) + + # Report statistics on the last device. + if dev == devices[-1]: + with tf.name_scope("Statistics"): + ops.append(autosummary.autosummary(self.id + "/learning_rate", self.learning_rate)) + ops.append(autosummary.autosummary(self.id + "/overflow_frequency", tf.where(grad_ok, 0, 1))) + + if self.use_loss_scaling: + ops.append(autosummary.autosummary(self.id + "/loss_scaling_log2", ls_var)) + + # Initialize variables and group everything into a single op. 
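+ # reset_optimizer_state() also serves as lazy initialization of the per-device optimizers'
+ # internal variables (e.g. Adam moments); the loss-scaling variables are initialized
+ # separately right after.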
+ self.reset_optimizer_state() + tfutil.init_uninitialized_vars(list(self._dev_ls_var.values())) + + return tf.group(*ops, name="TrainingOp") + + def reset_optimizer_state(self) -> None: + """Reset internal state of the underlying optimizer.""" + tfutil.assert_tf_initialized() + tfutil.run([var.initializer for opt in self._dev_opt.values() for var in opt.variables()]) + + def get_loss_scaling_var(self, device: str) -> Union[tf.Variable, None]: + """Get or create variable representing log2 of the current dynamic loss scaling factor.""" + if not self.use_loss_scaling: + return None + + if device not in self._dev_ls_var: + with tfutil.absolute_name_scope(self.scope + "/LossScalingVars"), tf.control_dependencies(None): + self._dev_ls_var[device] = tf.Variable(np.float32(self.loss_scaling_init), name="loss_scaling_var") + + return self._dev_ls_var[device] + + def apply_loss_scaling(self, value: TfExpression) -> TfExpression: + """Apply dynamic loss scaling for the given expression.""" + assert tfutil.is_tf_expression(value) + + if not self.use_loss_scaling: + return value + + return value * tfutil.exp2(self.get_loss_scaling_var(value.device)) + + def undo_loss_scaling(self, value: TfExpression) -> TfExpression: + """Undo the effect of dynamic loss scaling for the given expression.""" + assert tfutil.is_tf_expression(value) + + if not self.use_loss_scaling: + return value + + return value * tfutil.exp2(-self.get_loss_scaling_var(value.device)) # pylint: disable=invalid-unary-operand-type diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/tfutil.py b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/tfutil.py new file mode 100644 index 0000000000000000000000000000000000000000..58c1a14ee17539a70bad83786688df49309c5ca8 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/tflib/tfutil.py @@ -0,0 +1,240 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 
+ +"""Miscellaneous helper utils for Tensorflow.""" + +import os +import numpy as np +import tensorflow as tf + +from typing import Any, Iterable, List, Union + +TfExpression = Union[tf.Tensor, tf.Variable, tf.Operation] +"""A type that represents a valid Tensorflow expression.""" + +TfExpressionEx = Union[TfExpression, int, float, np.ndarray] +"""A type that can be converted to a valid Tensorflow expression.""" + + +def run(*args, **kwargs) -> Any: + """Run the specified ops in the default session.""" + assert_tf_initialized() + return tf.get_default_session().run(*args, **kwargs) + + +def is_tf_expression(x: Any) -> bool: + """Check whether the input is a valid Tensorflow expression, i.e., Tensorflow Tensor, Variable, or Operation.""" + return isinstance(x, (tf.Tensor, tf.Variable, tf.Operation)) + + +def shape_to_list(shape: Iterable[tf.Dimension]) -> List[Union[int, None]]: + """Convert a Tensorflow shape to a list of ints.""" + return [dim.value for dim in shape] + + +def flatten(x: TfExpressionEx) -> TfExpression: + """Shortcut function for flattening a tensor.""" + with tf.name_scope("Flatten"): + return tf.reshape(x, [-1]) + + +def log2(x: TfExpressionEx) -> TfExpression: + """Logarithm in base 2.""" + with tf.name_scope("Log2"): + return tf.log(x) * np.float32(1.0 / np.log(2.0)) + + +def exp2(x: TfExpressionEx) -> TfExpression: + """Exponent in base 2.""" + with tf.name_scope("Exp2"): + return tf.exp(x * np.float32(np.log(2.0))) + + +def lerp(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpressionEx: + """Linear interpolation.""" + with tf.name_scope("Lerp"): + return a + (b - a) * t + + +def lerp_clip(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpression: + """Linear interpolation with clip.""" + with tf.name_scope("LerpClip"): + return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0) + + +def absolute_name_scope(scope: str) -> tf.name_scope: + """Forcefully enter the specified name scope, ignoring any surrounding scopes.""" + return tf.name_scope(scope + "/") + + +def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope: + """Forcefully enter the specified variable scope, ignoring any surrounding scopes.""" + return tf.variable_scope(tf.VariableScope(name=scope, **kwargs), auxiliary_name_scope=False) + + +def _sanitize_tf_config(config_dict: dict = None) -> dict: + # Defaults. + cfg = dict() + cfg["rnd.np_random_seed"] = None # Random seed for NumPy. None = keep as is. + cfg["rnd.tf_random_seed"] = "auto" # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is. + cfg["env.TF_CPP_MIN_LOG_LEVEL"] = "1" # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info. + cfg["graph_options.place_pruned_graph"] = True # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used. + cfg["gpu_options.allow_growth"] = True # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed. + + # User overrides. + if config_dict is not None: + cfg.update(config_dict) + return cfg + + +def init_tf(config_dict: dict = None) -> None: + """Initialize TensorFlow session using good default settings.""" + # Skip if already initialized. + if tf.get_default_session() is not None: + return + + # Setup config dict and random seeds. 
+ cfg = _sanitize_tf_config(config_dict) + np_random_seed = cfg["rnd.np_random_seed"] + if np_random_seed is not None: + np.random.seed(np_random_seed) + tf_random_seed = cfg["rnd.tf_random_seed"] + if tf_random_seed == "auto": + tf_random_seed = np.random.randint(1 << 31) + if tf_random_seed is not None: + tf.set_random_seed(tf_random_seed) + + # Setup environment variables. + for key, value in list(cfg.items()): + fields = key.split(".") + if fields[0] == "env": + assert len(fields) == 2 + os.environ[fields[1]] = str(value) + + # Create default TensorFlow session. + create_session(cfg, force_as_default=True) + + +def assert_tf_initialized(): + """Check that TensorFlow session has been initialized.""" + if tf.get_default_session() is None: + raise RuntimeError("No default TensorFlow session found. Please call dnnlib.tflib.init_tf().") + + +def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session: + """Create tf.Session based on config dict.""" + # Setup TensorFlow config proto. + cfg = _sanitize_tf_config(config_dict) + config_proto = tf.ConfigProto() + for key, value in cfg.items(): + fields = key.split(".") + if fields[0] not in ["rnd", "env"]: + obj = config_proto + for field in fields[:-1]: + obj = getattr(obj, field) + setattr(obj, fields[-1], value) + + # Create session. + session = tf.Session(config=config_proto) + if force_as_default: + # pylint: disable=protected-access + session._default_session = session.as_default() + session._default_session.enforce_nesting = False + session._default_session.__enter__() # pylint: disable=no-member + + return session + + +def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None: + """Initialize all tf.Variables that have not already been initialized. + + Equivalent to the following, but more efficient and does not bloat the tf graph: + tf.variables_initializer(tf.report_uninitialized_variables()).run() + """ + assert_tf_initialized() + if target_vars is None: + target_vars = tf.global_variables() + + test_vars = [] + test_ops = [] + + with tf.control_dependencies(None): # ignore surrounding control_dependencies + for var in target_vars: + assert is_tf_expression(var) + + try: + tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/IsVariableInitialized:0")) + except KeyError: + # Op does not exist => variable may be uninitialized. + test_vars.append(var) + + with absolute_name_scope(var.name.split(":")[0]): + test_ops.append(tf.is_variable_initialized(var)) + + init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited] + run([var.initializer for var in init_vars]) + + +def set_vars(var_to_value_dict: dict) -> None: + """Set the values of given tf.Variables. 
+ + Equivalent to the following, but more efficient and does not bloat the tf graph: + tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()] + """ + assert_tf_initialized() + ops = [] + feed_dict = {} + + for var, value in var_to_value_dict.items(): + assert is_tf_expression(var) + + try: + setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/setter:0")) # look for existing op + except KeyError: + with absolute_name_scope(var.name.split(":")[0]): + with tf.control_dependencies(None): # ignore surrounding control_dependencies + setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, "new_value"), name="setter") # create new setter + + ops.append(setter) + feed_dict[setter.op.inputs[1]] = value + + run(ops, feed_dict) + + +def create_var_with_large_initial_value(initial_value: np.ndarray, *args, **kwargs): + """Create tf.Variable with large initial value without bloating the tf graph.""" + assert_tf_initialized() + assert isinstance(initial_value, np.ndarray) + zeros = tf.zeros(initial_value.shape, initial_value.dtype) + var = tf.Variable(zeros, *args, **kwargs) + set_vars({var: initial_value}) + return var + + +def convert_images_from_uint8(images, drange=[-1,1], nhwc_to_nchw=False): + """Convert a minibatch of images from uint8 to float32 with configurable dynamic range. + Can be used as an input transformation for Network.run(). + """ + images = tf.cast(images, tf.float32) + if nhwc_to_nchw: + images = tf.transpose(images, [0, 3, 1, 2]) + return (images - drange[0]) * ((drange[1] - drange[0]) / 255) + + +def convert_images_to_uint8(images, drange=[-1,1], nchw_to_nhwc=False, shrink=1): + """Convert a minibatch of images from float32 to uint8 with configurable dynamic range. + Can be used as an output transformation for Network.run(). + """ + images = tf.cast(images, tf.float32) + if shrink > 1: + ksize = [1, 1, shrink, shrink] + images = tf.nn.avg_pool(images, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") + if nchw_to_nhwc: + images = tf.transpose(images, [0, 2, 3, 1]) + scale = 255 / (drange[1] - drange[0]) + images = images * scale + (0.5 - drange[0] * scale) + return tf.saturate_cast(images, tf.uint8) diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/util.py b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/util.py new file mode 100644 index 0000000000000000000000000000000000000000..133ef764c0707d9384a33f0350ba71b1e624072f --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/dnnlib/util.py @@ -0,0 +1,405 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 
+ +"""Miscellaneous utility classes and functions.""" + +import ctypes +import fnmatch +import importlib +import inspect +import numpy as np +import os +import shutil +import sys +import types +import io +import pickle +import re +import requests +import html +import hashlib +import glob +import uuid + +from distutils.util import strtobool +from typing import Any, List, Tuple, Union + + +# Util classes +# ------------------------------------------------------------------------------------------ + + +class EasyDict(dict): + """Convenience class that behaves like a dict but allows access with the attribute syntax.""" + + def __getattr__(self, name: str) -> Any: + try: + return self[name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name: str, value: Any) -> None: + self[name] = value + + def __delattr__(self, name: str) -> None: + del self[name] + + +class Logger(object): + """Redirect stderr to stdout, optionally print stdout to a file, and optionally force flushing on both stdout and the file.""" + + def __init__(self, file_name: str = None, file_mode: str = "w", should_flush: bool = True): + self.file = None + + if file_name is not None: + self.file = open(file_name, file_mode) + + self.should_flush = should_flush + self.stdout = sys.stdout + self.stderr = sys.stderr + + sys.stdout = self + sys.stderr = self + + def __enter__(self) -> "Logger": + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.close() + + def write(self, text: str) -> None: + """Write text to stdout (and a file) and optionally flush.""" + if len(text) == 0: # workaround for a bug in VSCode debugger: sys.stdout.write(''); sys.stdout.flush() => crash + return + + if self.file is not None: + self.file.write(text) + + self.stdout.write(text) + + if self.should_flush: + self.flush() + + def flush(self) -> None: + """Flush written text to both stdout and a file, if open.""" + if self.file is not None: + self.file.flush() + + self.stdout.flush() + + def close(self) -> None: + """Flush, close possible files, and remove stdout/stderr mirroring.""" + self.flush() + + # if using multiple loggers, prevent closing in wrong order + if sys.stdout is self: + sys.stdout = self.stdout + if sys.stderr is self: + sys.stderr = self.stderr + + if self.file is not None: + self.file.close() + + +# Small util functions +# ------------------------------------------------------------------------------------------ + + +def format_time(seconds: Union[int, float]) -> str: + """Convert the seconds to human readable string with days, hours, minutes and seconds.""" + s = int(np.rint(seconds)) + + if s < 60: + return "{0}s".format(s) + elif s < 60 * 60: + return "{0}m {1:02}s".format(s // 60, s % 60) + elif s < 24 * 60 * 60: + return "{0}h {1:02}m {2:02}s".format(s // (60 * 60), (s // 60) % 60, s % 60) + else: + return "{0}d {1:02}h {2:02}m".format(s // (24 * 60 * 60), (s // (60 * 60)) % 24, (s // 60) % 60) + + +def ask_yes_no(question: str) -> bool: + """Ask the user the question until the user inputs a valid answer.""" + while True: + try: + print("{0} [y/n]".format(question)) + return strtobool(input().lower()) + except ValueError: + pass + + +def tuple_product(t: Tuple) -> Any: + """Calculate the product of the tuple elements.""" + result = 1 + + for v in t: + result *= v + + return result + + +_str_to_ctype = { + "uint8": ctypes.c_ubyte, + "uint16": ctypes.c_uint16, + "uint32": ctypes.c_uint32, + "uint64": ctypes.c_uint64, + "int8": ctypes.c_byte, + "int16": 
ctypes.c_int16, + "int32": ctypes.c_int32, + "int64": ctypes.c_int64, + "float32": ctypes.c_float, + "float64": ctypes.c_double +} + + +def get_dtype_and_ctype(type_obj: Any) -> Tuple[np.dtype, Any]: + """Given a type name string (or an object having a __name__ attribute), return matching Numpy and ctypes types that have the same size in bytes.""" + type_str = None + + if isinstance(type_obj, str): + type_str = type_obj + elif hasattr(type_obj, "__name__"): + type_str = type_obj.__name__ + elif hasattr(type_obj, "name"): + type_str = type_obj.name + else: + raise RuntimeError("Cannot infer type name from input") + + assert type_str in _str_to_ctype.keys() + + my_dtype = np.dtype(type_str) + my_ctype = _str_to_ctype[type_str] + + assert my_dtype.itemsize == ctypes.sizeof(my_ctype) + + return my_dtype, my_ctype + + +def is_pickleable(obj: Any) -> bool: + try: + with io.BytesIO() as stream: + pickle.dump(obj, stream) + return True + except: + return False + + +# Functionality to import modules/objects by name, and call functions by name +# ------------------------------------------------------------------------------------------ + +def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]: + """Searches for the underlying module behind the name to some python object. + Returns the module and the object name (original name with module part removed).""" + + # allow convenience shorthands, substitute them by full names + obj_name = re.sub("^np.", "numpy.", obj_name) + obj_name = re.sub("^tf.", "tensorflow.", obj_name) + + # list alternatives for (module_name, local_obj_name) + parts = obj_name.split(".") + name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)] + + # try each alternative in turn + for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + return module, local_obj_name + except: + pass + + # maybe some of the modules themselves contain errors? + for module_name, _local_obj_name in name_pairs: + try: + importlib.import_module(module_name) # may raise ImportError + except ImportError: + if not str(sys.exc_info()[1]).startswith("No module named '" + module_name + "'"): + raise + + # maybe the requested attribute is missing? 
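+ # Only ImportError is caught below, so a genuinely missing attribute surfaces as the
+ # original AttributeError rather than a generic ImportError.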
+ for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + except ImportError: + pass + + # we are out of luck, but we have no idea why + raise ImportError(obj_name) + + +def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any: + """Traverses the object name and returns the last (rightmost) python object.""" + if obj_name == '': + return module + obj = module + for part in obj_name.split("."): + obj = getattr(obj, part) + return obj + + +def get_obj_by_name(name: str) -> Any: + """Finds the python object with the given name.""" + module, obj_name = get_module_from_obj_name(name) + return get_obj_from_module(module, obj_name) + + +def call_func_by_name(*args, func_name: str = None, **kwargs) -> Any: + """Finds the python object with the given name and calls it as a function.""" + assert func_name is not None + func_obj = get_obj_by_name(func_name) + assert callable(func_obj) + return func_obj(*args, **kwargs) + + +def get_module_dir_by_obj_name(obj_name: str) -> str: + """Get the directory path of the module containing the given object name.""" + module, _ = get_module_from_obj_name(obj_name) + return os.path.dirname(inspect.getfile(module)) + + +def is_top_level_function(obj: Any) -> bool: + """Determine whether the given object is a top-level function, i.e., defined at module scope using 'def'.""" + return callable(obj) and obj.__name__ in sys.modules[obj.__module__].__dict__ + + +def get_top_level_function_name(obj: Any) -> str: + """Return the fully-qualified name of a top-level function.""" + assert is_top_level_function(obj) + return obj.__module__ + "." + obj.__name__ + + +# File system helpers +# ------------------------------------------------------------------------------------------ + +def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]: + """List all files recursively in a given directory while ignoring given file and directory names. + Returns list of tuples containing both absolute and relative paths.""" + assert os.path.isdir(dir_path) + base_name = os.path.basename(os.path.normpath(dir_path)) + + if ignores is None: + ignores = [] + + result = [] + + for root, dirs, files in os.walk(dir_path, topdown=True): + for ignore_ in ignores: + dirs_to_remove = [d for d in dirs if fnmatch.fnmatch(d, ignore_)] + + # dirs need to be edited in-place + for d in dirs_to_remove: + dirs.remove(d) + + files = [f for f in files if not fnmatch.fnmatch(f, ignore_)] + + absolute_paths = [os.path.join(root, f) for f in files] + relative_paths = [os.path.relpath(p, dir_path) for p in absolute_paths] + + if add_base_to_relative: + relative_paths = [os.path.join(base_name, p) for p in relative_paths] + + assert len(absolute_paths) == len(relative_paths) + result += zip(absolute_paths, relative_paths) + + return result + + +def copy_files_and_create_dirs(files: List[Tuple[str, str]]) -> None: + """Takes in a list of tuples of (src, dst) paths and copies files. 
+ Will create all necessary directories.""" + for file in files: + target_dir_name = os.path.dirname(file[1]) + + # will create all intermediate-level directories + if not os.path.exists(target_dir_name): + os.makedirs(target_dir_name) + + shutil.copyfile(file[0], file[1]) + + +# URL helpers +# ------------------------------------------------------------------------------------------ + +def is_url(obj: Any) -> bool: + """Determine whether the given object is a valid URL string.""" + if not isinstance(obj, str) or not "://" in obj: + return False + try: + res = requests.compat.urlparse(obj) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + res = requests.compat.urlparse(requests.compat.urljoin(obj, "/")) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + except: + return False + return True + + +def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True) -> Any: + """Download the given URL and return a binary-mode file object to access the data.""" + assert is_url(url) + assert num_attempts >= 1 + + # Lookup from cache. + url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest() + if cache_dir is not None: + cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*")) + if len(cache_files) == 1: + return open(cache_files[0], "rb") + + # Download. + url_name = None + url_data = None + with requests.Session() as session: + if verbose: + print("Downloading %s ..." % url, end="", flush=True) + for attempts_left in reversed(range(num_attempts)): + try: + with session.get(url) as res: + res.raise_for_status() + if len(res.content) == 0: + raise IOError("No data received") + + if len(res.content) < 8192: + content_str = res.content.decode("utf-8") + if "download_warning" in res.headers.get("Set-Cookie", ""): + links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link] + if len(links) == 1: + url = requests.compat.urljoin(url, links[0]) + raise IOError("Google Drive virus checker nag") + if "Google Drive - Quota exceeded" in content_str: + raise IOError("Google Drive quota exceeded") + + match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", "")) + url_name = match[1] if match else url + url_data = res.content + if verbose: + print(" done") + break + except: + if not attempts_left: + if verbose: + print(" failed") + raise + if verbose: + print(".", end="", flush=True) + + # Save to cache. + if cache_dir is not None: + safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name) + cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name) + temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name) + os.makedirs(cache_dir, exist_ok=True) + with open(temp_file, "wb") as f: + f.write(url_data) + os.replace(temp_file, cache_file) # atomic + + # Return data as file object. + return io.BytesIO(url_data) diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/generate_figures.py b/ContraCLIP/models/genforce/converters/stylegan_official/generate_figures.py new file mode 100644 index 0000000000000000000000000000000000000000..45b68b86146198c701a66fb8ba7a363d901d6951 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/generate_figures.py @@ -0,0 +1,161 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. 
To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Minimal script for reproducing the figures of the StyleGAN paper using pre-trained generators.""" + +import os +import pickle +import numpy as np +import PIL.Image +import dnnlib +import dnnlib.tflib as tflib +import config + +#---------------------------------------------------------------------------- +# Helpers for loading and using pre-trained generators. + +url_ffhq = 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ' # karras2019stylegan-ffhq-1024x1024.pkl +url_celebahq = 'https://drive.google.com/uc?id=1MGqJl28pN4t7SAtSrPdSRJSQJqahkzUf' # karras2019stylegan-celebahq-1024x1024.pkl +url_bedrooms = 'https://drive.google.com/uc?id=1MOSKeGF0FJcivpBI7s63V9YHloUTORiF' # karras2019stylegan-bedrooms-256x256.pkl +url_cars = 'https://drive.google.com/uc?id=1MJ6iCfNtMIRicihwRorsM3b7mmtmK9c3' # karras2019stylegan-cars-512x384.pkl +url_cats = 'https://drive.google.com/uc?id=1MQywl0FNt6lHu8E_EUqnRbviagS7fbiJ' # karras2019stylegan-cats-256x256.pkl + +synthesis_kwargs = dict(output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True), minibatch_size=8) + +_Gs_cache = dict() + +def load_Gs(url): + if url not in _Gs_cache: + with dnnlib.util.open_url(url, cache_dir=config.cache_dir) as f: + _G, _D, Gs = pickle.load(f) + _Gs_cache[url] = Gs + return _Gs_cache[url] + +#---------------------------------------------------------------------------- +# Figures 2, 3, 10, 11, 12: Multi-resolution grid of uncurated result images. + +def draw_uncurated_result_figure(png, Gs, cx, cy, cw, ch, rows, lods, seed): + print(png) + latents = np.random.RandomState(seed).randn(sum(rows * 2**lod for lod in lods), Gs.input_shape[1]) + images = Gs.run(latents, None, **synthesis_kwargs) # [seed, y, x, rgb] + + canvas = PIL.Image.new('RGB', (sum(cw // 2**lod for lod in lods), ch * rows), 'white') + image_iter = iter(list(images)) + for col, lod in enumerate(lods): + for row in range(rows * 2**lod): + image = PIL.Image.fromarray(next(image_iter), 'RGB') + image = image.crop((cx, cy, cx + cw, cy + ch)) + image = image.resize((cw // 2**lod, ch // 2**lod), PIL.Image.ANTIALIAS) + canvas.paste(image, (sum(cw // 2**lod for lod in lods[:col]), row * ch // 2**lod)) + canvas.save(png) + +#---------------------------------------------------------------------------- +# Figure 3: Style mixing. 
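+# In draw_style_mixing_figure() below, each row keeps the destination dlatent and overwrites
+# the layer indices listed in style_ranges with the corresponding source dlatents, so a
+# different granularity of style is swapped in per row.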
+ +def draw_style_mixing_figure(png, Gs, w, h, src_seeds, dst_seeds, style_ranges): + print(png) + src_latents = np.stack(np.random.RandomState(seed).randn(Gs.input_shape[1]) for seed in src_seeds) + dst_latents = np.stack(np.random.RandomState(seed).randn(Gs.input_shape[1]) for seed in dst_seeds) + src_dlatents = Gs.components.mapping.run(src_latents, None) # [seed, layer, component] + dst_dlatents = Gs.components.mapping.run(dst_latents, None) # [seed, layer, component] + src_images = Gs.components.synthesis.run(src_dlatents, randomize_noise=False, **synthesis_kwargs) + dst_images = Gs.components.synthesis.run(dst_dlatents, randomize_noise=False, **synthesis_kwargs) + + canvas = PIL.Image.new('RGB', (w * (len(src_seeds) + 1), h * (len(dst_seeds) + 1)), 'white') + for col, src_image in enumerate(list(src_images)): + canvas.paste(PIL.Image.fromarray(src_image, 'RGB'), ((col + 1) * w, 0)) + for row, dst_image in enumerate(list(dst_images)): + canvas.paste(PIL.Image.fromarray(dst_image, 'RGB'), (0, (row + 1) * h)) + row_dlatents = np.stack([dst_dlatents[row]] * len(src_seeds)) + row_dlatents[:, style_ranges[row]] = src_dlatents[:, style_ranges[row]] + row_images = Gs.components.synthesis.run(row_dlatents, randomize_noise=False, **synthesis_kwargs) + for col, image in enumerate(list(row_images)): + canvas.paste(PIL.Image.fromarray(image, 'RGB'), ((col + 1) * w, (row + 1) * h)) + canvas.save(png) + +#---------------------------------------------------------------------------- +# Figure 4: Noise detail. + +def draw_noise_detail_figure(png, Gs, w, h, num_samples, seeds): + print(png) + canvas = PIL.Image.new('RGB', (w * 3, h * len(seeds)), 'white') + for row, seed in enumerate(seeds): + latents = np.stack([np.random.RandomState(seed).randn(Gs.input_shape[1])] * num_samples) + images = Gs.run(latents, None, truncation_psi=1, **synthesis_kwargs) + canvas.paste(PIL.Image.fromarray(images[0], 'RGB'), (0, row * h)) + for i in range(4): + crop = PIL.Image.fromarray(images[i + 1], 'RGB') + crop = crop.crop((650, 180, 906, 436)) + crop = crop.resize((w//2, h//2), PIL.Image.NEAREST) + canvas.paste(crop, (w + (i%2) * w//2, row * h + (i//2) * h//2)) + diff = np.std(np.mean(images, axis=3), axis=0) * 4 + diff = np.clip(diff + 0.5, 0, 255).astype(np.uint8) + canvas.paste(PIL.Image.fromarray(diff, 'L'), (w * 2, row * h)) + canvas.save(png) + +#---------------------------------------------------------------------------- +# Figure 5: Noise components. + +def draw_noise_components_figure(png, Gs, w, h, seeds, noise_ranges, flips): + print(png) + Gsc = Gs.clone() + noise_vars = [var for name, var in Gsc.components.synthesis.vars.items() if name.startswith('noise')] + noise_pairs = list(zip(noise_vars, tflib.run(noise_vars))) # [(var, val), ...] 
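+ # The snapshot in noise_pairs is reused below: for each noise_range, the noise variables
+ # outside the range are zeroed while those inside keep their original values, isolating the
+ # contribution of that subset of layers.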
+ latents = np.stack(np.random.RandomState(seed).randn(Gs.input_shape[1]) for seed in seeds) + all_images = [] + for noise_range in noise_ranges: + tflib.set_vars({var: val * (1 if i in noise_range else 0) for i, (var, val) in enumerate(noise_pairs)}) + range_images = Gsc.run(latents, None, truncation_psi=1, randomize_noise=False, **synthesis_kwargs) + range_images[flips, :, :] = range_images[flips, :, ::-1] + all_images.append(list(range_images)) + + canvas = PIL.Image.new('RGB', (w * 2, h * 2), 'white') + for col, col_images in enumerate(zip(*all_images)): + canvas.paste(PIL.Image.fromarray(col_images[0], 'RGB').crop((0, 0, w//2, h)), (col * w, 0)) + canvas.paste(PIL.Image.fromarray(col_images[1], 'RGB').crop((w//2, 0, w, h)), (col * w + w//2, 0)) + canvas.paste(PIL.Image.fromarray(col_images[2], 'RGB').crop((0, 0, w//2, h)), (col * w, h)) + canvas.paste(PIL.Image.fromarray(col_images[3], 'RGB').crop((w//2, 0, w, h)), (col * w + w//2, h)) + canvas.save(png) + +#---------------------------------------------------------------------------- +# Figure 8: Truncation trick. + +def draw_truncation_trick_figure(png, Gs, w, h, seeds, psis): + print(png) + latents = np.stack(np.random.RandomState(seed).randn(Gs.input_shape[1]) for seed in seeds) + dlatents = Gs.components.mapping.run(latents, None) # [seed, layer, component] + dlatent_avg = Gs.get_var('dlatent_avg') # [component] + + canvas = PIL.Image.new('RGB', (w * len(psis), h * len(seeds)), 'white') + for row, dlatent in enumerate(list(dlatents)): + row_dlatents = (dlatent[np.newaxis] - dlatent_avg) * np.reshape(psis, [-1, 1, 1]) + dlatent_avg + row_images = Gs.components.synthesis.run(row_dlatents, randomize_noise=False, **synthesis_kwargs) + for col, image in enumerate(list(row_images)): + canvas.paste(PIL.Image.fromarray(image, 'RGB'), (col * w, row * h)) + canvas.save(png) + +#---------------------------------------------------------------------------- +# Main program. 
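+# main() below expects a local config module that defines result_dir and cache_dir; the
+# pre-trained generator pickles are downloaded once via dnnlib.util.open_url() and cached
+# under cache_dir.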
+ +def main(): + tflib.init_tf() + os.makedirs(config.result_dir, exist_ok=True) + draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure02-uncurated-ffhq.png'), load_Gs(url_ffhq), cx=0, cy=0, cw=1024, ch=1024, rows=3, lods=[0,1,2,2,3,3], seed=5) + draw_style_mixing_figure(os.path.join(config.result_dir, 'figure03-style-mixing.png'), load_Gs(url_ffhq), w=1024, h=1024, src_seeds=[639,701,687,615,2268], dst_seeds=[888,829,1898,1733,1614,845], style_ranges=[range(0,4)]*3+[range(4,8)]*2+[range(8,18)]) + draw_noise_detail_figure(os.path.join(config.result_dir, 'figure04-noise-detail.png'), load_Gs(url_ffhq), w=1024, h=1024, num_samples=100, seeds=[1157,1012]) + draw_noise_components_figure(os.path.join(config.result_dir, 'figure05-noise-components.png'), load_Gs(url_ffhq), w=1024, h=1024, seeds=[1967,1555], noise_ranges=[range(0, 18), range(0, 0), range(8, 18), range(0, 8)], flips=[1]) + draw_truncation_trick_figure(os.path.join(config.result_dir, 'figure08-truncation-trick.png'), load_Gs(url_ffhq), w=1024, h=1024, seeds=[91,388], psis=[1, 0.7, 0.5, 0, -0.5, -1]) + draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure10-uncurated-bedrooms.png'), load_Gs(url_bedrooms), cx=0, cy=0, cw=256, ch=256, rows=5, lods=[0,0,1,1,2,2,2], seed=0) + draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure11-uncurated-cars.png'), load_Gs(url_cars), cx=0, cy=64, cw=512, ch=384, rows=4, lods=[0,1,2,2,3,3], seed=2) + draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure12-uncurated-cats.png'), load_Gs(url_cats), cx=0, cy=0, cw=256, ch=256, rows=5, lods=[0,0,1,1,2,2,2], seed=1) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/metrics/__init__.py b/ContraCLIP/models/genforce/converters/stylegan_official/metrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..db8124b132f91216c0ded226f20ea3a046734728 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/metrics/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +# empty diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/metrics/frechet_inception_distance.py b/ContraCLIP/models/genforce/converters/stylegan_official/metrics/frechet_inception_distance.py new file mode 100644 index 0000000000000000000000000000000000000000..41f71fe4bfb85218cc283b3f7bc3a34fea5f790d --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/metrics/frechet_inception_distance.py @@ -0,0 +1,72 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 
+ +"""Frechet Inception Distance (FID).""" + +import os +import numpy as np +import scipy +import tensorflow as tf +import dnnlib.tflib as tflib + +from metrics import metric_base +from training import misc + +#---------------------------------------------------------------------------- + +class FID(metric_base.MetricBase): + def __init__(self, num_images, minibatch_per_gpu, **kwargs): + super().__init__(**kwargs) + self.num_images = num_images + self.minibatch_per_gpu = minibatch_per_gpu + + def _evaluate(self, Gs, num_gpus): + minibatch_size = num_gpus * self.minibatch_per_gpu + inception = misc.load_pkl('https://drive.google.com/uc?id=1MzTY44rLToO5APn8TZmfR7_ENSe5aZUn') # inception_v3_features.pkl + activations = np.empty([self.num_images, inception.output_shape[1]], dtype=np.float32) + + # Calculate statistics for reals. + cache_file = self._get_cache_file_for_reals(num_images=self.num_images) + os.makedirs(os.path.dirname(cache_file), exist_ok=True) + if os.path.isfile(cache_file): + mu_real, sigma_real = misc.load_pkl(cache_file) + else: + for idx, images in enumerate(self._iterate_reals(minibatch_size=minibatch_size)): + begin = idx * minibatch_size + end = min(begin + minibatch_size, self.num_images) + activations[begin:end] = inception.run(images[:end-begin], num_gpus=num_gpus, assume_frozen=True) + if end == self.num_images: + break + mu_real = np.mean(activations, axis=0) + sigma_real = np.cov(activations, rowvar=False) + misc.save_pkl((mu_real, sigma_real), cache_file) + + # Construct TensorFlow graph. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device('/gpu:%d' % gpu_idx): + Gs_clone = Gs.clone() + inception_clone = inception.clone() + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + images = Gs_clone.get_output_for(latents, None, is_validation=True, randomize_noise=True) + images = tflib.convert_images_to_uint8(images) + result_expr.append(inception_clone.get_output_for(images)) + + # Calculate statistics for fakes. + for begin in range(0, self.num_images, minibatch_size): + end = min(begin + minibatch_size, self.num_images) + activations[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin] + mu_fake = np.mean(activations, axis=0) + sigma_fake = np.cov(activations, rowvar=False) + + # Calculate FID. + m = np.square(mu_fake - mu_real).sum() + s, _ = scipy.linalg.sqrtm(np.dot(sigma_fake, sigma_real), disp=False) # pylint: disable=no-member + dist = m + np.trace(sigma_fake + sigma_real - 2*s) + self._report_result(np.real(dist)) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/metrics/linear_separability.py b/ContraCLIP/models/genforce/converters/stylegan_official/metrics/linear_separability.py new file mode 100644 index 0000000000000000000000000000000000000000..e50be5a0fea00eba7af2d05cccf74bacedbea1c3 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/metrics/linear_separability.py @@ -0,0 +1,177 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 
+ +"""Linear Separability (LS).""" + +from collections import defaultdict +import numpy as np +import sklearn.svm +import tensorflow as tf +import dnnlib.tflib as tflib + +from metrics import metric_base +from training import misc + +#---------------------------------------------------------------------------- + +classifier_urls = [ + 'https://drive.google.com/uc?id=1Q5-AI6TwWhCVM7Muu4tBM7rp5nG_gmCX', # celebahq-classifier-00-male.pkl + 'https://drive.google.com/uc?id=1Q5c6HE__ReW2W8qYAXpao68V1ryuisGo', # celebahq-classifier-01-smiling.pkl + 'https://drive.google.com/uc?id=1Q7738mgWTljPOJQrZtSMLxzShEhrvVsU', # celebahq-classifier-02-attractive.pkl + 'https://drive.google.com/uc?id=1QBv2Mxe7ZLvOv1YBTLq-T4DS3HjmXV0o', # celebahq-classifier-03-wavy-hair.pkl + 'https://drive.google.com/uc?id=1QIvKTrkYpUrdA45nf7pspwAqXDwWOLhV', # celebahq-classifier-04-young.pkl + 'https://drive.google.com/uc?id=1QJPH5rW7MbIjFUdZT7vRYfyUjNYDl4_L', # celebahq-classifier-05-5-o-clock-shadow.pkl + 'https://drive.google.com/uc?id=1QPZXSYf6cptQnApWS_T83sqFMun3rULY', # celebahq-classifier-06-arched-eyebrows.pkl + 'https://drive.google.com/uc?id=1QPgoAZRqINXk_PFoQ6NwMmiJfxc5d2Pg', # celebahq-classifier-07-bags-under-eyes.pkl + 'https://drive.google.com/uc?id=1QQPQgxgI6wrMWNyxFyTLSgMVZmRr1oO7', # celebahq-classifier-08-bald.pkl + 'https://drive.google.com/uc?id=1QcSphAmV62UrCIqhMGgcIlZfoe8hfWaF', # celebahq-classifier-09-bangs.pkl + 'https://drive.google.com/uc?id=1QdWTVwljClTFrrrcZnPuPOR4mEuz7jGh', # celebahq-classifier-10-big-lips.pkl + 'https://drive.google.com/uc?id=1QgvEWEtr2mS4yj1b_Y3WKe6cLWL3LYmK', # celebahq-classifier-11-big-nose.pkl + 'https://drive.google.com/uc?id=1QidfMk9FOKgmUUIziTCeo8t-kTGwcT18', # celebahq-classifier-12-black-hair.pkl + 'https://drive.google.com/uc?id=1QthrJt-wY31GPtV8SbnZQZ0_UEdhasHO', # celebahq-classifier-13-blond-hair.pkl + 'https://drive.google.com/uc?id=1QvCAkXxdYT4sIwCzYDnCL9Nb5TDYUxGW', # celebahq-classifier-14-blurry.pkl + 'https://drive.google.com/uc?id=1QvLWuwSuWI9Ln8cpxSGHIciUsnmaw8L0', # celebahq-classifier-15-brown-hair.pkl + 'https://drive.google.com/uc?id=1QxW6THPI2fqDoiFEMaV6pWWHhKI_OoA7', # celebahq-classifier-16-bushy-eyebrows.pkl + 'https://drive.google.com/uc?id=1R71xKw8oTW2IHyqmRDChhTBkW9wq4N9v', # celebahq-classifier-17-chubby.pkl + 'https://drive.google.com/uc?id=1RDn_fiLfEGbTc7JjazRXuAxJpr-4Pl67', # celebahq-classifier-18-double-chin.pkl + 'https://drive.google.com/uc?id=1RGBuwXbaz5052bM4VFvaSJaqNvVM4_cI', # celebahq-classifier-19-eyeglasses.pkl + 'https://drive.google.com/uc?id=1RIxOiWxDpUwhB-9HzDkbkLegkd7euRU9', # celebahq-classifier-20-goatee.pkl + 'https://drive.google.com/uc?id=1RPaNiEnJODdr-fwXhUFdoSQLFFZC7rC-', # celebahq-classifier-21-gray-hair.pkl + 'https://drive.google.com/uc?id=1RQH8lPSwOI2K_9XQCZ2Ktz7xm46o80ep', # celebahq-classifier-22-heavy-makeup.pkl + 'https://drive.google.com/uc?id=1RXZM61xCzlwUZKq-X7QhxOg0D2telPow', # celebahq-classifier-23-high-cheekbones.pkl + 'https://drive.google.com/uc?id=1RgASVHW8EWMyOCiRb5fsUijFu-HfxONM', # celebahq-classifier-24-mouth-slightly-open.pkl + 'https://drive.google.com/uc?id=1RkC8JLqLosWMaRne3DARRgolhbtg_wnr', # celebahq-classifier-25-mustache.pkl + 'https://drive.google.com/uc?id=1RqtbtFT2EuwpGTqsTYJDyXdnDsFCPtLO', # celebahq-classifier-26-narrow-eyes.pkl + 'https://drive.google.com/uc?id=1Rs7hU-re8bBMeRHR-fKgMbjPh-RIbrsh', # celebahq-classifier-27-no-beard.pkl + 'https://drive.google.com/uc?id=1RynDJQWdGOAGffmkPVCrLJqy_fciPF9E', # celebahq-classifier-28-oval-face.pkl + 
'https://drive.google.com/uc?id=1S0TZ_Hdv5cb06NDaCD8NqVfKy7MuXZsN', # celebahq-classifier-29-pale-skin.pkl + 'https://drive.google.com/uc?id=1S3JPhZH2B4gVZZYCWkxoRP11q09PjCkA', # celebahq-classifier-30-pointy-nose.pkl + 'https://drive.google.com/uc?id=1S3pQuUz-Jiywq_euhsfezWfGkfzLZ87W', # celebahq-classifier-31-receding-hairline.pkl + 'https://drive.google.com/uc?id=1S6nyIl_SEI3M4l748xEdTV2vymB_-lrY', # celebahq-classifier-32-rosy-cheeks.pkl + 'https://drive.google.com/uc?id=1S9P5WCi3GYIBPVYiPTWygrYIUSIKGxbU', # celebahq-classifier-33-sideburns.pkl + 'https://drive.google.com/uc?id=1SANviG-pp08n7AFpE9wrARzozPIlbfCH', # celebahq-classifier-34-straight-hair.pkl + 'https://drive.google.com/uc?id=1SArgyMl6_z7P7coAuArqUC2zbmckecEY', # celebahq-classifier-35-wearing-earrings.pkl + 'https://drive.google.com/uc?id=1SC5JjS5J-J4zXFO9Vk2ZU2DT82TZUza_', # celebahq-classifier-36-wearing-hat.pkl + 'https://drive.google.com/uc?id=1SDAQWz03HGiu0MSOKyn7gvrp3wdIGoj-', # celebahq-classifier-37-wearing-lipstick.pkl + 'https://drive.google.com/uc?id=1SEtrVK-TQUC0XeGkBE9y7L8VXfbchyKX', # celebahq-classifier-38-wearing-necklace.pkl + 'https://drive.google.com/uc?id=1SF_mJIdyGINXoV-I6IAxHB_k5dxiF6M-', # celebahq-classifier-39-wearing-necktie.pkl +] + +#---------------------------------------------------------------------------- + +def prob_normalize(p): + p = np.asarray(p).astype(np.float32) + assert len(p.shape) == 2 + return p / np.sum(p) + +def mutual_information(p): + p = prob_normalize(p) + px = np.sum(p, axis=1) + py = np.sum(p, axis=0) + result = 0.0 + for x in range(p.shape[0]): + p_x = px[x] + for y in range(p.shape[1]): + p_xy = p[x][y] + p_y = py[y] + if p_xy > 0.0: + result += p_xy * np.log2(p_xy / (p_x * p_y)) # get bits as output + return result + +def entropy(p): + p = prob_normalize(p) + result = 0.0 + for x in range(p.shape[0]): + for y in range(p.shape[1]): + p_xy = p[x][y] + if p_xy > 0.0: + result -= p_xy * np.log2(p_xy) + return result + +def conditional_entropy(p): + # H(Y|X) where X corresponds to axis 0, Y to axis 1 + # i.e., How many bits of additional information are needed to where we are on axis 1 if we know where we are on axis 0? + p = prob_normalize(p) + y = np.sum(p, axis=0, keepdims=True) # marginalize to calculate H(Y) + return max(0.0, entropy(y) - mutual_information(p)) # can slip just below 0 due to FP inaccuracies, clean those up. + +#---------------------------------------------------------------------------- + +class LS(metric_base.MetricBase): + def __init__(self, num_samples, num_keep, attrib_indices, minibatch_per_gpu, **kwargs): + assert num_keep <= num_samples + super().__init__(**kwargs) + self.num_samples = num_samples + self.num_keep = num_keep + self.attrib_indices = attrib_indices + self.minibatch_per_gpu = minibatch_per_gpu + + def _evaluate(self, Gs, num_gpus): + minibatch_size = num_gpus * self.minibatch_per_gpu + + # Construct TensorFlow graph for each GPU. + result_expr = [] + for gpu_idx in range(num_gpus): + with tf.device('/gpu:%d' % gpu_idx): + Gs_clone = Gs.clone() + + # Generate images. + latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:]) + dlatents = Gs_clone.components.mapping.get_output_for(latents, None, is_validation=True) + images = Gs_clone.components.synthesis.get_output_for(dlatents, is_validation=True, randomize_noise=True) + + # Downsample to 256x256. The attribute classifiers were built for 256x256. 
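+ # The reshape + reduce_mean below implements factor x factor average pooling, i.e. a
+ # box-filter downsampling of the NCHW image batch to 256x256.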
+ if images.shape[2] > 256: + factor = images.shape[2] // 256 + images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor]) + images = tf.reduce_mean(images, axis=[3, 5]) + + # Run classifier for each attribute. + result_dict = dict(latents=latents, dlatents=dlatents[:,-1]) + for attrib_idx in self.attrib_indices: + classifier = misc.load_pkl(classifier_urls[attrib_idx]) + logits = classifier.get_output_for(images, None) + predictions = tf.nn.softmax(tf.concat([logits, -logits], axis=1)) + result_dict[attrib_idx] = predictions + result_expr.append(result_dict) + + # Sampling loop. + results = [] + for _ in range(0, self.num_samples, minibatch_size): + results += tflib.run(result_expr) + results = {key: np.concatenate([value[key] for value in results], axis=0) for key in results[0].keys()} + + # Calculate conditional entropy for each attribute. + conditional_entropies = defaultdict(list) + for attrib_idx in self.attrib_indices: + # Prune the least confident samples. + pruned_indices = list(range(self.num_samples)) + pruned_indices = sorted(pruned_indices, key=lambda i: -np.max(results[attrib_idx][i])) + pruned_indices = pruned_indices[:self.num_keep] + + # Fit SVM to the remaining samples. + svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1) + for space in ['latents', 'dlatents']: + svm_inputs = results[space][pruned_indices] + try: + svm = sklearn.svm.LinearSVC() + svm.fit(svm_inputs, svm_targets) + svm.score(svm_inputs, svm_targets) + svm_outputs = svm.predict(svm_inputs) + except: + svm_outputs = svm_targets # assume perfect prediction + + # Calculate conditional entropy. + p = [[np.mean([case == (row, col) for case in zip(svm_outputs, svm_targets)]) for col in (0, 1)] for row in (0, 1)] + conditional_entropies[space].append(conditional_entropy(p)) + + # Calculate separability scores. + scores = {key: 2**np.sum(values) for key, values in conditional_entropies.items()} + self._report_result(scores['latents'], suffix='_z') + self._report_result(scores['dlatents'], suffix='_w') + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/metrics/metric_base.py b/ContraCLIP/models/genforce/converters/stylegan_official/metrics/metric_base.py new file mode 100644 index 0000000000000000000000000000000000000000..0db82adecb60260393eaf82bd991575d79085787 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/metrics/metric_base.py @@ -0,0 +1,142 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Common definitions for GAN metrics.""" + +import os +import time +import hashlib +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +import config +from training import misc +from training import dataset + +#---------------------------------------------------------------------------- +# Standard metrics. 
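+# Each preset below is a kwargs template: func_name is resolved and instantiated via
+# dnnlib.util.call_func_by_name() when a MetricGroup is built. Illustrative sketch (added
+# comment; network_pkl and run_dir are assumed paths from a previous training run):
+#
+# metrics = MetricGroup([fid50k])
+# metrics.run(network_pkl, run_dir=run_dir, num_gpus=1)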
+ +fid50k = dnnlib.EasyDict(func_name='metrics.frechet_inception_distance.FID', name='fid50k', num_images=50000, minibatch_per_gpu=8) +ppl_zfull = dnnlib.EasyDict(func_name='metrics.perceptual_path_length.PPL', name='ppl_zfull', num_samples=100000, epsilon=1e-4, space='z', sampling='full', minibatch_per_gpu=16) +ppl_wfull = dnnlib.EasyDict(func_name='metrics.perceptual_path_length.PPL', name='ppl_wfull', num_samples=100000, epsilon=1e-4, space='w', sampling='full', minibatch_per_gpu=16) +ppl_zend = dnnlib.EasyDict(func_name='metrics.perceptual_path_length.PPL', name='ppl_zend', num_samples=100000, epsilon=1e-4, space='z', sampling='end', minibatch_per_gpu=16) +ppl_wend = dnnlib.EasyDict(func_name='metrics.perceptual_path_length.PPL', name='ppl_wend', num_samples=100000, epsilon=1e-4, space='w', sampling='end', minibatch_per_gpu=16) +ls = dnnlib.EasyDict(func_name='metrics.linear_separability.LS', name='ls', num_samples=200000, num_keep=100000, attrib_indices=range(40), minibatch_per_gpu=4) +dummy = dnnlib.EasyDict(func_name='metrics.metric_base.DummyMetric', name='dummy') # for debugging + +#---------------------------------------------------------------------------- +# Base class for metrics. + +class MetricBase: + def __init__(self, name): + self.name = name + self._network_pkl = None + self._dataset_args = None + self._mirror_augment = None + self._results = [] + self._eval_time = None + + def run(self, network_pkl, run_dir=None, dataset_args=None, mirror_augment=None, num_gpus=1, tf_config=None, log_results=True): + self._network_pkl = network_pkl + self._dataset_args = dataset_args + self._mirror_augment = mirror_augment + self._results = [] + + if (dataset_args is None or mirror_augment is None) and run_dir is not None: + run_config = misc.parse_config_for_previous_run(run_dir) + self._dataset_args = dict(run_config['dataset']) + self._dataset_args['shuffle_mb'] = 0 + self._mirror_augment = run_config['train'].get('mirror_augment', False) + + time_begin = time.time() + with tf.Graph().as_default(), tflib.create_session(tf_config).as_default(): # pylint: disable=not-context-manager + _G, _D, Gs = misc.load_pkl(self._network_pkl) + self._evaluate(Gs, num_gpus=num_gpus) + self._eval_time = time.time() - time_begin + + if log_results: + result_str = self.get_result_str() + if run_dir is not None: + log = os.path.join(run_dir, 'metric-%s.txt' % self.name) + with dnnlib.util.Logger(log, 'a'): + print(result_str) + else: + print(result_str) + + def get_result_str(self): + network_name = os.path.splitext(os.path.basename(self._network_pkl))[0] + if len(network_name) > 29: + network_name = '...' 
+ network_name[-26:] + result_str = '%-30s' % network_name + result_str += ' time %-12s' % dnnlib.util.format_time(self._eval_time) + for res in self._results: + result_str += ' ' + self.name + res.suffix + ' ' + result_str += res.fmt % res.value + return result_str + + def update_autosummaries(self): + for res in self._results: + tflib.autosummary.autosummary('Metrics/' + self.name + res.suffix, res.value) + + def _evaluate(self, Gs, num_gpus): + raise NotImplementedError # to be overridden by subclasses + + def _report_result(self, value, suffix='', fmt='%-10.4f'): + self._results += [dnnlib.EasyDict(value=value, suffix=suffix, fmt=fmt)] + + def _get_cache_file_for_reals(self, extension='pkl', **kwargs): + all_args = dnnlib.EasyDict(metric_name=self.name, mirror_augment=self._mirror_augment) + all_args.update(self._dataset_args) + all_args.update(kwargs) + md5 = hashlib.md5(repr(sorted(all_args.items())).encode('utf-8')) + dataset_name = self._dataset_args['tfrecord_dir'].replace('\\', '/').split('/')[-1] + return os.path.join(config.cache_dir, '%s-%s-%s.%s' % (md5.hexdigest(), self.name, dataset_name, extension)) + + def _iterate_reals(self, minibatch_size): + dataset_obj = dataset.load_dataset(data_dir=config.data_dir, **self._dataset_args) + while True: + images, _labels = dataset_obj.get_minibatch_np(minibatch_size) + if self._mirror_augment: + images = misc.apply_mirror_augment(images) + yield images + + def _iterate_fakes(self, Gs, minibatch_size, num_gpus): + while True: + latents = np.random.randn(minibatch_size, *Gs.input_shape[1:]) + fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True) + images = Gs.run(latents, None, output_transform=fmt, is_validation=True, num_gpus=num_gpus, assume_frozen=True) + yield images + +#---------------------------------------------------------------------------- +# Group of multiple metrics. + +class MetricGroup: + def __init__(self, metric_kwarg_list): + self.metrics = [dnnlib.util.call_func_by_name(**kwargs) for kwargs in metric_kwarg_list] + + def run(self, *args, **kwargs): + for metric in self.metrics: + metric.run(*args, **kwargs) + + def get_result_str(self): + return ' '.join(metric.get_result_str() for metric in self.metrics) + + def update_autosummaries(self): + for metric in self.metrics: + metric.update_autosummaries() + +#---------------------------------------------------------------------------- +# Dummy metric for debugging purposes. + +class DummyMetric(MetricBase): + def _evaluate(self, Gs, num_gpus): + _ = Gs, num_gpus + self._report_result(0.0) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/metrics/perceptual_path_length.py b/ContraCLIP/models/genforce/converters/stylegan_official/metrics/perceptual_path_length.py new file mode 100644 index 0000000000000000000000000000000000000000..17271cfdf1545a26ab71d309ce2180532f513bd6 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/metrics/perceptual_path_length.py @@ -0,0 +1,108 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 
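For orientation, a minimal sketch (not part of the upstream files) of how the metric descriptors and MetricBase defined in metric_base.py above are consumed; it mirrors the call pattern of run_metrics.py further below, and the network pickle path and dataset directory are placeholders.

import dnnlib
import dnnlib.tflib as tflib
from metrics import metric_base

tflib.init_tf()
# Instantiate the FID metric from its EasyDict descriptor (func_name, name, kwargs).
fid = dnnlib.util.call_func_by_name(**metric_base.fid50k)
# Evaluate a network pickle; dataset_args and mirror_augment should match the training run.
fid.run('results/00000-sgan-ffhq/network-final.pkl',                   # placeholder path
        dataset_args=dnnlib.EasyDict(tfrecord_dir='ffhq', shuffle_mb=0),
        mirror_augment=True, num_gpus=1)
print(fid.get_result_str())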
+ +"""Perceptual Path Length (PPL).""" + +import numpy as np +import tensorflow as tf +import dnnlib.tflib as tflib + +from metrics import metric_base +from training import misc + +#---------------------------------------------------------------------------- + +# Normalize batch of vectors. +def normalize(v): + return v / tf.sqrt(tf.reduce_sum(tf.square(v), axis=-1, keepdims=True)) + +# Spherical interpolation of a batch of vectors. +def slerp(a, b, t): + a = normalize(a) + b = normalize(b) + d = tf.reduce_sum(a * b, axis=-1, keepdims=True) + p = t * tf.math.acos(d) + c = normalize(b - d * a) + d = a * tf.math.cos(p) + c * tf.math.sin(p) + return normalize(d) + +#---------------------------------------------------------------------------- + +class PPL(metric_base.MetricBase): + def __init__(self, num_samples, epsilon, space, sampling, minibatch_per_gpu, **kwargs): + assert space in ['z', 'w'] + assert sampling in ['full', 'end'] + super().__init__(**kwargs) + self.num_samples = num_samples + self.epsilon = epsilon + self.space = space + self.sampling = sampling + self.minibatch_per_gpu = minibatch_per_gpu + + def _evaluate(self, Gs, num_gpus): + minibatch_size = num_gpus * self.minibatch_per_gpu + + # Construct TensorFlow graph. + distance_expr = [] + for gpu_idx in range(num_gpus): + with tf.device('/gpu:%d' % gpu_idx): + Gs_clone = Gs.clone() + noise_vars = [var for name, var in Gs_clone.components.synthesis.vars.items() if name.startswith('noise')] + + # Generate random latents and interpolation t-values. + lat_t01 = tf.random_normal([self.minibatch_per_gpu * 2] + Gs_clone.input_shape[1:]) + lerp_t = tf.random_uniform([self.minibatch_per_gpu], 0.0, 1.0 if self.sampling == 'full' else 0.0) + + # Interpolate in W or Z. + if self.space == 'w': + dlat_t01 = Gs_clone.components.mapping.get_output_for(lat_t01, None, is_validation=True) + dlat_t0, dlat_t1 = dlat_t01[0::2], dlat_t01[1::2] + dlat_e0 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis]) + dlat_e1 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis] + self.epsilon) + dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis=1), dlat_t01.shape) + else: # space == 'z' + lat_t0, lat_t1 = lat_t01[0::2], lat_t01[1::2] + lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis]) + lat_e1 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis] + self.epsilon) + lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis=1), lat_t01.shape) + dlat_e01 = Gs_clone.components.mapping.get_output_for(lat_e01, None, is_validation=True) + + # Synthesize images. + with tf.control_dependencies([var.initializer for var in noise_vars]): # use same noise inputs for the entire minibatch + images = Gs_clone.components.synthesis.get_output_for(dlat_e01, is_validation=True, randomize_noise=False) + + # Crop only the face region. + c = int(images.shape[2] // 8) + images = images[:, :, c*3 : c*7, c*2 : c*6] + + # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images. + if images.shape[2] > 256: + factor = images.shape[2] // 256 + images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor]) + images = tf.reduce_mean(images, axis=[3,5]) + + # Scale dynamic range from [-1,1] to [0,255] for VGG. + images = (images + 1) * (255 / 2) + + # Evaluate perceptual distance. 
+ img_e0, img_e1 = images[0::2], images[1::2] + distance_measure = misc.load_pkl('https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2') # vgg16_zhang_perceptual.pkl + distance_expr.append(distance_measure.get_output_for(img_e0, img_e1) * (1 / self.epsilon**2)) + + # Sampling loop. + all_distances = [] + for _ in range(0, self.num_samples, minibatch_size): + all_distances += tflib.run(distance_expr) + all_distances = np.concatenate(all_distances, axis=0) + + # Reject outliers. + lo = np.percentile(all_distances, 1, interpolation='lower') + hi = np.percentile(all_distances, 99, interpolation='higher') + filtered_distances = np.extract(np.logical_and(lo <= all_distances, all_distances <= hi), all_distances) + self._report_result(np.mean(filtered_distances)) + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/pretrained_example.py b/ContraCLIP/models/genforce/converters/stylegan_official/pretrained_example.py new file mode 100644 index 0000000000000000000000000000000000000000..63baef08bfa4bf34f52a0cf63e10a0b6783ac316 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/pretrained_example.py @@ -0,0 +1,47 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Minimal script for generating an image using pre-trained StyleGAN generator.""" + +import os +import pickle +import numpy as np +import PIL.Image +import dnnlib +import dnnlib.tflib as tflib +import config + +def main(): + # Initialize TensorFlow. + tflib.init_tf() + + # Load pre-trained network. + url = 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ' # karras2019stylegan-ffhq-1024x1024.pkl + with dnnlib.util.open_url(url, cache_dir=config.cache_dir) as f: + _G, _D, Gs = pickle.load(f) + # _G = Instantaneous snapshot of the generator. Mainly useful for resuming a previous training run. + # _D = Instantaneous snapshot of the discriminator. Mainly useful for resuming a previous training run. + # Gs = Long-term average of the generator. Yields higher-quality results than the instantaneous snapshot. + + # Print network details. + Gs.print_layers() + + # Pick latent vector. + rnd = np.random.RandomState(5) + latents = rnd.randn(1, Gs.input_shape[1]) + + # Generate image. + fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True) + images = Gs.run(latents, None, truncation_psi=0.7, randomize_noise=True, output_transform=fmt) + + # Save image. + os.makedirs(config.result_dir, exist_ok=True) + png_filename = os.path.join(config.result_dir, 'example.png') + PIL.Image.fromarray(images[0], 'RGB').save(png_filename) + +if __name__ == "__main__": + main() diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/run_metrics.py b/ContraCLIP/models/genforce/converters/stylegan_official/run_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..5d1597bbd4e16a2535309ea74c3559cae2a5fa53 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/run_metrics.py @@ -0,0 +1,105 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. 
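For reference, a plain-NumPy transliteration (not part of the upstream files) of the slerp() used by the PPL metric above, useful for sanity-checking the spherical interpolation outside of TensorFlow; the clip on the dot product is an added numerical safeguard not present in the TF version.

import numpy as np

def slerp_np(a, b, t):
    # Interpolate along the great circle between (normalized) a and b.
    a = a / np.linalg.norm(a, axis=-1, keepdims=True)
    b = b / np.linalg.norm(b, axis=-1, keepdims=True)
    d = np.sum(a * b, axis=-1, keepdims=True)            # cos of the angle between a and b
    p = t * np.arccos(np.clip(d, -1.0, 1.0))             # walk a fraction t of that angle
    c = b - d * a
    c = c / np.linalg.norm(c, axis=-1, keepdims=True)    # direction orthogonal to a, towards b
    out = a * np.cos(p) + c * np.sin(p)
    return out / np.linalg.norm(out, axis=-1, keepdims=True)

z0, z1 = np.random.randn(1, 512), np.random.randn(1, 512)
assert np.allclose(slerp_np(z0, z1, 0.0), z0 / np.linalg.norm(z0))   # t=0 recovers normalized z0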
To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Main entry point for training StyleGAN and ProGAN networks.""" + +import dnnlib +from dnnlib import EasyDict +import dnnlib.tflib as tflib + +import config +from metrics import metric_base +from training import misc + +#---------------------------------------------------------------------------- + +def run_pickle(submit_config, metric_args, network_pkl, dataset_args, mirror_augment): + ctx = dnnlib.RunContext(submit_config) + tflib.init_tf() + print('Evaluating %s metric on network_pkl "%s"...' % (metric_args.name, network_pkl)) + metric = dnnlib.util.call_func_by_name(**metric_args) + print() + metric.run(network_pkl, dataset_args=dataset_args, mirror_augment=mirror_augment, num_gpus=submit_config.num_gpus) + print() + ctx.close() + +#---------------------------------------------------------------------------- + +def run_snapshot(submit_config, metric_args, run_id, snapshot): + ctx = dnnlib.RunContext(submit_config) + tflib.init_tf() + print('Evaluating %s metric on run_id %s, snapshot %s...' % (metric_args.name, run_id, snapshot)) + run_dir = misc.locate_run_dir(run_id) + network_pkl = misc.locate_network_pkl(run_dir, snapshot) + metric = dnnlib.util.call_func_by_name(**metric_args) + print() + metric.run(network_pkl, run_dir=run_dir, num_gpus=submit_config.num_gpus) + print() + ctx.close() + +#---------------------------------------------------------------------------- + +def run_all_snapshots(submit_config, metric_args, run_id): + ctx = dnnlib.RunContext(submit_config) + tflib.init_tf() + print('Evaluating %s metric on all snapshots of run_id %s...' % (metric_args.name, run_id)) + run_dir = misc.locate_run_dir(run_id) + network_pkls = misc.list_network_pkls(run_dir) + metric = dnnlib.util.call_func_by_name(**metric_args) + print() + for idx, network_pkl in enumerate(network_pkls): + ctx.update('', idx, len(network_pkls)) + metric.run(network_pkl, run_dir=run_dir, num_gpus=submit_config.num_gpus) + print() + ctx.close() + +#---------------------------------------------------------------------------- + +def main(): + submit_config = dnnlib.SubmitConfig() + + # Which metrics to evaluate? + metrics = [] + metrics += [metric_base.fid50k] + #metrics += [metric_base.ppl_zfull] + #metrics += [metric_base.ppl_wfull] + #metrics += [metric_base.ppl_zend] + #metrics += [metric_base.ppl_wend] + #metrics += [metric_base.ls] + #metrics += [metric_base.dummy] + + # Which networks to evaluate them on? + tasks = [] + tasks += [EasyDict(run_func_name='run_metrics.run_pickle', network_pkl='https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ', dataset_args=EasyDict(tfrecord_dir='ffhq', shuffle_mb=0), mirror_augment=True)] # karras2019stylegan-ffhq-1024x1024.pkl + #tasks += [EasyDict(run_func_name='run_metrics.run_snapshot', run_id=100, snapshot=25000)] + #tasks += [EasyDict(run_func_name='run_metrics.run_all_snapshots', run_id=100)] + + # How many GPUs to use? + submit_config.num_gpus = 1 + #submit_config.num_gpus = 2 + #submit_config.num_gpus = 4 + #submit_config.num_gpus = 8 + + # Execute. 
+ submit_config.run_dir_root = dnnlib.submission.submit.get_template_from_path(config.result_dir) + submit_config.run_dir_ignore += config.run_dir_ignore + for task in tasks: + for metric in metrics: + submit_config.run_desc = '%s-%s' % (task.run_func_name, metric.name) + if task.run_func_name.endswith('run_snapshot'): + submit_config.run_desc += '-%s-%s' % (task.run_id, task.snapshot) + if task.run_func_name.endswith('run_all_snapshots'): + submit_config.run_desc += '-%s' % task.run_id + submit_config.run_desc += '-%dgpu' % submit_config.num_gpus + dnnlib.submit_run(submit_config, metric_args=metric, **task) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/train.py b/ContraCLIP/models/genforce/converters/stylegan_official/train.py new file mode 100644 index 0000000000000000000000000000000000000000..29df3c226b87816ceec25752293df08a70d63189 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/train.py @@ -0,0 +1,192 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Main entry point for training StyleGAN and ProGAN networks.""" + +import copy +import dnnlib +from dnnlib import EasyDict + +import config +from metrics import metric_base + +#---------------------------------------------------------------------------- +# Official training configs for StyleGAN, targeted mainly for FFHQ. + +if 1: + desc = 'sgan' # Description string included in result subdir name. + train = EasyDict(run_func_name='training.training_loop.training_loop') # Options for training loop. + G = EasyDict(func_name='training.networks_stylegan.G_style') # Options for generator network. + D = EasyDict(func_name='training.networks_stylegan.D_basic') # Options for discriminator network. + G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for generator optimizer. + D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for discriminator optimizer. + G_loss = EasyDict(func_name='training.loss.G_logistic_nonsaturating') # Options for generator loss. + D_loss = EasyDict(func_name='training.loss.D_logistic_simplegp', r1_gamma=10.0) # Options for discriminator loss. + dataset = EasyDict() # Options for load_dataset(). + sched = EasyDict() # Options for TrainingSchedule. + grid = EasyDict(size='4k', layout='random') # Options for setup_snapshot_image_grid(). + metrics = [metric_base.fid50k] # Options for MetricGroup. + submit_config = dnnlib.SubmitConfig() # Options for dnnlib.submit_run(). + tf_config = {'rnd.np_random_seed': 1000} # Options for tflib.init_tf(). + + # Dataset. 
+ desc += '-ffhq'; dataset = EasyDict(tfrecord_dir='ffhq'); train.mirror_augment = True + #desc += '-ffhq512'; dataset = EasyDict(tfrecord_dir='ffhq', resolution=512); train.mirror_augment = True + #desc += '-ffhq256'; dataset = EasyDict(tfrecord_dir='ffhq', resolution=256); train.mirror_augment = True + #desc += '-celebahq'; dataset = EasyDict(tfrecord_dir='celebahq'); train.mirror_augment = True + #desc += '-bedroom'; dataset = EasyDict(tfrecord_dir='lsun-bedroom-full'); train.mirror_augment = False + #desc += '-car'; dataset = EasyDict(tfrecord_dir='lsun-car-512x384'); train.mirror_augment = False + #desc += '-cat'; dataset = EasyDict(tfrecord_dir='lsun-cat-full'); train.mirror_augment = False + + # Number of GPUs. + #desc += '-1gpu'; submit_config.num_gpus = 1; sched.minibatch_base = 4; sched.minibatch_dict = {4: 128, 8: 128, 16: 128, 32: 64, 64: 32, 128: 16, 256: 8, 512: 4} + #desc += '-2gpu'; submit_config.num_gpus = 2; sched.minibatch_base = 8; sched.minibatch_dict = {4: 256, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16, 256: 8} + #desc += '-4gpu'; submit_config.num_gpus = 4; sched.minibatch_base = 16; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16} + desc += '-8gpu'; submit_config.num_gpus = 8; sched.minibatch_base = 32; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32} + + # Default options. + train.total_kimg = 25000 + sched.lod_initial_resolution = 8 + sched.G_lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003} + sched.D_lrate_dict = EasyDict(sched.G_lrate_dict) + + # WGAN-GP loss for CelebA-HQ. + #desc += '-wgangp'; G_loss = EasyDict(func_name='training.loss.G_wgan'); D_loss = EasyDict(func_name='training.loss.D_wgan_gp'); sched.G_lrate_dict = {k: min(v, 0.002) for k, v in sched.G_lrate_dict.items()}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict) + + # Table 1. + #desc += '-tuned-baseline'; G.use_styles = False; G.use_pixel_norm = True; G.use_instance_norm = False; G.mapping_layers = 0; G.truncation_psi = None; G.const_input_layer = False; G.style_mixing_prob = 0.0; G.use_noise = False + #desc += '-add-mapping-and-styles'; G.const_input_layer = False; G.style_mixing_prob = 0.0; G.use_noise = False + #desc += '-remove-traditional-input'; G.style_mixing_prob = 0.0; G.use_noise = False + #desc += '-add-noise-inputs'; G.style_mixing_prob = 0.0 + #desc += '-mixing-regularization' # default + + # Table 2. + #desc += '-mix0'; G.style_mixing_prob = 0.0 + #desc += '-mix50'; G.style_mixing_prob = 0.5 + #desc += '-mix90'; G.style_mixing_prob = 0.9 # default + #desc += '-mix100'; G.style_mixing_prob = 1.0 + + # Table 4. + #desc += '-traditional-0'; G.use_styles = False; G.use_pixel_norm = True; G.use_instance_norm = False; G.mapping_layers = 0; G.truncation_psi = None; G.const_input_layer = False; G.style_mixing_prob = 0.0; G.use_noise = False + #desc += '-traditional-8'; G.use_styles = False; G.use_pixel_norm = True; G.use_instance_norm = False; G.mapping_layers = 8; G.truncation_psi = None; G.const_input_layer = False; G.style_mixing_prob = 0.0; G.use_noise = False + #desc += '-stylebased-0'; G.mapping_layers = 0 + #desc += '-stylebased-1'; G.mapping_layers = 1 + #desc += '-stylebased-2'; G.mapping_layers = 2 + #desc += '-stylebased-8'; G.mapping_layers = 8 # default + +#---------------------------------------------------------------------------- +# Official training configs for Progressive GAN, targeted mainly for CelebA-HQ. + +if 0: + desc = 'pgan' # Description string included in result subdir name. 
+ train = EasyDict(run_func_name='training.training_loop.training_loop') # Options for training loop. + G = EasyDict(func_name='training.networks_progan.G_paper') # Options for generator network. + D = EasyDict(func_name='training.networks_progan.D_paper') # Options for discriminator network. + G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for generator optimizer. + D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for discriminator optimizer. + G_loss = EasyDict(func_name='training.loss.G_wgan') # Options for generator loss. + D_loss = EasyDict(func_name='training.loss.D_wgan_gp') # Options for discriminator loss. + dataset = EasyDict() # Options for load_dataset(). + sched = EasyDict() # Options for TrainingSchedule. + grid = EasyDict(size='1080p', layout='random') # Options for setup_snapshot_image_grid(). + metrics = [metric_base.fid50k] # Options for MetricGroup. + submit_config = dnnlib.SubmitConfig() # Options for dnnlib.submit_run(). + tf_config = {'rnd.np_random_seed': 1000} # Options for tflib.init_tf(). + + # Dataset (choose one). + desc += '-celebahq'; dataset = EasyDict(tfrecord_dir='celebahq'); train.mirror_augment = True + #desc += '-celeba'; dataset = EasyDict(tfrecord_dir='celeba'); train.mirror_augment = True + #desc += '-cifar10'; dataset = EasyDict(tfrecord_dir='cifar10') + #desc += '-cifar100'; dataset = EasyDict(tfrecord_dir='cifar100') + #desc += '-svhn'; dataset = EasyDict(tfrecord_dir='svhn') + #desc += '-mnist'; dataset = EasyDict(tfrecord_dir='mnist') + #desc += '-mnistrgb'; dataset = EasyDict(tfrecord_dir='mnistrgb') + #desc += '-syn1024rgb'; dataset = EasyDict(class_name='training.dataset.SyntheticDataset', resolution=1024, num_channels=3) + #desc += '-lsun-airplane'; dataset = EasyDict(tfrecord_dir='lsun-airplane-100k'); train.mirror_augment = True + #desc += '-lsun-bedroom'; dataset = EasyDict(tfrecord_dir='lsun-bedroom-100k'); train.mirror_augment = True + #desc += '-lsun-bicycle'; dataset = EasyDict(tfrecord_dir='lsun-bicycle-100k'); train.mirror_augment = True + #desc += '-lsun-bird'; dataset = EasyDict(tfrecord_dir='lsun-bird-100k'); train.mirror_augment = True + #desc += '-lsun-boat'; dataset = EasyDict(tfrecord_dir='lsun-boat-100k'); train.mirror_augment = True + #desc += '-lsun-bottle'; dataset = EasyDict(tfrecord_dir='lsun-bottle-100k'); train.mirror_augment = True + #desc += '-lsun-bridge'; dataset = EasyDict(tfrecord_dir='lsun-bridge-100k'); train.mirror_augment = True + #desc += '-lsun-bus'; dataset = EasyDict(tfrecord_dir='lsun-bus-100k'); train.mirror_augment = True + #desc += '-lsun-car'; dataset = EasyDict(tfrecord_dir='lsun-car-100k'); train.mirror_augment = True + #desc += '-lsun-cat'; dataset = EasyDict(tfrecord_dir='lsun-cat-100k'); train.mirror_augment = True + #desc += '-lsun-chair'; dataset = EasyDict(tfrecord_dir='lsun-chair-100k'); train.mirror_augment = True + #desc += '-lsun-churchoutdoor'; dataset = EasyDict(tfrecord_dir='lsun-churchoutdoor-100k'); train.mirror_augment = True + #desc += '-lsun-classroom'; dataset = EasyDict(tfrecord_dir='lsun-classroom-100k'); train.mirror_augment = True + #desc += '-lsun-conferenceroom'; dataset = EasyDict(tfrecord_dir='lsun-conferenceroom-100k'); train.mirror_augment = True + #desc += '-lsun-cow'; dataset = EasyDict(tfrecord_dir='lsun-cow-100k'); train.mirror_augment = True + #desc += '-lsun-diningroom'; dataset = EasyDict(tfrecord_dir='lsun-diningroom-100k'); train.mirror_augment = True + #desc += '-lsun-diningtable'; dataset = 
EasyDict(tfrecord_dir='lsun-diningtable-100k'); train.mirror_augment = True + #desc += '-lsun-dog'; dataset = EasyDict(tfrecord_dir='lsun-dog-100k'); train.mirror_augment = True + #desc += '-lsun-horse'; dataset = EasyDict(tfrecord_dir='lsun-horse-100k'); train.mirror_augment = True + #desc += '-lsun-kitchen'; dataset = EasyDict(tfrecord_dir='lsun-kitchen-100k'); train.mirror_augment = True + #desc += '-lsun-livingroom'; dataset = EasyDict(tfrecord_dir='lsun-livingroom-100k'); train.mirror_augment = True + #desc += '-lsun-motorbike'; dataset = EasyDict(tfrecord_dir='lsun-motorbike-100k'); train.mirror_augment = True + #desc += '-lsun-person'; dataset = EasyDict(tfrecord_dir='lsun-person-100k'); train.mirror_augment = True + #desc += '-lsun-pottedplant'; dataset = EasyDict(tfrecord_dir='lsun-pottedplant-100k'); train.mirror_augment = True + #desc += '-lsun-restaurant'; dataset = EasyDict(tfrecord_dir='lsun-restaurant-100k'); train.mirror_augment = True + #desc += '-lsun-sheep'; dataset = EasyDict(tfrecord_dir='lsun-sheep-100k'); train.mirror_augment = True + #desc += '-lsun-sofa'; dataset = EasyDict(tfrecord_dir='lsun-sofa-100k'); train.mirror_augment = True + #desc += '-lsun-tower'; dataset = EasyDict(tfrecord_dir='lsun-tower-100k'); train.mirror_augment = True + #desc += '-lsun-train'; dataset = EasyDict(tfrecord_dir='lsun-train-100k'); train.mirror_augment = True + #desc += '-lsun-tvmonitor'; dataset = EasyDict(tfrecord_dir='lsun-tvmonitor-100k'); train.mirror_augment = True + + # Conditioning & snapshot options. + #desc += '-cond'; dataset.max_label_size = 'full' # conditioned on full label + #desc += '-cond1'; dataset.max_label_size = 1 # conditioned on first component of the label + #desc += '-g4k'; grid.size = '4k' + #desc += '-grpc'; grid.layout = 'row_per_class' + + # Config presets (choose one). + #desc += '-preset-v1-1gpu'; submit_config.num_gpus = 1; D.mbstd_group_size = 16; sched.minibatch_base = 16; sched.minibatch_dict = {256: 14, 512: 6, 1024: 3}; sched.lod_training_kimg = 800; sched.lod_transition_kimg = 800; train.total_kimg = 19000 + desc += '-preset-v2-1gpu'; submit_config.num_gpus = 1; sched.minibatch_base = 4; sched.minibatch_dict = {4: 128, 8: 128, 16: 128, 32: 64, 64: 32, 128: 16, 256: 8, 512: 4}; sched.G_lrate_dict = {1024: 0.0015}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000 + #desc += '-preset-v2-2gpus'; submit_config.num_gpus = 2; sched.minibatch_base = 8; sched.minibatch_dict = {4: 256, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16, 256: 8}; sched.G_lrate_dict = {512: 0.0015, 1024: 0.002}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000 + #desc += '-preset-v2-4gpus'; submit_config.num_gpus = 4; sched.minibatch_base = 16; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16}; sched.G_lrate_dict = {256: 0.0015, 512: 0.002, 1024: 0.003}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000 + #desc += '-preset-v2-8gpus'; submit_config.num_gpus = 8; sched.minibatch_base = 32; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32}; sched.G_lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000 + + # Numerical precision (choose one). 
+ desc += '-fp32'; sched.max_minibatch_per_gpu = {256: 16, 512: 8, 1024: 4} + #desc += '-fp16'; G.dtype = 'float16'; D.dtype = 'float16'; G.pixelnorm_epsilon=1e-4; G_opt.use_loss_scaling = True; D_opt.use_loss_scaling = True; sched.max_minibatch_per_gpu = {512: 16, 1024: 8} + + # Disable individual features. + #desc += '-nogrowing'; sched.lod_initial_resolution = 1024; sched.lod_training_kimg = 0; sched.lod_transition_kimg = 0; train.total_kimg = 10000 + #desc += '-nopixelnorm'; G.use_pixelnorm = False + #desc += '-nowscale'; G.use_wscale = False; D.use_wscale = False + #desc += '-noleakyrelu'; G.use_leakyrelu = False + #desc += '-nosmoothing'; train.G_smoothing_kimg = 0.0 + #desc += '-norepeat'; train.minibatch_repeats = 1 + #desc += '-noreset'; train.reset_opt_for_new_lod = False + + # Special modes. + #desc += '-BENCHMARK'; sched.lod_initial_resolution = 4; sched.lod_training_kimg = 3; sched.lod_transition_kimg = 3; train.total_kimg = (8*2+1)*3; sched.tick_kimg_base = 1; sched.tick_kimg_dict = {}; train.image_snapshot_ticks = 1000; train.network_snapshot_ticks = 1000 + #desc += '-BENCHMARK0'; sched.lod_initial_resolution = 1024; train.total_kimg = 10; sched.tick_kimg_base = 1; sched.tick_kimg_dict = {}; train.image_snapshot_ticks = 1000; train.network_snapshot_ticks = 1000 + #desc += '-VERBOSE'; sched.tick_kimg_base = 1; sched.tick_kimg_dict = {}; train.image_snapshot_ticks = 1; train.network_snapshot_ticks = 100 + #desc += '-GRAPH'; train.save_tf_graph = True + #desc += '-HIST'; train.save_weight_histograms = True + +#---------------------------------------------------------------------------- +# Main entry point for training. +# Calls the function indicated by 'train' using the selected options. + +def main(): + kwargs = EasyDict(train) + kwargs.update(G_args=G, D_args=D, G_opt_args=G_opt, D_opt_args=D_opt, G_loss_args=G_loss, D_loss_args=D_loss) + kwargs.update(dataset_args=dataset, sched_args=sched, grid_args=grid, metric_arg_list=metrics, tf_config=tf_config) + kwargs.submit_config = copy.deepcopy(submit_config) + kwargs.submit_config.run_dir_root = dnnlib.submission.submit.get_template_from_path(config.result_dir) + kwargs.submit_config.run_dir_ignore += config.run_dir_ignore + kwargs.submit_config.run_desc = desc + dnnlib.submit_run(**kwargs) + +#---------------------------------------------------------------------------- + +if __name__ == "__main__": + main() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/training/__init__.py b/ContraCLIP/models/genforce/converters/stylegan_official/training/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..db8124b132f91216c0ded226f20ea3a046734728 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/training/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 
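The configuration blocks in train.py above work by mutating dnnlib.EasyDict instances, a small dict subclass that also allows attribute-style access, which main() then splats into the training loop as keyword arguments. A minimal illustration of the mechanism (not part of the upstream file; values copied from the defaults above):

from dnnlib import EasyDict

sched = EasyDict()
sched.lod_initial_resolution = 8                      # attribute-style write...
sched.G_lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003}
assert sched['lod_initial_resolution'] == 8           # ...is equivalent to dict-style access

# Selecting a preset simply mutates the same dicts further, e.g. the 1-GPU row above:
sched.minibatch_base = 4
sched.minibatch_dict = {4: 128, 8: 128, 16: 128, 32: 64, 64: 32, 128: 16, 256: 8, 512: 4}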
+ +# empty diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/training/dataset.py b/ContraCLIP/models/genforce/converters/stylegan_official/training/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..cf142226b1794b675d61151467444cb65bdaa1a0 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/training/dataset.py @@ -0,0 +1,241 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Multi-resolution input data pipeline.""" + +import os +import glob +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +#---------------------------------------------------------------------------- +# Parse individual image from a tfrecords file. + +def parse_tfrecord_tf(record): + features = tf.parse_single_example(record, features={ + 'shape': tf.FixedLenFeature([3], tf.int64), + 'data': tf.FixedLenFeature([], tf.string)}) + data = tf.decode_raw(features['data'], tf.uint8) + return tf.reshape(data, features['shape']) + +def parse_tfrecord_np(record): + ex = tf.train.Example() + ex.ParseFromString(record) + shape = ex.features.feature['shape'].int64_list.value # temporary pylint workaround # pylint: disable=no-member + data = ex.features.feature['data'].bytes_list.value[0] # temporary pylint workaround # pylint: disable=no-member + return np.fromstring(data, np.uint8).reshape(shape) + +#---------------------------------------------------------------------------- +# Dataset class that loads data from tfrecords files. + +class TFRecordDataset: + def __init__(self, + tfrecord_dir, # Directory containing a collection of tfrecords files. + resolution = None, # Dataset resolution, None = autodetect. + label_file = None, # Relative path of the labels file, None = autodetect. + max_label_size = 0, # 0 = no labels, 'full' = full labels, = N first label components. + repeat = True, # Repeat dataset indefinitely. + shuffle_mb = 4096, # Shuffle data within specified window (megabytes), 0 = disable shuffling. + prefetch_mb = 2048, # Amount of data to prefetch (megabytes), 0 = disable prefetching. + buffer_mb = 256, # Read buffer size (megabytes). + num_threads = 2): # Number of concurrent threads. + + self.tfrecord_dir = tfrecord_dir + self.resolution = None + self.resolution_log2 = None + self.shape = [] # [channel, height, width] + self.dtype = 'uint8' + self.dynamic_range = [0, 255] + self.label_file = label_file + self.label_size = None # [component] + self.label_dtype = None + self._np_labels = None + self._tf_minibatch_in = None + self._tf_labels_var = None + self._tf_labels_dataset = None + self._tf_datasets = dict() + self._tf_iterator = None + self._tf_init_ops = dict() + self._tf_minibatch_np = None + self._cur_minibatch = -1 + self._cur_lod = -1 + + # List tfrecords files and inspect their shapes. 
+ assert os.path.isdir(self.tfrecord_dir) + tfr_files = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.tfrecords'))) + assert len(tfr_files) >= 1 + tfr_shapes = [] + for tfr_file in tfr_files: + tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE) + for record in tf.python_io.tf_record_iterator(tfr_file, tfr_opt): + tfr_shapes.append(parse_tfrecord_np(record).shape) + break + + # Autodetect label filename. + if self.label_file is None: + guess = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.labels'))) + if len(guess): + self.label_file = guess[0] + elif not os.path.isfile(self.label_file): + guess = os.path.join(self.tfrecord_dir, self.label_file) + if os.path.isfile(guess): + self.label_file = guess + + # Determine shape and resolution. + max_shape = max(tfr_shapes, key=np.prod) + self.resolution = resolution if resolution is not None else max_shape[1] + self.resolution_log2 = int(np.log2(self.resolution)) + self.shape = [max_shape[0], self.resolution, self.resolution] + tfr_lods = [self.resolution_log2 - int(np.log2(shape[1])) for shape in tfr_shapes] + assert all(shape[0] == max_shape[0] for shape in tfr_shapes) + assert all(shape[1] == shape[2] for shape in tfr_shapes) + assert all(shape[1] == self.resolution // (2**lod) for shape, lod in zip(tfr_shapes, tfr_lods)) + assert all(lod in tfr_lods for lod in range(self.resolution_log2 - 1)) + + # Load labels. + assert max_label_size == 'full' or max_label_size >= 0 + self._np_labels = np.zeros([1<<20, 0], dtype=np.float32) + if self.label_file is not None and max_label_size != 0: + self._np_labels = np.load(self.label_file) + assert self._np_labels.ndim == 2 + if max_label_size != 'full' and self._np_labels.shape[1] > max_label_size: + self._np_labels = self._np_labels[:, :max_label_size] + self.label_size = self._np_labels.shape[1] + self.label_dtype = self._np_labels.dtype.name + + # Build TF expressions. + with tf.name_scope('Dataset'), tf.device('/cpu:0'): + self._tf_minibatch_in = tf.placeholder(tf.int64, name='minibatch_in', shape=[]) + self._tf_labels_var = tflib.create_var_with_large_initial_value(self._np_labels, name='labels_var') + self._tf_labels_dataset = tf.data.Dataset.from_tensor_slices(self._tf_labels_var) + for tfr_file, tfr_shape, tfr_lod in zip(tfr_files, tfr_shapes, tfr_lods): + if tfr_lod < 0: + continue + dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_mb<<20) + dset = dset.map(parse_tfrecord_tf, num_parallel_calls=num_threads) + dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset)) + bytes_per_item = np.prod(tfr_shape) * np.dtype(self.dtype).itemsize + if shuffle_mb > 0: + dset = dset.shuffle(((shuffle_mb << 20) - 1) // bytes_per_item + 1) + if repeat: + dset = dset.repeat() + if prefetch_mb > 0: + dset = dset.prefetch(((prefetch_mb << 20) - 1) // bytes_per_item + 1) + dset = dset.batch(self._tf_minibatch_in) + self._tf_datasets[tfr_lod] = dset + self._tf_iterator = tf.data.Iterator.from_structure(self._tf_datasets[0].output_types, self._tf_datasets[0].output_shapes) + self._tf_init_ops = {lod: self._tf_iterator.make_initializer(dset) for lod, dset in self._tf_datasets.items()} + + # Use the given minibatch size and level-of-detail for the data returned by get_minibatch_tf(). 
+ def configure(self, minibatch_size, lod=0): + lod = int(np.floor(lod)) + assert minibatch_size >= 1 and lod in self._tf_datasets + if self._cur_minibatch != minibatch_size or self._cur_lod != lod: + self._tf_init_ops[lod].run({self._tf_minibatch_in: minibatch_size}) + self._cur_minibatch = minibatch_size + self._cur_lod = lod + + # Get next minibatch as TensorFlow expressions. + def get_minibatch_tf(self): # => images, labels + return self._tf_iterator.get_next() + + # Get next minibatch as NumPy arrays. + def get_minibatch_np(self, minibatch_size, lod=0): # => images, labels + self.configure(minibatch_size, lod) + if self._tf_minibatch_np is None: + self._tf_minibatch_np = self.get_minibatch_tf() + return tflib.run(self._tf_minibatch_np) + + # Get random labels as TensorFlow expression. + def get_random_labels_tf(self, minibatch_size): # => labels + if self.label_size > 0: + with tf.device('/cpu:0'): + return tf.gather(self._tf_labels_var, tf.random_uniform([minibatch_size], 0, self._np_labels.shape[0], dtype=tf.int32)) + return tf.zeros([minibatch_size, 0], self.label_dtype) + + # Get random labels as NumPy array. + def get_random_labels_np(self, minibatch_size): # => labels + if self.label_size > 0: + return self._np_labels[np.random.randint(self._np_labels.shape[0], size=[minibatch_size])] + return np.zeros([minibatch_size, 0], self.label_dtype) + +#---------------------------------------------------------------------------- +# Base class for datasets that are generated on the fly. + +class SyntheticDataset: + def __init__(self, resolution=1024, num_channels=3, dtype='uint8', dynamic_range=[0,255], label_size=0, label_dtype='float32'): + self.resolution = resolution + self.resolution_log2 = int(np.log2(resolution)) + self.shape = [num_channels, resolution, resolution] + self.dtype = dtype + self.dynamic_range = dynamic_range + self.label_size = label_size + self.label_dtype = label_dtype + self._tf_minibatch_var = None + self._tf_lod_var = None + self._tf_minibatch_np = None + self._tf_labels_np = None + + assert self.resolution == 2 ** self.resolution_log2 + with tf.name_scope('Dataset'): + self._tf_minibatch_var = tf.Variable(np.int32(0), name='minibatch_var') + self._tf_lod_var = tf.Variable(np.int32(0), name='lod_var') + + def configure(self, minibatch_size, lod=0): + lod = int(np.floor(lod)) + assert minibatch_size >= 1 and 0 <= lod <= self.resolution_log2 + tflib.set_vars({self._tf_minibatch_var: minibatch_size, self._tf_lod_var: lod}) + + def get_minibatch_tf(self): # => images, labels + with tf.name_scope('SyntheticDataset'): + shrink = tf.cast(2.0 ** tf.cast(self._tf_lod_var, tf.float32), tf.int32) + shape = [self.shape[0], self.shape[1] // shrink, self.shape[2] // shrink] + images = self._generate_images(self._tf_minibatch_var, self._tf_lod_var, shape) + labels = self._generate_labels(self._tf_minibatch_var) + return images, labels + + def get_minibatch_np(self, minibatch_size, lod=0): # => images, labels + self.configure(minibatch_size, lod) + if self._tf_minibatch_np is None: + self._tf_minibatch_np = self.get_minibatch_tf() + return tflib.run(self._tf_minibatch_np) + + def get_random_labels_tf(self, minibatch_size): # => labels + with tf.name_scope('SyntheticDataset'): + return self._generate_labels(minibatch_size) + + def get_random_labels_np(self, minibatch_size): # => labels + self.configure(minibatch_size) + if self._tf_labels_np is None: + self._tf_labels_np = self.get_random_labels_tf(minibatch_size) + return tflib.run(self._tf_labels_np) + + def 
_generate_images(self, minibatch, lod, shape): # to be overridden by subclasses # pylint: disable=unused-argument + return tf.zeros([minibatch] + shape, self.dtype) + + def _generate_labels(self, minibatch): # to be overridden by subclasses + return tf.zeros([minibatch, self.label_size], self.label_dtype) + +#---------------------------------------------------------------------------- +# Helper func for constructing a dataset object using the given options. + +def load_dataset(class_name='training.dataset.TFRecordDataset', data_dir=None, verbose=False, **kwargs): + adjusted_kwargs = dict(kwargs) + if 'tfrecord_dir' in adjusted_kwargs and data_dir is not None: + adjusted_kwargs['tfrecord_dir'] = os.path.join(data_dir, adjusted_kwargs['tfrecord_dir']) + if verbose: + print('Streaming data using %s...' % class_name) + dataset = dnnlib.util.get_obj_by_name(class_name)(**adjusted_kwargs) + if verbose: + print('Dataset shape =', np.int32(dataset.shape).tolist()) + print('Dynamic range =', dataset.dynamic_range) + print('Label size =', dataset.label_size) + return dataset + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/training/loss.py b/ContraCLIP/models/genforce/converters/stylegan_official/training/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..aa59b61bf316f73f269849b54ec3bb35b6a0d61d --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/training/loss.py @@ -0,0 +1,177 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Loss functions.""" + +import tensorflow as tf +import dnnlib.tflib as tflib +from dnnlib.tflib.autosummary import autosummary + +#---------------------------------------------------------------------------- +# Convenience func that casts all of its arguments to tf.float32. + +def fp32(*values): + if len(values) == 1 and isinstance(values[0], tuple): + values = values[0] + values = tuple(tf.cast(v, tf.float32) for v in values) + return values if len(values) >= 2 else values[0] + +#---------------------------------------------------------------------------- +# WGAN & WGAN-GP loss functions. + +def G_wgan(G, D, opt, training_set, minibatch_size): # pylint: disable=unused-argument + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + labels = training_set.get_random_labels_tf(minibatch_size) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + fake_scores_out = fp32(D.get_output_for(fake_images_out, labels, is_training=True)) + loss = -fake_scores_out + return loss + +def D_wgan(G, D, opt, training_set, minibatch_size, reals, labels, # pylint: disable=unused-argument + wgan_epsilon = 0.001): # Weight for the epsilon term, \epsilon_{drift}. 
+ + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + real_scores_out = fp32(D.get_output_for(reals, labels, is_training=True)) + fake_scores_out = fp32(D.get_output_for(fake_images_out, labels, is_training=True)) + real_scores_out = autosummary('Loss/scores/real', real_scores_out) + fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out) + loss = fake_scores_out - real_scores_out + + with tf.name_scope('EpsilonPenalty'): + epsilon_penalty = autosummary('Loss/epsilon_penalty', tf.square(real_scores_out)) + loss += epsilon_penalty * wgan_epsilon + return loss + +def D_wgan_gp(G, D, opt, training_set, minibatch_size, reals, labels, # pylint: disable=unused-argument + wgan_lambda = 10.0, # Weight for the gradient penalty term. + wgan_epsilon = 0.001, # Weight for the epsilon term, \epsilon_{drift}. + wgan_target = 1.0): # Target value for gradient magnitudes. + + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + real_scores_out = fp32(D.get_output_for(reals, labels, is_training=True)) + fake_scores_out = fp32(D.get_output_for(fake_images_out, labels, is_training=True)) + real_scores_out = autosummary('Loss/scores/real', real_scores_out) + fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out) + loss = fake_scores_out - real_scores_out + + with tf.name_scope('GradientPenalty'): + mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fake_images_out.dtype) + mixed_images_out = tflib.lerp(tf.cast(reals, fake_images_out.dtype), fake_images_out, mixing_factors) + mixed_scores_out = fp32(D.get_output_for(mixed_images_out, labels, is_training=True)) + mixed_scores_out = autosummary('Loss/scores/mixed', mixed_scores_out) + mixed_loss = opt.apply_loss_scaling(tf.reduce_sum(mixed_scores_out)) + mixed_grads = opt.undo_loss_scaling(fp32(tf.gradients(mixed_loss, [mixed_images_out])[0])) + mixed_norms = tf.sqrt(tf.reduce_sum(tf.square(mixed_grads), axis=[1,2,3])) + mixed_norms = autosummary('Loss/mixed_norms', mixed_norms) + gradient_penalty = tf.square(mixed_norms - wgan_target) + loss += gradient_penalty * (wgan_lambda / (wgan_target**2)) + + with tf.name_scope('EpsilonPenalty'): + epsilon_penalty = autosummary('Loss/epsilon_penalty', tf.square(real_scores_out)) + loss += epsilon_penalty * wgan_epsilon + return loss + +#---------------------------------------------------------------------------- +# Hinge loss functions. (Use G_wgan with these) + +def D_hinge(G, D, opt, training_set, minibatch_size, reals, labels): # pylint: disable=unused-argument + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + real_scores_out = fp32(D.get_output_for(reals, labels, is_training=True)) + fake_scores_out = fp32(D.get_output_for(fake_images_out, labels, is_training=True)) + real_scores_out = autosummary('Loss/scores/real', real_scores_out) + fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out) + loss = tf.maximum(0., 1.+fake_scores_out) + tf.maximum(0., 1.-real_scores_out) + return loss + +def D_hinge_gp(G, D, opt, training_set, minibatch_size, reals, labels, # pylint: disable=unused-argument + wgan_lambda = 10.0, # Weight for the gradient penalty term. + wgan_target = 1.0): # Target value for gradient magnitudes. 
+ + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + real_scores_out = fp32(D.get_output_for(reals, labels, is_training=True)) + fake_scores_out = fp32(D.get_output_for(fake_images_out, labels, is_training=True)) + real_scores_out = autosummary('Loss/scores/real', real_scores_out) + fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out) + loss = tf.maximum(0., 1.+fake_scores_out) + tf.maximum(0., 1.-real_scores_out) + + with tf.name_scope('GradientPenalty'): + mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fake_images_out.dtype) + mixed_images_out = tflib.lerp(tf.cast(reals, fake_images_out.dtype), fake_images_out, mixing_factors) + mixed_scores_out = fp32(D.get_output_for(mixed_images_out, labels, is_training=True)) + mixed_scores_out = autosummary('Loss/scores/mixed', mixed_scores_out) + mixed_loss = opt.apply_loss_scaling(tf.reduce_sum(mixed_scores_out)) + mixed_grads = opt.undo_loss_scaling(fp32(tf.gradients(mixed_loss, [mixed_images_out])[0])) + mixed_norms = tf.sqrt(tf.reduce_sum(tf.square(mixed_grads), axis=[1,2,3])) + mixed_norms = autosummary('Loss/mixed_norms', mixed_norms) + gradient_penalty = tf.square(mixed_norms - wgan_target) + loss += gradient_penalty * (wgan_lambda / (wgan_target**2)) + return loss + + +#---------------------------------------------------------------------------- +# Loss functions advocated by the paper +# "Which Training Methods for GANs do actually Converge?" + +def G_logistic_saturating(G, D, opt, training_set, minibatch_size): # pylint: disable=unused-argument + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + labels = training_set.get_random_labels_tf(minibatch_size) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + fake_scores_out = fp32(D.get_output_for(fake_images_out, labels, is_training=True)) + loss = -tf.nn.softplus(fake_scores_out) # log(1 - logistic(fake_scores_out)) + return loss + +def G_logistic_nonsaturating(G, D, opt, training_set, minibatch_size): # pylint: disable=unused-argument + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + labels = training_set.get_random_labels_tf(minibatch_size) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + fake_scores_out = fp32(D.get_output_for(fake_images_out, labels, is_training=True)) + loss = tf.nn.softplus(-fake_scores_out) # -log(logistic(fake_scores_out)) + return loss + +def D_logistic(G, D, opt, training_set, minibatch_size, reals, labels): # pylint: disable=unused-argument + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + fake_images_out = G.get_output_for(latents, labels, is_training=True) + real_scores_out = fp32(D.get_output_for(reals, labels, is_training=True)) + fake_scores_out = fp32(D.get_output_for(fake_images_out, labels, is_training=True)) + real_scores_out = autosummary('Loss/scores/real', real_scores_out) + fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out) + loss = tf.nn.softplus(fake_scores_out) # -log(1 - logistic(fake_scores_out)) + loss += tf.nn.softplus(-real_scores_out) # -log(logistic(real_scores_out)) # temporary pylint workaround # pylint: disable=invalid-unary-operand-type + return loss + +def D_logistic_simplegp(G, D, opt, training_set, minibatch_size, reals, labels, r1_gamma=10.0, r2_gamma=0.0): # pylint: disable=unused-argument + latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) + 
fake_images_out = G.get_output_for(latents, labels, is_training=True) + real_scores_out = fp32(D.get_output_for(reals, labels, is_training=True)) + fake_scores_out = fp32(D.get_output_for(fake_images_out, labels, is_training=True)) + real_scores_out = autosummary('Loss/scores/real', real_scores_out) + fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out) + loss = tf.nn.softplus(fake_scores_out) # -log(1 - logistic(fake_scores_out)) + loss += tf.nn.softplus(-real_scores_out) # -log(logistic(real_scores_out)) # temporary pylint workaround # pylint: disable=invalid-unary-operand-type + + if r1_gamma != 0.0: + with tf.name_scope('R1Penalty'): + real_loss = opt.apply_loss_scaling(tf.reduce_sum(real_scores_out)) + real_grads = opt.undo_loss_scaling(fp32(tf.gradients(real_loss, [reals])[0])) + r1_penalty = tf.reduce_sum(tf.square(real_grads), axis=[1,2,3]) + r1_penalty = autosummary('Loss/r1_penalty', r1_penalty) + loss += r1_penalty * (r1_gamma * 0.5) + + if r2_gamma != 0.0: + with tf.name_scope('R2Penalty'): + fake_loss = opt.apply_loss_scaling(tf.reduce_sum(fake_scores_out)) + fake_grads = opt.undo_loss_scaling(fp32(tf.gradients(fake_loss, [fake_images_out])[0])) + r2_penalty = tf.reduce_sum(tf.square(fake_grads), axis=[1,2,3]) + r2_penalty = autosummary('Loss/r2_penalty', r2_penalty) + loss += r2_penalty * (r2_gamma * 0.5) + return loss + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/training/misc.py b/ContraCLIP/models/genforce/converters/stylegan_official/training/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..50ae51c722cb1e553c56051cbd4556110fe4a1f9 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/training/misc.py @@ -0,0 +1,245 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Miscellaneous utility functions.""" + +import os +import glob +import pickle +import re +import numpy as np +from collections import defaultdict +import PIL.Image +import dnnlib + +import config +from training import dataset + +#---------------------------------------------------------------------------- +# Convenience wrappers for pickle that are able to load data produced by +# older versions of the code, and from external URLs. + +def open_file_or_url(file_or_url): + if dnnlib.util.is_url(file_or_url): + return dnnlib.util.open_url(file_or_url, cache_dir=config.cache_dir) + return open(file_or_url, 'rb') + +def load_pkl(file_or_url): + with open_file_or_url(file_or_url) as file: + return pickle.load(file, encoding='latin1') + +def save_pkl(obj, filename): + with open(filename, 'wb') as file: + pickle.dump(obj, file, protocol=pickle.HIGHEST_PROTOCOL) + +#---------------------------------------------------------------------------- +# Image utils. 
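A brief note on D_logistic_simplegp in loss.py above: the R1 term added to the discriminator loss is the zero-centered gradient penalty on real data from Mescheder et al., "Which Training Methods for GANs do actually Converge?",

    R_1(\psi) = \frac{\gamma}{2} \, \mathbb{E}_{x \sim p_{\mathrm{data}}} \left[ \lVert \nabla_x D_\psi(x) \rVert^2 \right],

which the code realizes as loss += r1_penalty * (r1_gamma * 0.5), with r1_penalty being the squared gradient norm summed over the image dimensions of each real sample; R2 is the analogous penalty on fake samples, weighted by r2_gamma.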
+ +def adjust_dynamic_range(data, drange_in, drange_out): + if drange_in != drange_out: + scale = (np.float32(drange_out[1]) - np.float32(drange_out[0])) / (np.float32(drange_in[1]) - np.float32(drange_in[0])) + bias = (np.float32(drange_out[0]) - np.float32(drange_in[0]) * scale) + data = data * scale + bias + return data + +def create_image_grid(images, grid_size=None): + assert images.ndim == 3 or images.ndim == 4 + num, img_w, img_h = images.shape[0], images.shape[-1], images.shape[-2] + + if grid_size is not None: + grid_w, grid_h = tuple(grid_size) + else: + grid_w = max(int(np.ceil(np.sqrt(num))), 1) + grid_h = max((num - 1) // grid_w + 1, 1) + + grid = np.zeros(list(images.shape[1:-2]) + [grid_h * img_h, grid_w * img_w], dtype=images.dtype) + for idx in range(num): + x = (idx % grid_w) * img_w + y = (idx // grid_w) * img_h + grid[..., y : y + img_h, x : x + img_w] = images[idx] + return grid + +def convert_to_pil_image(image, drange=[0,1]): + assert image.ndim == 2 or image.ndim == 3 + if image.ndim == 3: + if image.shape[0] == 1: + image = image[0] # grayscale CHW => HW + else: + image = image.transpose(1, 2, 0) # CHW -> HWC + + image = adjust_dynamic_range(image, drange, [0,255]) + image = np.rint(image).clip(0, 255).astype(np.uint8) + fmt = 'RGB' if image.ndim == 3 else 'L' + return PIL.Image.fromarray(image, fmt) + +def save_image(image, filename, drange=[0,1], quality=95): + img = convert_to_pil_image(image, drange) + if '.jpg' in filename: + img.save(filename,"JPEG", quality=quality, optimize=True) + else: + img.save(filename) + +def save_image_grid(images, filename, drange=[0,1], grid_size=None): + convert_to_pil_image(create_image_grid(images, grid_size), drange).save(filename) + +#---------------------------------------------------------------------------- +# Locating results. 
+ +def locate_run_dir(run_id_or_run_dir): + if isinstance(run_id_or_run_dir, str): + if os.path.isdir(run_id_or_run_dir): + return run_id_or_run_dir + converted = dnnlib.submission.submit.convert_path(run_id_or_run_dir) + if os.path.isdir(converted): + return converted + + run_dir_pattern = re.compile('^0*%s-' % str(run_id_or_run_dir)) + for search_dir in ['']: + full_search_dir = config.result_dir if search_dir == '' else os.path.normpath(os.path.join(config.result_dir, search_dir)) + run_dir = os.path.join(full_search_dir, str(run_id_or_run_dir)) + if os.path.isdir(run_dir): + return run_dir + run_dirs = sorted(glob.glob(os.path.join(full_search_dir, '*'))) + run_dirs = [run_dir for run_dir in run_dirs if run_dir_pattern.match(os.path.basename(run_dir))] + run_dirs = [run_dir for run_dir in run_dirs if os.path.isdir(run_dir)] + if len(run_dirs) == 1: + return run_dirs[0] + raise IOError('Cannot locate result subdir for run', run_id_or_run_dir) + +def list_network_pkls(run_id_or_run_dir, include_final=True): + run_dir = locate_run_dir(run_id_or_run_dir) + pkls = sorted(glob.glob(os.path.join(run_dir, 'network-*.pkl'))) + if len(pkls) >= 1 and os.path.basename(pkls[0]) == 'network-final.pkl': + if include_final: + pkls.append(pkls[0]) + del pkls[0] + return pkls + +def locate_network_pkl(run_id_or_run_dir_or_network_pkl, snapshot_or_network_pkl=None): + for candidate in [snapshot_or_network_pkl, run_id_or_run_dir_or_network_pkl]: + if isinstance(candidate, str): + if os.path.isfile(candidate): + return candidate + converted = dnnlib.submission.submit.convert_path(candidate) + if os.path.isfile(converted): + return converted + + pkls = list_network_pkls(run_id_or_run_dir_or_network_pkl) + if len(pkls) >= 1 and snapshot_or_network_pkl is None: + return pkls[-1] + + for pkl in pkls: + try: + name = os.path.splitext(os.path.basename(pkl))[0] + number = int(name.split('-')[-1]) + if number == snapshot_or_network_pkl: + return pkl + except ValueError: pass + except IndexError: pass + raise IOError('Cannot locate network pkl for snapshot', snapshot_or_network_pkl) + +def get_id_string_for_network_pkl(network_pkl): + p = network_pkl.replace('.pkl', '').replace('\\', '/').split('/') + return '-'.join(p[max(len(p) - 2, 0):]) + +#---------------------------------------------------------------------------- +# Loading data from previous training runs. + +def load_network_pkl(run_id_or_run_dir_or_network_pkl, snapshot_or_network_pkl=None): + return load_pkl(locate_network_pkl(run_id_or_run_dir_or_network_pkl, snapshot_or_network_pkl)) + +def parse_config_for_previous_run(run_id): + run_dir = locate_run_dir(run_id) + + # Parse config.txt. + cfg = defaultdict(dict) + with open(os.path.join(run_dir, 'config.txt'), 'rt') as f: + for line in f: + line = re.sub(r"^{?\s*'(\w+)':\s*{(.*)(},|}})$", r"\1 = {\2}", line.strip()) + if line.startswith('dataset =') or line.startswith('train ='): + exec(line, cfg, cfg) # pylint: disable=exec-used + + # Handle legacy options. 
+ if 'file_pattern' in cfg['dataset']: + cfg['dataset']['tfrecord_dir'] = cfg['dataset'].pop('file_pattern').replace('-r??.tfrecords', '') + if 'mirror_augment' in cfg['dataset']: + cfg['train']['mirror_augment'] = cfg['dataset'].pop('mirror_augment') + if 'max_labels' in cfg['dataset']: + v = cfg['dataset'].pop('max_labels') + if v is None: v = 0 + if v == 'all': v = 'full' + cfg['dataset']['max_label_size'] = v + if 'max_images' in cfg['dataset']: + cfg['dataset'].pop('max_images') + return cfg + +def load_dataset_for_previous_run(run_id, **kwargs): # => dataset_obj, mirror_augment + cfg = parse_config_for_previous_run(run_id) + cfg['dataset'].update(kwargs) + dataset_obj = dataset.load_dataset(data_dir=config.data_dir, **cfg['dataset']) + mirror_augment = cfg['train'].get('mirror_augment', False) + return dataset_obj, mirror_augment + +def apply_mirror_augment(minibatch): + mask = np.random.rand(minibatch.shape[0]) < 0.5 + minibatch = np.array(minibatch) + minibatch[mask] = minibatch[mask, :, :, ::-1] + return minibatch + +#---------------------------------------------------------------------------- +# Size and contents of the image snapshot grids that are exported +# periodically during training. + +def setup_snapshot_image_grid(G, training_set, + size = '1080p', # '1080p' = to be viewed on 1080p display, '4k' = to be viewed on 4k display. + layout = 'random'): # 'random' = grid contents are selected randomly, 'row_per_class' = each row corresponds to one class label. + + # Select size. + gw = 1; gh = 1 + if size == '1080p': + gw = np.clip(1920 // G.output_shape[3], 3, 32) + gh = np.clip(1080 // G.output_shape[2], 2, 32) + if size == '4k': + gw = np.clip(3840 // G.output_shape[3], 7, 32) + gh = np.clip(2160 // G.output_shape[2], 4, 32) + + # Initialize data arrays. + reals = np.zeros([gw * gh] + training_set.shape, dtype=training_set.dtype) + labels = np.zeros([gw * gh, training_set.label_size], dtype=training_set.label_dtype) + latents = np.random.randn(gw * gh, *G.input_shape[1:]) + + # Random layout. + if layout == 'random': + reals[:], labels[:] = training_set.get_minibatch_np(gw * gh) + + # Class-conditional layouts. + class_layouts = dict(row_per_class=[gw,1], col_per_class=[1,gh], class4x4=[4,4]) + if layout in class_layouts: + bw, bh = class_layouts[layout] + nw = (gw - 1) // bw + 1 + nh = (gh - 1) // bh + 1 + blocks = [[] for _i in range(nw * nh)] + for _iter in range(1000000): + real, label = training_set.get_minibatch_np(1) + idx = np.argmax(label[0]) + while idx < len(blocks) and len(blocks[idx]) >= bw * bh: + idx += training_set.label_size + if idx < len(blocks): + blocks[idx].append((real, label)) + if all(len(block) >= bw * bh for block in blocks): + break + for i, block in enumerate(blocks): + for j, (real, label) in enumerate(block): + x = (i % nw) * bw + j % bw + y = (i // nw) * bh + j // bw + if x < gw and y < gh: + reals[x + y * gw] = real[0] + labels[x + y * gw] = label[0] + + return (gw, gh), reals, labels, latents + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/training/networks_progan.py b/ContraCLIP/models/genforce/converters/stylegan_official/training/networks_progan.py new file mode 100644 index 0000000000000000000000000000000000000000..896f500b0bfca5c292b1cba8de79e270f6a08036 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/training/networks_progan.py @@ -0,0 +1,322 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Network architectures used in the ProGAN paper.""" + +import numpy as np +import tensorflow as tf + +# NOTE: Do not import any application-specific modules here! +# Specify all network parameters as kwargs. + +#---------------------------------------------------------------------------- + +def lerp(a, b, t): return a + (b - a) * t +def lerp_clip(a, b, t): return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0) +def cset(cur_lambda, new_cond, new_lambda): return lambda: tf.cond(new_cond, new_lambda, cur_lambda) + +#---------------------------------------------------------------------------- +# Get/create weight tensor for a convolutional or fully-connected layer. + +def get_weight(shape, gain=np.sqrt(2), use_wscale=False): + fan_in = np.prod(shape[:-1]) # [kernel, kernel, fmaps_in, fmaps_out] or [in, out] + std = gain / np.sqrt(fan_in) # He init + if use_wscale: + wscale = tf.constant(np.float32(std), name='wscale') + w = tf.get_variable('weight', shape=shape, initializer=tf.initializers.random_normal()) * wscale + else: + w = tf.get_variable('weight', shape=shape, initializer=tf.initializers.random_normal(0, std)) + return w + +#---------------------------------------------------------------------------- +# Fully-connected layer. + +def dense(x, fmaps, gain=np.sqrt(2), use_wscale=False): + if len(x.shape) > 2: + x = tf.reshape(x, [-1, np.prod([d.value for d in x.shape[1:]])]) + w = get_weight([x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale) + w = tf.cast(w, x.dtype) + return tf.matmul(x, w) + +#---------------------------------------------------------------------------- +# Convolutional layer. + +def conv2d(x, fmaps, kernel, gain=np.sqrt(2), use_wscale=False): + assert kernel >= 1 and kernel % 2 == 1 + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale) + w = tf.cast(w, x.dtype) + return tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='SAME', data_format='NCHW') + +#---------------------------------------------------------------------------- +# Apply bias to the given activation tensor. + +def apply_bias(x): + b = tf.get_variable('bias', shape=[x.shape[1]], initializer=tf.initializers.zeros()) + b = tf.cast(b, x.dtype) + if len(x.shape) == 2: + return x + b + return x + tf.reshape(b, [1, -1, 1, 1]) + +#---------------------------------------------------------------------------- +# Leaky ReLU activation. Same as tf.nn.leaky_relu, but supports FP16. + +def leaky_relu(x, alpha=0.2): + with tf.name_scope('LeakyRelu'): + alpha = tf.constant(alpha, dtype=x.dtype, name='alpha') + return tf.maximum(x * alpha, x) + +#---------------------------------------------------------------------------- +# Nearest-neighbor upscaling layer. + +def upscale2d(x, factor=2): + assert isinstance(factor, int) and factor >= 1 + if factor == 1: return x + with tf.variable_scope('Upscale2D'): + s = x.shape + x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1]) + x = tf.tile(x, [1, 1, 1, factor, 1, factor]) + x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor]) + return x + +#---------------------------------------------------------------------------- +# Fused upscale2d + conv2d. +# Faster and uses less memory than performing the operations separately. 
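The fused path below folds a 2x nearest-neighbour upscale into a single stride-2 `conv2d_transpose`: the weight is transposed to put output maps on axis 2, zero-padded by one pixel on each spatial side, and the four one-pixel-shifted copies are summed, turning the 3x3 kernel into the 4x4 kernel used by the fused transposed convolution. A NumPy sketch of just the kernel construction (shapes and values assumed for illustration):

```python
import numpy as np

k, fmaps_in, fmaps_out = 3, 16, 32
w = np.random.randn(k, k, fmaps_in, fmaps_out).astype(np.float32)   # [kernel, kernel, in, out]

w_t = np.transpose(w, (0, 1, 3, 2))                                  # [kernel, kernel, out, in]
w_p = np.pad(w_t, ((1, 1), (1, 1), (0, 0), (0, 0)))                  # zero-pad spatial dims
w_fused = w_p[1:, 1:] + w_p[:-1, 1:] + w_p[1:, :-1] + w_p[:-1, :-1]  # sum 4 shifted copies
print(w.shape, '->', w_fused.shape)                                  # (3, 3, 16, 32) -> (4, 4, 32, 16)
```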
+ +def upscale2d_conv2d(x, fmaps, kernel, gain=np.sqrt(2), use_wscale=False): + assert kernel >= 1 and kernel % 2 == 1 + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale) + w = tf.transpose(w, [0, 1, 3, 2]) # [kernel, kernel, fmaps_out, fmaps_in] + w = tf.pad(w, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT') + w = tf.add_n([w[1:, 1:], w[:-1, 1:], w[1:, :-1], w[:-1, :-1]]) + w = tf.cast(w, x.dtype) + os = [tf.shape(x)[0], fmaps, x.shape[2] * 2, x.shape[3] * 2] + return tf.nn.conv2d_transpose(x, w, os, strides=[1,1,2,2], padding='SAME', data_format='NCHW') + +#---------------------------------------------------------------------------- +# Box filter downscaling layer. + +def downscale2d(x, factor=2): + assert isinstance(factor, int) and factor >= 1 + if factor == 1: return x + with tf.variable_scope('Downscale2D'): + ksize = [1, 1, factor, factor] + return tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding='VALID', data_format='NCHW') # NOTE: requires tf_config['graph_options.place_pruned_graph'] = True + +#---------------------------------------------------------------------------- +# Fused conv2d + downscale2d. +# Faster and uses less memory than performing the operations separately. + +def conv2d_downscale2d(x, fmaps, kernel, gain=np.sqrt(2), use_wscale=False): + assert kernel >= 1 and kernel % 2 == 1 + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale) + w = tf.pad(w, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT') + w = tf.add_n([w[1:, 1:], w[:-1, 1:], w[1:, :-1], w[:-1, :-1]]) * 0.25 + w = tf.cast(w, x.dtype) + return tf.nn.conv2d(x, w, strides=[1,1,2,2], padding='SAME', data_format='NCHW') + +#---------------------------------------------------------------------------- +# Pixelwise feature vector normalization. + +def pixel_norm(x, epsilon=1e-8): + with tf.variable_scope('PixelNorm'): + return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=1, keepdims=True) + epsilon) + +#---------------------------------------------------------------------------- +# Minibatch standard deviation. + +def minibatch_stddev_layer(x, group_size=4, num_new_features=1): + with tf.variable_scope('MinibatchStddev'): + group_size = tf.minimum(group_size, tf.shape(x)[0]) # Minibatch must be divisible by (or smaller than) group_size. + s = x.shape # [NCHW] Input shape. + y = tf.reshape(x, [group_size, -1, num_new_features, s[1]//num_new_features, s[2], s[3]]) # [GMncHW] Split minibatch into M groups of size G. Split channels into n channel groups c. + y = tf.cast(y, tf.float32) # [GMncHW] Cast to FP32. + y -= tf.reduce_mean(y, axis=0, keepdims=True) # [GMncHW] Subtract mean over group. + y = tf.reduce_mean(tf.square(y), axis=0) # [MncHW] Calc variance over group. + y = tf.sqrt(y + 1e-8) # [MncHW] Calc stddev over group. + y = tf.reduce_mean(y, axis=[2,3,4], keepdims=True) # [Mn111] Take average over fmaps and pixels. + y = tf.reduce_mean(y, axis=[2]) # [Mn11] Split channels into c channel groups + y = tf.cast(y, x.dtype) # [Mn11] Cast back to original data type. + y = tf.tile(y, [group_size, 1, s[2], s[3]]) # [NnHW] Replicate over group and pixels. + return tf.concat([x, y], axis=1) # [NCHW] Append as new fmap. + +#---------------------------------------------------------------------------- +# Networks used in the ProgressiveGAN paper. + +def G_paper( + latents_in, # First input: Latent vectors [minibatch, latent_size]. + labels_in, # Second input: Labels [minibatch, label_size]. 
+ num_channels = 1, # Number of output color channels. Overridden based on dataset. + resolution = 32, # Output resolution. Overridden based on dataset. + label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset. + fmap_base = 8192, # Overall multiplier for the number of feature maps. + fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution. + fmap_max = 512, # Maximum number of feature maps in any layer. + latent_size = None, # Dimensionality of the latent vectors. None = min(fmap_base, fmap_max). + normalize_latents = True, # Normalize latent vectors before feeding them to the network? + use_wscale = True, # Enable equalized learning rate? + use_pixelnorm = True, # Enable pixelwise feature vector normalization? + pixelnorm_epsilon = 1e-8, # Constant epsilon for pixelwise feature vector normalization. + use_leakyrelu = True, # True = leaky ReLU, False = ReLU. + dtype = 'float32', # Data type to use for activations and outputs. + fused_scale = True, # True = use fused upscale2d + conv2d, False = separate upscale2d layers. + structure = None, # 'linear' = human-readable, 'recursive' = efficient, None = select automatically. + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + **_kwargs): # Ignore unrecognized keyword args. + + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max) + def PN(x): return pixel_norm(x, epsilon=pixelnorm_epsilon) if use_pixelnorm else x + if latent_size is None: latent_size = nf(0) + if structure is None: structure = 'linear' if is_template_graph else 'recursive' + act = leaky_relu if use_leakyrelu else tf.nn.relu + + latents_in.set_shape([None, latent_size]) + labels_in.set_shape([None, label_size]) + combo_in = tf.cast(tf.concat([latents_in, labels_in], axis=1), dtype) + lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype) + images_out = None + + # Building blocks. + def block(x, res): # res = 2..resolution_log2 + with tf.variable_scope('%dx%d' % (2**res, 2**res)): + if res == 2: # 4x4 + if normalize_latents: x = pixel_norm(x, epsilon=pixelnorm_epsilon) + with tf.variable_scope('Dense'): + x = dense(x, fmaps=nf(res-1)*16, gain=np.sqrt(2)/4, use_wscale=use_wscale) # override gain to match the original Theano implementation + x = tf.reshape(x, [-1, nf(res-1), 4, 4]) + x = PN(act(apply_bias(x))) + with tf.variable_scope('Conv'): + x = PN(act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale)))) + else: # 8x8 and up + if fused_scale: + with tf.variable_scope('Conv0_up'): + x = PN(act(apply_bias(upscale2d_conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale)))) + else: + x = upscale2d(x) + with tf.variable_scope('Conv0'): + x = PN(act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale)))) + with tf.variable_scope('Conv1'): + x = PN(act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale)))) + return x + def torgb(x, res): # res = 2..resolution_log2 + lod = resolution_log2 - res + with tf.variable_scope('ToRGB_lod%d' % lod): + return apply_bias(conv2d(x, fmaps=num_channels, kernel=1, gain=1, use_wscale=use_wscale)) + + # Linear structure: simple but inefficient. 
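In both the linear and the recursive branch below, growth works the same way: at level-of-detail `lod`, the output is a crossfade between the RGB image rendered at the current resolution and the upscaled RGB image from the previous resolution, with `lod_in - lod` (clipped to [0, 1]) as the fade weight while newly added layers are blended in. A small NumPy sketch of that `lerp_clip` crossfade (illustrative values only):

```python
import numpy as np

def lerp_clip(a, b, t):
    return a + (b - a) * np.clip(t, 0.0, 1.0)

new_rgb = np.full((1, 3, 8, 8), 1.0)   # torgb(x, res): image from the newly grown layers
old_rgb = np.full((1, 3, 8, 8), 0.0)   # upscale2d(images_out): previous-resolution image
for fade in (0.0, 0.5, 1.0):
    out = lerp_clip(new_rgb, old_rgb, fade)
    print(fade, out.mean())            # 0.0 -> fully new layers, 1.0 -> fully old image
```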
+ if structure == 'linear': + x = block(combo_in, 2) + images_out = torgb(x, 2) + for res in range(3, resolution_log2 + 1): + lod = resolution_log2 - res + x = block(x, res) + img = torgb(x, res) + images_out = upscale2d(images_out) + with tf.variable_scope('Grow_lod%d' % lod): + images_out = lerp_clip(img, images_out, lod_in - lod) + + # Recursive structure: complex but efficient. + if structure == 'recursive': + def grow(x, res, lod): + y = block(x, res) + img = lambda: upscale2d(torgb(y, res), 2**lod) + if res > 2: img = cset(img, (lod_in > lod), lambda: upscale2d(lerp(torgb(y, res), upscale2d(torgb(x, res - 1)), lod_in - lod), 2**lod)) + if lod > 0: img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1)) + return img() + images_out = grow(combo_in, 2, resolution_log2 - 2) + + assert images_out.dtype == tf.as_dtype(dtype) + images_out = tf.identity(images_out, name='images_out') + return images_out + + +def D_paper( + images_in, # First input: Images [minibatch, channel, height, width]. + labels_in, # Second input: Labels [minibatch, label_size]. + num_channels = 1, # Number of input color channels. Overridden based on dataset. + resolution = 32, # Input resolution. Overridden based on dataset. + label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset. + fmap_base = 8192, # Overall multiplier for the number of feature maps. + fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution. + fmap_max = 512, # Maximum number of feature maps in any layer. + use_wscale = True, # Enable equalized learning rate? + mbstd_group_size = 4, # Group size for the minibatch standard deviation layer, 0 = disable. + dtype = 'float32', # Data type to use for activations and outputs. + fused_scale = True, # True = use fused conv2d + downscale2d, False = separate downscale2d layers. + structure = None, # 'linear' = human-readable, 'recursive' = efficient, None = select automatically + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + **_kwargs): # Ignore unrecognized keyword args. + + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max) + if structure is None: structure = 'linear' if is_template_graph else 'recursive' + act = leaky_relu + + images_in.set_shape([None, num_channels, resolution, resolution]) + labels_in.set_shape([None, label_size]) + images_in = tf.cast(images_in, dtype) + labels_in = tf.cast(labels_in, dtype) + lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype) + scores_out = None + + # Building blocks. 
+ def fromrgb(x, res): # res = 2..resolution_log2 + with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)): + return act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=1, use_wscale=use_wscale))) + def block(x, res): # res = 2..resolution_log2 + with tf.variable_scope('%dx%d' % (2**res, 2**res)): + if res >= 3: # 8x8 and up + with tf.variable_scope('Conv0'): + x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale))) + if fused_scale: + with tf.variable_scope('Conv1_down'): + x = act(apply_bias(conv2d_downscale2d(x, fmaps=nf(res-2), kernel=3, use_wscale=use_wscale))) + else: + with tf.variable_scope('Conv1'): + x = act(apply_bias(conv2d(x, fmaps=nf(res-2), kernel=3, use_wscale=use_wscale))) + x = downscale2d(x) + else: # 4x4 + if mbstd_group_size > 1: + x = minibatch_stddev_layer(x, mbstd_group_size) + with tf.variable_scope('Conv'): + x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale))) + with tf.variable_scope('Dense0'): + x = act(apply_bias(dense(x, fmaps=nf(res-2), use_wscale=use_wscale))) + with tf.variable_scope('Dense1'): + x = apply_bias(dense(x, fmaps=1, gain=1, use_wscale=use_wscale)) + return x + + # Linear structure: simple but inefficient. + if structure == 'linear': + img = images_in + x = fromrgb(img, resolution_log2) + for res in range(resolution_log2, 2, -1): + lod = resolution_log2 - res + x = block(x, res) + img = downscale2d(img) + y = fromrgb(img, res - 1) + with tf.variable_scope('Grow_lod%d' % lod): + x = lerp_clip(x, y, lod_in - lod) + scores_out = block(x, 2) + + # Recursive structure: complex but efficient. + if structure == 'recursive': + def grow(res, lod): + x = lambda: fromrgb(downscale2d(images_in, 2**lod), res) + if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1)) + x = block(x(), res); y = lambda: x + if res > 2: y = cset(y, (lod_in > lod), lambda: lerp(x, fromrgb(downscale2d(images_in, 2**(lod+1)), res - 1), lod_in - lod)) + return y() + scores_out = grow(2, resolution_log2 - 2) + + assert scores_out.dtype == tf.as_dtype(dtype) + scores_out = tf.identity(scores_out, name='scores_out') + return scores_out + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/training/networks_stylegan.py b/ContraCLIP/models/genforce/converters/stylegan_official/training/networks_stylegan.py new file mode 100644 index 0000000000000000000000000000000000000000..4a1b6fbb10308cf4e8430336d2216a231a4ddc56 --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/training/networks_stylegan.py @@ -0,0 +1,661 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Network architectures used in the StyleGAN paper.""" + +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +# NOTE: Do not import any application-specific modules here! +# Specify all network parameters as kwargs. + +#---------------------------------------------------------------------------- +# Primitive ops for manipulating 4D activation tensors. +# The gradients of these are not necessary efficient or even meaningful. 
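`_blur2d` below expands the 1-D filter `f=[1,2,1]` into a 2-D kernel by outer product, optionally normalizes it, and applies it to every channel with a depthwise convolution; `_downscale2d` routes the common 2x/float32 case through the same routine as a strided 2x2 average. A NumPy sketch of the default normalized kernel:

```python
import numpy as np

f = np.array([1, 2, 1], dtype=np.float32)
kernel = np.outer(f, f)     # 3x3 binomial kernel
kernel /= kernel.sum()      # normalize=True -> weights sum to 1
print(kernel)
# [[0.0625 0.125  0.0625]
#  [0.125  0.25   0.125 ]
#  [0.0625 0.125  0.0625]]
```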
+ +def _blur2d(x, f=[1,2,1], normalize=True, flip=False, stride=1): + assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:]) + assert isinstance(stride, int) and stride >= 1 + + # Finalize filter kernel. + f = np.array(f, dtype=np.float32) + if f.ndim == 1: + f = f[:, np.newaxis] * f[np.newaxis, :] + assert f.ndim == 2 + if normalize: + f /= np.sum(f) + if flip: + f = f[::-1, ::-1] + f = f[:, :, np.newaxis, np.newaxis] + f = np.tile(f, [1, 1, int(x.shape[1]), 1]) + + # No-op => early exit. + if f.shape == (1, 1) and f[0,0] == 1: + return x + + # Convolve using depthwise_conv2d. + orig_dtype = x.dtype + x = tf.cast(x, tf.float32) # tf.nn.depthwise_conv2d() doesn't support fp16 + f = tf.constant(f, dtype=x.dtype, name='filter') + strides = [1, 1, stride, stride] + x = tf.nn.depthwise_conv2d(x, f, strides=strides, padding='SAME', data_format='NCHW') + x = tf.cast(x, orig_dtype) + return x + +def _upscale2d(x, factor=2, gain=1): + assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:]) + assert isinstance(factor, int) and factor >= 1 + + # Apply gain. + if gain != 1: + x *= gain + + # No-op => early exit. + if factor == 1: + return x + + # Upscale using tf.tile(). + s = x.shape + x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1]) + x = tf.tile(x, [1, 1, 1, factor, 1, factor]) + x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor]) + return x + +def _downscale2d(x, factor=2, gain=1): + assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:]) + assert isinstance(factor, int) and factor >= 1 + + # 2x2, float32 => downscale using _blur2d(). + if factor == 2 and x.dtype == tf.float32: + f = [np.sqrt(gain) / factor] * factor + return _blur2d(x, f=f, normalize=False, stride=factor) + + # Apply gain. + if gain != 1: + x *= gain + + # No-op => early exit. + if factor == 1: + return x + + # Large factor => downscale using tf.nn.avg_pool(). + # NOTE: Requires tf_config['graph_options.place_pruned_graph']=True to work. + ksize = [1, 1, factor, factor] + return tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding='VALID', data_format='NCHW') + +#---------------------------------------------------------------------------- +# High-level ops for manipulating 4D activation tensors. +# The gradients of these are meant to be as efficient as possible. + +def blur2d(x, f=[1,2,1], normalize=True): + with tf.variable_scope('Blur2D'): + @tf.custom_gradient + def func(x): + y = _blur2d(x, f, normalize) + @tf.custom_gradient + def grad(dy): + dx = _blur2d(dy, f, normalize, flip=True) + return dx, lambda ddx: _blur2d(ddx, f, normalize) + return y, grad + return func(x) + +def upscale2d(x, factor=2): + with tf.variable_scope('Upscale2D'): + @tf.custom_gradient + def func(x): + y = _upscale2d(x, factor) + @tf.custom_gradient + def grad(dy): + dx = _downscale2d(dy, factor, gain=factor**2) + return dx, lambda ddx: _upscale2d(ddx, factor) + return y, grad + return func(x) + +def downscale2d(x, factor=2): + with tf.variable_scope('Downscale2D'): + @tf.custom_gradient + def func(x): + y = _downscale2d(x, factor) + @tf.custom_gradient + def grad(dy): + dx = _upscale2d(dy, factor, gain=1/factor**2) + return dx, lambda ddx: _downscale2d(ddx, factor) + return y, grad + return func(x) + +#---------------------------------------------------------------------------- +# Get/create weight tensor for a convolutional or fully-connected layer. 
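`get_weight` below implements equalized learning rate: the He-initialization scale `he_std = gain / sqrt(fan_in)` is applied as a runtime multiplier rather than baked into the stored variable, so all weights live at roughly unit scale, and `lrmul` rescales both the init and the runtime coefficient (the mapping network uses `lrmul = 0.01`). A small sketch of the resulting coefficients, assuming a 3x3 convolution with 512 input and output maps:

```python
import numpy as np

gain, lrmul = np.sqrt(2), 1.0
shape = (3, 3, 512, 512)            # [kernel, kernel, fmaps_in, fmaps_out]
fan_in = np.prod(shape[:-1])        # 3 * 3 * 512 = 4608
he_std = gain / np.sqrt(fan_in)     # ~0.0208

# use_wscale=True: store ~N(0, 1/lrmul), multiply by he_std * lrmul at runtime.
init_std, runtime_coef = 1.0 / lrmul, he_std * lrmul
# use_wscale=False: bake the He scale into the init instead.
init_std_plain, runtime_coef_plain = he_std / lrmul, lrmul

print(round(float(he_std), 4), init_std, round(float(runtime_coef), 4))
```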
+ +def get_weight(shape, gain=np.sqrt(2), use_wscale=False, lrmul=1): + fan_in = np.prod(shape[:-1]) # [kernel, kernel, fmaps_in, fmaps_out] or [in, out] + he_std = gain / np.sqrt(fan_in) # He init + + # Equalized learning rate and custom learning rate multiplier. + if use_wscale: + init_std = 1.0 / lrmul + runtime_coef = he_std * lrmul + else: + init_std = he_std / lrmul + runtime_coef = lrmul + + # Create variable. + init = tf.initializers.random_normal(0, init_std) + return tf.get_variable('weight', shape=shape, initializer=init) * runtime_coef + +#---------------------------------------------------------------------------- +# Fully-connected layer. + +def dense(x, fmaps, **kwargs): + if len(x.shape) > 2: + x = tf.reshape(x, [-1, np.prod([d.value for d in x.shape[1:]])]) + w = get_weight([x.shape[1].value, fmaps], **kwargs) + w = tf.cast(w, x.dtype) + return tf.matmul(x, w) + +#---------------------------------------------------------------------------- +# Convolutional layer. + +def conv2d(x, fmaps, kernel, **kwargs): + assert kernel >= 1 and kernel % 2 == 1 + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs) + w = tf.cast(w, x.dtype) + return tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='SAME', data_format='NCHW') + +#---------------------------------------------------------------------------- +# Fused convolution + scaling. +# Faster and uses less memory than performing the operations separately. + +def upscale2d_conv2d(x, fmaps, kernel, fused_scale='auto', **kwargs): + assert kernel >= 1 and kernel % 2 == 1 + assert fused_scale in [True, False, 'auto'] + if fused_scale == 'auto': + fused_scale = min(x.shape[2:]) * 2 >= 128 + + # Not fused => call the individual ops directly. + if not fused_scale: + return conv2d(upscale2d(x), fmaps, kernel, **kwargs) + + # Fused => perform both ops simultaneously using tf.nn.conv2d_transpose(). + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs) + w = tf.transpose(w, [0, 1, 3, 2]) # [kernel, kernel, fmaps_out, fmaps_in] + w = tf.pad(w, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT') + w = tf.add_n([w[1:, 1:], w[:-1, 1:], w[1:, :-1], w[:-1, :-1]]) + w = tf.cast(w, x.dtype) + os = [tf.shape(x)[0], fmaps, x.shape[2] * 2, x.shape[3] * 2] + return tf.nn.conv2d_transpose(x, w, os, strides=[1,1,2,2], padding='SAME', data_format='NCHW') + +def conv2d_downscale2d(x, fmaps, kernel, fused_scale='auto', **kwargs): + assert kernel >= 1 and kernel % 2 == 1 + assert fused_scale in [True, False, 'auto'] + if fused_scale == 'auto': + fused_scale = min(x.shape[2:]) >= 128 + + # Not fused => call the individual ops directly. + if not fused_scale: + return downscale2d(conv2d(x, fmaps, kernel, **kwargs)) + + # Fused => perform both ops simultaneously using tf.nn.conv2d(). + w = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs) + w = tf.pad(w, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT') + w = tf.add_n([w[1:, 1:], w[:-1, 1:], w[1:, :-1], w[:-1, :-1]]) * 0.25 + w = tf.cast(w, x.dtype) + return tf.nn.conv2d(x, w, strides=[1,1,2,2], padding='SAME', data_format='NCHW') + +#---------------------------------------------------------------------------- +# Apply bias to the given activation tensor. 
+ +def apply_bias(x, lrmul=1): + b = tf.get_variable('bias', shape=[x.shape[1]], initializer=tf.initializers.zeros()) * lrmul + b = tf.cast(b, x.dtype) + if len(x.shape) == 2: + return x + b + return x + tf.reshape(b, [1, -1, 1, 1]) + +#---------------------------------------------------------------------------- +# Leaky ReLU activation. More efficient than tf.nn.leaky_relu() and supports FP16. + +def leaky_relu(x, alpha=0.2): + with tf.variable_scope('LeakyReLU'): + alpha = tf.constant(alpha, dtype=x.dtype, name='alpha') + @tf.custom_gradient + def func(x): + y = tf.maximum(x, x * alpha) + @tf.custom_gradient + def grad(dy): + dx = tf.where(y >= 0, dy, dy * alpha) + return dx, lambda ddx: tf.where(y >= 0, ddx, ddx * alpha) + return y, grad + return func(x) + +#---------------------------------------------------------------------------- +# Pixelwise feature vector normalization. + +def pixel_norm(x, epsilon=1e-8): + with tf.variable_scope('PixelNorm'): + epsilon = tf.constant(epsilon, dtype=x.dtype, name='epsilon') + return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=1, keepdims=True) + epsilon) + +#---------------------------------------------------------------------------- +# Instance normalization. + +def instance_norm(x, epsilon=1e-8): + assert len(x.shape) == 4 # NCHW + with tf.variable_scope('InstanceNorm'): + orig_dtype = x.dtype + x = tf.cast(x, tf.float32) + x -= tf.reduce_mean(x, axis=[2,3], keepdims=True) + epsilon = tf.constant(epsilon, dtype=x.dtype, name='epsilon') + x *= tf.rsqrt(tf.reduce_mean(tf.square(x), axis=[2,3], keepdims=True) + epsilon) + x = tf.cast(x, orig_dtype) + return x + +#---------------------------------------------------------------------------- +# Style modulation. + +def style_mod(x, dlatent, **kwargs): + with tf.variable_scope('StyleMod'): + style = apply_bias(dense(dlatent, fmaps=x.shape[1]*2, gain=1, **kwargs)) + style = tf.reshape(style, [-1, 2, x.shape[1]] + [1] * (len(x.shape) - 2)) + return x * (style[:,0] + 1) + style[:,1] + +#---------------------------------------------------------------------------- +# Noise input. + +def apply_noise(x, noise_var=None, randomize_noise=True): + assert len(x.shape) == 4 # NCHW + with tf.variable_scope('Noise'): + if noise_var is None or randomize_noise: + noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype) + else: + noise = tf.cast(noise_var, x.dtype) + weight = tf.get_variable('weight', shape=[x.shape[1].value], initializer=tf.initializers.zeros()) + return x + noise * tf.reshape(tf.cast(weight, x.dtype), [1, -1, 1, 1]) + +#---------------------------------------------------------------------------- +# Minibatch standard deviation. + +def minibatch_stddev_layer(x, group_size=4, num_new_features=1): + with tf.variable_scope('MinibatchStddev'): + group_size = tf.minimum(group_size, tf.shape(x)[0]) # Minibatch must be divisible by (or smaller than) group_size. + s = x.shape # [NCHW] Input shape. + y = tf.reshape(x, [group_size, -1, num_new_features, s[1]//num_new_features, s[2], s[3]]) # [GMncHW] Split minibatch into M groups of size G. Split channels into n channel groups c. + y = tf.cast(y, tf.float32) # [GMncHW] Cast to FP32. + y -= tf.reduce_mean(y, axis=0, keepdims=True) # [GMncHW] Subtract mean over group. + y = tf.reduce_mean(tf.square(y), axis=0) # [MncHW] Calc variance over group. + y = tf.sqrt(y + 1e-8) # [MncHW] Calc stddev over group. + y = tf.reduce_mean(y, axis=[2,3,4], keepdims=True) # [Mn111] Take average over fmaps and pixels. 
+ y = tf.reduce_mean(y, axis=[2]) # [Mn11] Split channels into c channel groups + y = tf.cast(y, x.dtype) # [Mn11] Cast back to original data type. + y = tf.tile(y, [group_size, 1, s[2], s[3]]) # [NnHW] Replicate over group and pixels. + return tf.concat([x, y], axis=1) # [NCHW] Append as new fmap. + +#---------------------------------------------------------------------------- +# Style-based generator used in the StyleGAN paper. +# Composed of two sub-networks (G_mapping and G_synthesis) that are defined below. + +def G_style( + latents_in, # First input: Latent vectors (Z) [minibatch, latent_size]. + labels_in, # Second input: Conditioning labels [minibatch, label_size]. + truncation_psi = 0.7, # Style strength multiplier for the truncation trick. None = disable. + truncation_cutoff = 8, # Number of layers for which to apply the truncation trick. None = disable. + truncation_psi_val = None, # Value for truncation_psi to use during validation. + truncation_cutoff_val = None, # Value for truncation_cutoff to use during validation. + dlatent_avg_beta = 0.995, # Decay for tracking the moving average of W during training. None = disable. + style_mixing_prob = 0.9, # Probability of mixing styles during training. None = disable. + is_training = False, # Network is under training? Enables and disables specific features. + is_validation = False, # Network is under validation? Chooses which value to use for truncation_psi. + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + components = dnnlib.EasyDict(), # Container for sub-networks. Retained between calls. + **kwargs): # Arguments for sub-networks (G_mapping and G_synthesis). + + # Validate arguments. + assert not is_training or not is_validation + assert isinstance(components, dnnlib.EasyDict) + if is_validation: + truncation_psi = truncation_psi_val + truncation_cutoff = truncation_cutoff_val + if is_training or (truncation_psi is not None and not tflib.is_tf_expression(truncation_psi) and truncation_psi == 1): + truncation_psi = None + if is_training or (truncation_cutoff is not None and not tflib.is_tf_expression(truncation_cutoff) and truncation_cutoff <= 0): + truncation_cutoff = None + if not is_training or (dlatent_avg_beta is not None and not tflib.is_tf_expression(dlatent_avg_beta) and dlatent_avg_beta == 1): + dlatent_avg_beta = None + if not is_training or (style_mixing_prob is not None and not tflib.is_tf_expression(style_mixing_prob) and style_mixing_prob <= 0): + style_mixing_prob = None + + # Setup components. + if 'synthesis' not in components: + components.synthesis = tflib.Network('G_synthesis', func_name=G_synthesis, **kwargs) + num_layers = components.synthesis.input_shape[1] + dlatent_size = components.synthesis.input_shape[2] + if 'mapping' not in components: + components.mapping = tflib.Network('G_mapping', func_name=G_mapping, dlatent_broadcast=num_layers, **kwargs) + + # Setup variables. + lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False) + dlatent_avg = tf.get_variable('dlatent_avg', shape=[dlatent_size], initializer=tf.initializers.zeros(), trainable=False) + + # Evaluate mapping network. + dlatents = components.mapping.get_output_for(latents_in, labels_in, **kwargs) + + # Update moving average of W. 
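The block below maintains `dlatent_avg`, a slow exponential moving average of the mapped latents W; this average is the point the truncation trick later pulls samples toward (`lerp(dlatent_avg, dlatents, psi)` for the first `truncation_cutoff` layers). A NumPy sketch of both interpolations, with assumed values:

```python
import numpy as np

lerp = lambda a, b, t: a + (b - a) * t

dlatent_avg = np.zeros(512, dtype=np.float32)
batch_avg = np.random.randn(512).astype(np.float32)
dlatent_avg = lerp(batch_avg, dlatent_avg, 0.995)   # EMA update: keep 99.5% of the old average

w = np.random.randn(512).astype(np.float32)
w_truncated = lerp(dlatent_avg, w, 0.7)             # truncation_psi = 0.7 pulls W toward the average
```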
+ if dlatent_avg_beta is not None: + with tf.variable_scope('DlatentAvg'): + batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0) + update_op = tf.assign(dlatent_avg, tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta)) + with tf.control_dependencies([update_op]): + dlatents = tf.identity(dlatents) + + # Perform style mixing regularization. + if style_mixing_prob is not None: + with tf.name_scope('StyleMix'): + latents2 = tf.random_normal(tf.shape(latents_in)) + dlatents2 = components.mapping.get_output_for(latents2, labels_in, **kwargs) + layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis] + cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2 + mixing_cutoff = tf.cond( + tf.random_uniform([], 0.0, 1.0) < style_mixing_prob, + lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32), + lambda: cur_layers) + dlatents = tf.where(tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)), dlatents, dlatents2) + + # Apply truncation trick. + if truncation_psi is not None and truncation_cutoff is not None: + with tf.variable_scope('Truncation'): + layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis] + ones = np.ones(layer_idx.shape, dtype=np.float32) + coefs = tf.where(layer_idx < truncation_cutoff, truncation_psi * ones, ones) + dlatents = tflib.lerp(dlatent_avg, dlatents, coefs) + + # Evaluate synthesis network. + with tf.control_dependencies([tf.assign(components.synthesis.find_var('lod'), lod_in)]): + images_out = components.synthesis.get_output_for(dlatents, force_clean_graph=is_template_graph, **kwargs) + return tf.identity(images_out, name='images_out') + +#---------------------------------------------------------------------------- +# Mapping network used in the StyleGAN paper. + +def G_mapping( + latents_in, # First input: Latent vectors (Z) [minibatch, latent_size]. + labels_in, # Second input: Conditioning labels [minibatch, label_size]. + latent_size = 512, # Latent vector (Z) dimensionality. + label_size = 0, # Label dimensionality, 0 if no labels. + dlatent_size = 512, # Disentangled latent (W) dimensionality. + dlatent_broadcast = None, # Output disentangled latent (W) as [minibatch, dlatent_size] or [minibatch, dlatent_broadcast, dlatent_size]. + mapping_layers = 8, # Number of mapping layers. + mapping_fmaps = 512, # Number of activations in the mapping layers. + mapping_lrmul = 0.01, # Learning rate multiplier for the mapping layers. + mapping_nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu'. + use_wscale = True, # Enable equalized learning rate? + normalize_latents = True, # Normalize latent vectors (Z) before feeding them to the mapping layers? + dtype = 'float32', # Data type to use for activations and outputs. + **_kwargs): # Ignore unrecognized keyword args. + + act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[mapping_nonlinearity] + + # Inputs. + latents_in.set_shape([None, latent_size]) + labels_in.set_shape([None, label_size]) + latents_in = tf.cast(latents_in, dtype) + labels_in = tf.cast(labels_in, dtype) + x = latents_in + + # Embed labels and concatenate them with latents. + if label_size: + with tf.variable_scope('LabelConcat'): + w = tf.get_variable('weight', shape=[label_size, latent_size], initializer=tf.initializers.random_normal()) + y = tf.matmul(labels_in, tf.cast(w, dtype)) + x = tf.concat([x, y], axis=1) + + # Normalize latents. + if normalize_latents: + x = pixel_norm(x) + + # Mapping layers. 
+ for layer_idx in range(mapping_layers): + with tf.variable_scope('Dense%d' % layer_idx): + fmaps = dlatent_size if layer_idx == mapping_layers - 1 else mapping_fmaps + x = dense(x, fmaps=fmaps, gain=gain, use_wscale=use_wscale, lrmul=mapping_lrmul) + x = apply_bias(x, lrmul=mapping_lrmul) + x = act(x) + + # Broadcast. + if dlatent_broadcast is not None: + with tf.variable_scope('Broadcast'): + x = tf.tile(x[:, np.newaxis], [1, dlatent_broadcast, 1]) + + # Output. + assert x.dtype == tf.as_dtype(dtype) + return tf.identity(x, name='dlatents_out') + +#---------------------------------------------------------------------------- +# Synthesis network used in the StyleGAN paper. + +def G_synthesis( + dlatents_in, # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size]. + dlatent_size = 512, # Disentangled latent (W) dimensionality. + num_channels = 3, # Number of output color channels. + resolution = 1024, # Output resolution. + fmap_base = 8192, # Overall multiplier for the number of feature maps. + fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution. + fmap_max = 512, # Maximum number of feature maps in any layer. + use_styles = True, # Enable style inputs? + const_input_layer = True, # First layer is a learned constant? + use_noise = True, # Enable noise inputs? + randomize_noise = True, # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables. + nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu' + use_wscale = True, # Enable equalized learning rate? + use_pixel_norm = False, # Enable pixelwise feature vector normalization? + use_instance_norm = True, # Enable instance normalization? + dtype = 'float32', # Data type to use for activations and outputs. + fused_scale = 'auto', # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically. + blur_filter = [1,2,1], # Low-pass filter to apply when resampling activations. None = no filtering. + structure = 'auto', # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically. + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + force_clean_graph = False, # True = construct a clean graph that looks nice in TensorBoard, False = default behavior. + **_kwargs): # Ignore unrecognized keyword args. + + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max) + def blur(x): return blur2d(x, blur_filter) if blur_filter else x + if is_template_graph: force_clean_graph = True + if force_clean_graph: randomize_noise = False + if structure == 'auto': structure = 'linear' if force_clean_graph else 'recursive' + act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[nonlinearity] + num_layers = resolution_log2 * 2 - 2 + num_styles = num_layers if use_styles else 1 + images_out = None + + # Primary inputs. + dlatents_in.set_shape([None, num_styles, dlatent_size]) + dlatents_in = tf.cast(dlatents_in, dtype) + lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype) + + # Noise inputs. 
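The synthesis network has `num_layers = 2 * log2(resolution) - 2` layers (18 at 1024x1024), and the loop below registers one single-channel, non-trainable noise map per layer at that layer's spatial resolution (`layer_idx // 2 + 2` is the log2 size), so noise can either be re-drawn every call or held fixed. A quick sketch of the layer-to-resolution mapping:

```python
import numpy as np

resolution = 1024
resolution_log2 = int(np.log2(resolution))
num_layers = resolution_log2 * 2 - 2              # 18 layers for 1024x1024
for layer_idx in range(num_layers):
    res = layer_idx // 2 + 2                      # log2 of the feature-map size at this layer
    print(layer_idx, '->', (2 ** res, 2 ** res))  # layers 0,1 -> 4x4 ... layers 16,17 -> 1024x1024
```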
+ noise_inputs = [] + if use_noise: + for layer_idx in range(num_layers): + res = layer_idx // 2 + 2 + shape = [1, use_noise, 2**res, 2**res] + noise_inputs.append(tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False)) + + # Things to do at the end of each layer. + def layer_epilogue(x, layer_idx): + if use_noise: + x = apply_noise(x, noise_inputs[layer_idx], randomize_noise=randomize_noise) + x = apply_bias(x) + x = act(x) + if use_pixel_norm: + x = pixel_norm(x) + if use_instance_norm: + x = instance_norm(x) + if use_styles: + x = style_mod(x, dlatents_in[:, layer_idx], use_wscale=use_wscale) + return x + + # Early layers. + with tf.variable_scope('4x4'): + if const_input_layer: + with tf.variable_scope('Const'): + x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.ones()) + x = layer_epilogue(tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1]), 0) + else: + with tf.variable_scope('Dense'): + x = dense(dlatents_in[:, 0], fmaps=nf(1)*16, gain=gain/4, use_wscale=use_wscale) # tweak gain to match the official implementation of Progressing GAN + x = layer_epilogue(tf.reshape(x, [-1, nf(1), 4, 4]), 0) + with tf.variable_scope('Conv'): + x = layer_epilogue(conv2d(x, fmaps=nf(1), kernel=3, gain=gain, use_wscale=use_wscale), 1) + + # Building blocks for remaining layers. + def block(res, x): # res = 3..resolution_log2 + with tf.variable_scope('%dx%d' % (2**res, 2**res)): + with tf.variable_scope('Conv0_up'): + x = layer_epilogue(blur(upscale2d_conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale, fused_scale=fused_scale)), res*2-4) + with tf.variable_scope('Conv1'): + x = layer_epilogue(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale), res*2-3) + return x + def torgb(res, x): # res = 2..resolution_log2 + lod = resolution_log2 - res + with tf.variable_scope('ToRGB_lod%d' % lod): + return apply_bias(conv2d(x, fmaps=num_channels, kernel=1, gain=1, use_wscale=use_wscale)) + + # Fixed structure: simple and efficient, but does not support progressive growing. + if structure == 'fixed': + for res in range(3, resolution_log2 + 1): + x = block(res, x) + images_out = torgb(resolution_log2, x) + + # Linear structure: simple but inefficient. + if structure == 'linear': + images_out = torgb(2, x) + for res in range(3, resolution_log2 + 1): + lod = resolution_log2 - res + x = block(res, x) + img = torgb(res, x) + images_out = upscale2d(images_out) + with tf.variable_scope('Grow_lod%d' % lod): + images_out = tflib.lerp_clip(img, images_out, lod_in - lod) + + # Recursive structure: complex but efficient. + if structure == 'recursive': + def cset(cur_lambda, new_cond, new_lambda): + return lambda: tf.cond(new_cond, new_lambda, cur_lambda) + def grow(x, res, lod): + y = block(res, x) + img = lambda: upscale2d(torgb(res, y), 2**lod) + img = cset(img, (lod_in > lod), lambda: upscale2d(tflib.lerp(torgb(res, y), upscale2d(torgb(res - 1, x)), lod_in - lod), 2**lod)) + if lod > 0: img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1)) + return img() + images_out = grow(x, 3, resolution_log2 - 3) + + assert images_out.dtype == tf.as_dtype(dtype) + return tf.identity(images_out, name='images_out') + +#---------------------------------------------------------------------------- +# Discriminator used in the StyleGAN paper. + +def D_basic( + images_in, # First input: Images [minibatch, channel, height, width]. + labels_in, # Second input: Labels [minibatch, label_size]. 
+ num_channels = 1, # Number of input color channels. Overridden based on dataset. + resolution = 32, # Input resolution. Overridden based on dataset. + label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset. + fmap_base = 8192, # Overall multiplier for the number of feature maps. + fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution. + fmap_max = 512, # Maximum number of feature maps in any layer. + nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', + use_wscale = True, # Enable equalized learning rate? + mbstd_group_size = 4, # Group size for the minibatch standard deviation layer, 0 = disable. + mbstd_num_features = 1, # Number of features for the minibatch standard deviation layer. + dtype = 'float32', # Data type to use for activations and outputs. + fused_scale = 'auto', # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically. + blur_filter = [1,2,1], # Low-pass filter to apply when resampling activations. None = no filtering. + structure = 'auto', # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically. + is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. + **_kwargs): # Ignore unrecognized keyword args. + + resolution_log2 = int(np.log2(resolution)) + assert resolution == 2**resolution_log2 and resolution >= 4 + def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max) + def blur(x): return blur2d(x, blur_filter) if blur_filter else x + if structure == 'auto': structure = 'linear' if is_template_graph else 'recursive' + act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[nonlinearity] + + images_in.set_shape([None, num_channels, resolution, resolution]) + labels_in.set_shape([None, label_size]) + images_in = tf.cast(images_in, dtype) + labels_in = tf.cast(labels_in, dtype) + lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype) + scores_out = None + + # Building blocks. + def fromrgb(x, res): # res = 2..resolution_log2 + with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)): + return act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=1, gain=gain, use_wscale=use_wscale))) + def block(x, res): # res = 2..resolution_log2 + with tf.variable_scope('%dx%d' % (2**res, 2**res)): + if res >= 3: # 8x8 and up + with tf.variable_scope('Conv0'): + x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale))) + with tf.variable_scope('Conv1_down'): + x = act(apply_bias(conv2d_downscale2d(blur(x), fmaps=nf(res-2), kernel=3, gain=gain, use_wscale=use_wscale, fused_scale=fused_scale))) + else: # 4x4 + if mbstd_group_size > 1: + x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features) + with tf.variable_scope('Conv'): + x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale))) + with tf.variable_scope('Dense0'): + x = act(apply_bias(dense(x, fmaps=nf(res-2), gain=gain, use_wscale=use_wscale))) + with tf.variable_scope('Dense1'): + x = apply_bias(dense(x, fmaps=max(label_size, 1), gain=1, use_wscale=use_wscale)) + return x + + # Fixed structure: simple and efficient, but does not support progressive growing. 
+ if structure == 'fixed': + x = fromrgb(images_in, resolution_log2) + for res in range(resolution_log2, 2, -1): + x = block(x, res) + scores_out = block(x, 2) + + # Linear structure: simple but inefficient. + if structure == 'linear': + img = images_in + x = fromrgb(img, resolution_log2) + for res in range(resolution_log2, 2, -1): + lod = resolution_log2 - res + x = block(x, res) + img = downscale2d(img) + y = fromrgb(img, res - 1) + with tf.variable_scope('Grow_lod%d' % lod): + x = tflib.lerp_clip(x, y, lod_in - lod) + scores_out = block(x, 2) + + # Recursive structure: complex but efficient. + if structure == 'recursive': + def cset(cur_lambda, new_cond, new_lambda): + return lambda: tf.cond(new_cond, new_lambda, cur_lambda) + def grow(res, lod): + x = lambda: fromrgb(downscale2d(images_in, 2**lod), res) + if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1)) + x = block(x(), res); y = lambda: x + if res > 2: y = cset(y, (lod_in > lod), lambda: tflib.lerp(x, fromrgb(downscale2d(images_in, 2**(lod+1)), res - 1), lod_in - lod)) + return y() + scores_out = grow(2, resolution_log2 - 2) + + # Label conditioning from "Which Training Methods for GANs do actually Converge?" + if label_size: + with tf.variable_scope('LabelSwitch'): + scores_out = tf.reduce_sum(scores_out * labels_in, axis=1, keepdims=True) + + assert scores_out.dtype == tf.as_dtype(dtype) + scores_out = tf.identity(scores_out, name='scores_out') + return scores_out + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/converters/stylegan_official/training/training_loop.py b/ContraCLIP/models/genforce/converters/stylegan_official/training/training_loop.py new file mode 100644 index 0000000000000000000000000000000000000000..d9ccb45b1a0321f1d938efa6a62229ffe396dcfe --- /dev/null +++ b/ContraCLIP/models/genforce/converters/stylegan_official/training/training_loop.py @@ -0,0 +1,278 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# This work is licensed under the Creative Commons Attribution-NonCommercial +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. + +"""Main training script.""" + +import os +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib +from dnnlib.tflib.autosummary import autosummary + +import config +import train +from training import dataset +from training import misc +from metrics import metric_base + +#---------------------------------------------------------------------------- +# Just-in-time processing of training images before feeding them to the networks. + +def process_reals(x, lod, mirror_augment, drange_data, drange_net): + with tf.name_scope('ProcessReals'): + with tf.name_scope('DynamicRange'): + x = tf.cast(x, tf.float32) + x = misc.adjust_dynamic_range(x, drange_data, drange_net) + if mirror_augment: + with tf.name_scope('MirrorAugment'): + s = tf.shape(x) + mask = tf.random_uniform([s[0], 1, 1, 1], 0.0, 1.0) + mask = tf.tile(mask, [1, s[1], s[2], s[3]]) + x = tf.where(mask < 0.5, x, tf.reverse(x, axis=[3])) + with tf.name_scope('FadeLOD'): # Smooth crossfade between consecutive levels-of-detail. 
+ s = tf.shape(x) + y = tf.reshape(x, [-1, s[1], s[2]//2, 2, s[3]//2, 2]) + y = tf.reduce_mean(y, axis=[3, 5], keepdims=True) + y = tf.tile(y, [1, 1, 1, 2, 1, 2]) + y = tf.reshape(y, [-1, s[1], s[2], s[3]]) + x = tflib.lerp(x, y, lod - tf.floor(lod)) + with tf.name_scope('UpscaleLOD'): # Upscale to match the expected input/output size of the networks. + s = tf.shape(x) + factor = tf.cast(2 ** tf.floor(lod), tf.int32) + x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1]) + x = tf.tile(x, [1, 1, 1, factor, 1, factor]) + x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor]) + return x + +#---------------------------------------------------------------------------- +# Evaluate time-varying training parameters. + +def training_schedule( + cur_nimg, + training_set, + num_gpus, + lod_initial_resolution = 4, # Image resolution used at the beginning. + lod_training_kimg = 600, # Thousands of real images to show before doubling the resolution. + lod_transition_kimg = 600, # Thousands of real images to show when fading in new layers. + minibatch_base = 16, # Maximum minibatch size, divided evenly among GPUs. + minibatch_dict = {}, # Resolution-specific overrides. + max_minibatch_per_gpu = {}, # Resolution-specific maximum minibatch size per GPU. + G_lrate_base = 0.001, # Learning rate for the generator. + G_lrate_dict = {}, # Resolution-specific overrides. + D_lrate_base = 0.001, # Learning rate for the discriminator. + D_lrate_dict = {}, # Resolution-specific overrides. + lrate_rampup_kimg = 0, # Duration of learning rate ramp-up. + tick_kimg_base = 160, # Default interval of progress snapshots. + tick_kimg_dict = {4: 160, 8:140, 16:120, 32:100, 64:80, 128:60, 256:40, 512:30, 1024:20}): # Resolution-specific overrides. + + # Initialize result dict. + s = dnnlib.EasyDict() + s.kimg = cur_nimg / 1000.0 + + # Training phase. + phase_dur = lod_training_kimg + lod_transition_kimg + phase_idx = int(np.floor(s.kimg / phase_dur)) if phase_dur > 0 else 0 + phase_kimg = s.kimg - phase_idx * phase_dur + + # Level-of-detail and resolution. + s.lod = training_set.resolution_log2 + s.lod -= np.floor(np.log2(lod_initial_resolution)) + s.lod -= phase_idx + if lod_transition_kimg > 0: + s.lod -= max(phase_kimg - lod_training_kimg, 0.0) / lod_transition_kimg + s.lod = max(s.lod, 0.0) + s.resolution = 2 ** (training_set.resolution_log2 - int(np.floor(s.lod))) + + # Minibatch size. + s.minibatch = minibatch_dict.get(s.resolution, minibatch_base) + s.minibatch -= s.minibatch % num_gpus + if s.resolution in max_minibatch_per_gpu: + s.minibatch = min(s.minibatch, max_minibatch_per_gpu[s.resolution] * num_gpus) + + # Learning rate. + s.G_lrate = G_lrate_dict.get(s.resolution, G_lrate_base) + s.D_lrate = D_lrate_dict.get(s.resolution, D_lrate_base) + if lrate_rampup_kimg > 0: + rampup = min(s.kimg / lrate_rampup_kimg, 1.0) + s.G_lrate *= rampup + s.D_lrate *= rampup + + # Other parameters. + s.tick_kimg = tick_kimg_dict.get(s.resolution, tick_kimg_base) + return s + +#---------------------------------------------------------------------------- +# Main training script. + +def training_loop( + submit_config, + G_args = {}, # Options for generator network. + D_args = {}, # Options for discriminator network. + G_opt_args = {}, # Options for generator optimizer. + D_opt_args = {}, # Options for discriminator optimizer. + G_loss_args = {}, # Options for generator loss. + D_loss_args = {}, # Options for discriminator loss. + dataset_args = {}, # Options for dataset.load_dataset(). 
+ sched_args = {}, # Options for train.TrainingSchedule. + grid_args = {}, # Options for train.setup_snapshot_image_grid(). + metric_arg_list = [], # Options for MetricGroup. + tf_config = {}, # Options for tflib.init_tf(). + G_smoothing_kimg = 10.0, # Half-life of the running average of generator weights. + D_repeats = 1, # How many times the discriminator is trained per G iteration. + minibatch_repeats = 4, # Number of minibatches to run before adjusting training parameters. + reset_opt_for_new_lod = True, # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced? + total_kimg = 15000, # Total length of the training, measured in thousands of real images. + mirror_augment = False, # Enable mirror augment? + drange_net = [-1,1], # Dynamic range used when feeding image data to the networks. + image_snapshot_ticks = 1, # How often to export image snapshots? + network_snapshot_ticks = 10, # How often to export network snapshots? + save_tf_graph = False, # Include full TensorFlow computation graph in the tfevents file? + save_weight_histograms = False, # Include weight histograms in the tfevents file? + resume_run_id = None, # Run ID or network pkl to resume training from, None = start from scratch. + resume_snapshot = None, # Snapshot index to resume training from, None = autodetect. + resume_kimg = 0.0, # Assumed training progress at the beginning. Affects reporting and training schedule. + resume_time = 0.0): # Assumed wallclock time at the beginning. Affects reporting. + + # Initialize dnnlib and TensorFlow. + ctx = dnnlib.RunContext(submit_config, train) + tflib.init_tf(tf_config) + + # Load training set. + training_set = dataset.load_dataset(data_dir=config.data_dir, verbose=True, **dataset_args) + + # Construct networks. + with tf.device('/gpu:0'): + if resume_run_id is not None: + network_pkl = misc.locate_network_pkl(resume_run_id, resume_snapshot) + print('Loading networks from "%s"...' 
% network_pkl) + G, D, Gs = misc.load_pkl(network_pkl) + else: + print('Constructing networks...') + G = tflib.Network('G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **G_args) + D = tflib.Network('D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **D_args) + Gs = G.clone('Gs') + G.print_layers(); D.print_layers() + + print('Building TensorFlow graph...') + with tf.name_scope('Inputs'), tf.device('/cpu:0'): + lod_in = tf.placeholder(tf.float32, name='lod_in', shape=[]) + lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[]) + minibatch_in = tf.placeholder(tf.int32, name='minibatch_in', shape=[]) + minibatch_split = minibatch_in // submit_config.num_gpus + Gs_beta = 0.5 ** tf.div(tf.cast(minibatch_in, tf.float32), G_smoothing_kimg * 1000.0) if G_smoothing_kimg > 0.0 else 0.0 + + G_opt = tflib.Optimizer(name='TrainG', learning_rate=lrate_in, **G_opt_args) + D_opt = tflib.Optimizer(name='TrainD', learning_rate=lrate_in, **D_opt_args) + for gpu in range(submit_config.num_gpus): + with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu): + G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow') + D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow') + lod_assign_ops = [tf.assign(G_gpu.find_var('lod'), lod_in), tf.assign(D_gpu.find_var('lod'), lod_in)] + reals, labels = training_set.get_minibatch_tf() + reals = process_reals(reals, lod_in, mirror_augment, training_set.dynamic_range, drange_net) + with tf.name_scope('G_loss'), tf.control_dependencies(lod_assign_ops): + G_loss = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=G_opt, training_set=training_set, minibatch_size=minibatch_split, **G_loss_args) + with tf.name_scope('D_loss'), tf.control_dependencies(lod_assign_ops): + D_loss = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=D_opt, training_set=training_set, minibatch_size=minibatch_split, reals=reals, labels=labels, **D_loss_args) + G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables) + D_opt.register_gradients(tf.reduce_mean(D_loss), D_gpu.trainables) + G_train_op = G_opt.apply_updates() + D_train_op = D_opt.apply_updates() + + Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta) + with tf.device('/gpu:0'): + try: + peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse() + except tf.errors.NotFoundError: + peak_gpu_mem_op = tf.constant(0) + + print('Setting up snapshot image grid...') + grid_size, grid_reals, grid_labels, grid_latents = misc.setup_snapshot_image_grid(G, training_set, **grid_args) + sched = training_schedule(cur_nimg=total_kimg*1000, training_set=training_set, num_gpus=submit_config.num_gpus, **sched_args) + grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch//submit_config.num_gpus) + + print('Setting up run dir...') + misc.save_image_grid(grid_reals, os.path.join(submit_config.run_dir, 'reals.png'), drange=training_set.dynamic_range, grid_size=grid_size) + misc.save_image_grid(grid_fakes, os.path.join(submit_config.run_dir, 'fakes%06d.png' % resume_kimg), drange=drange_net, grid_size=grid_size) + summary_log = tf.summary.FileWriter(submit_config.run_dir) + if save_tf_graph: + summary_log.add_graph(tf.get_default_graph()) + if save_weight_histograms: + G.setup_weight_histograms(); D.setup_weight_histograms() + metrics = metric_base.MetricGroup(metric_arg_list) + + print('Training...\n') + ctx.update('', cur_epoch=resume_kimg, max_epoch=total_kimg) + 
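+ # Main loop: the schedule (lod, minibatch size, learning rates) is re-evaluated
+ # every iteration from the number of real images shown so far; optimizer moments
+ # are optionally reset when the lod moves into a new integer interval (i.e., when
+ # a new set of layers starts fading in), and Gs tracks an exponential moving
+ # average of the generator weights.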
maintenance_time = ctx.get_last_update_interval() + cur_nimg = int(resume_kimg * 1000) + cur_tick = 0 + tick_start_nimg = cur_nimg + prev_lod = -1.0 + while cur_nimg < total_kimg * 1000: + if ctx.should_stop(): break + + # Choose training parameters and configure training ops. + sched = training_schedule(cur_nimg=cur_nimg, training_set=training_set, num_gpus=submit_config.num_gpus, **sched_args) + training_set.configure(sched.minibatch // submit_config.num_gpus, sched.lod) + if reset_opt_for_new_lod: + if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(sched.lod) != np.ceil(prev_lod): + G_opt.reset_optimizer_state(); D_opt.reset_optimizer_state() + prev_lod = sched.lod + + # Run training ops. + for _mb_repeat in range(minibatch_repeats): + for _D_repeat in range(D_repeats): + tflib.run([D_train_op, Gs_update_op], {lod_in: sched.lod, lrate_in: sched.D_lrate, minibatch_in: sched.minibatch}) + cur_nimg += sched.minibatch + tflib.run([G_train_op], {lod_in: sched.lod, lrate_in: sched.G_lrate, minibatch_in: sched.minibatch}) + + # Perform maintenance tasks once per tick. + done = (cur_nimg >= total_kimg * 1000) + if cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done: + cur_tick += 1 + tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0 + tick_start_nimg = cur_nimg + tick_time = ctx.get_time_since_last_update() + total_time = ctx.get_time_since_start() + resume_time + + # Report progress. + print('tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %-4.1f' % ( + autosummary('Progress/tick', cur_tick), + autosummary('Progress/kimg', cur_nimg / 1000.0), + autosummary('Progress/lod', sched.lod), + autosummary('Progress/minibatch', sched.minibatch), + dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)), + autosummary('Timing/sec_per_tick', tick_time), + autosummary('Timing/sec_per_kimg', tick_time / tick_kimg), + autosummary('Timing/maintenance_sec', maintenance_time), + autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2**30))) + autosummary('Timing/total_hours', total_time / (60.0 * 60.0)) + autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0)) + + # Save snapshots. + if cur_tick % image_snapshot_ticks == 0 or done: + grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch//submit_config.num_gpus) + misc.save_image_grid(grid_fakes, os.path.join(submit_config.run_dir, 'fakes%06d.png' % (cur_nimg // 1000)), drange=drange_net, grid_size=grid_size) + if cur_tick % network_snapshot_ticks == 0 or done or cur_tick == 1: + pkl = os.path.join(submit_config.run_dir, 'network-snapshot-%06d.pkl' % (cur_nimg // 1000)) + misc.save_pkl((G, D, Gs), pkl) + metrics.run(pkl, run_dir=submit_config.run_dir, num_gpus=submit_config.num_gpus, tf_config=tf_config) + + # Update summaries and RunContext. + metrics.update_autosummaries() + tflib.autosummary.save_summaries(summary_log, cur_nimg) + ctx.update('%.2f' % sched.lod, cur_epoch=cur_nimg // 1000, max_epoch=total_kimg) + maintenance_time = ctx.get_last_update_interval() - tick_time + + # Write final results. 
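+ # The final pickle stores (G, D, Gs); Gs is the moving-average copy of the
+ # generator that was used for the image snapshots above and is usually the
+ # network used at inference time.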
+ misc.save_pkl((G, D, Gs), os.path.join(submit_config.run_dir, 'network-final.pkl')) + summary_log.close() + + ctx.close() + +#---------------------------------------------------------------------------- diff --git a/ContraCLIP/models/genforce/datasets/README.md b/ContraCLIP/models/genforce/datasets/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c5afd6eca5a373ec567df4f4082010f3bf21aff3 --- /dev/null +++ b/ContraCLIP/models/genforce/datasets/README.md @@ -0,0 +1,24 @@ +# Data Preparation + +## Data Format + +Currently, our dataloader is able to load data from + +- a directory that is full of images (support using [`turbojpeg`](https://pypi.org/project/PyTurboJPEG/) to speed up decoding images.) +- a `lmdb` file +- an image list +- a compressed file (i.e., `zip` package) + +by modifying `data_format` in the configuration. + +**NOTE:** For some computing clusters whose I/O speed may be slow, we recommend the `zip` format for two reasons. First, `zip` file is easy to create. Second, this can load a large file at one time instead of loading small files repeatedly. + +## Data Sampling + +Considering that most generative models are trained in the unit of iterations instead of epochs, we change the default data loader to an *iter-based* one. Besides, the original distributed data sampler is also modified to make the shuffling correspond to iteration instead of epoch. + +**NOTE:** In order to reduce the data re-loading cost between epochs, we manually extend the length of sampled indices to make it much more efficient. + +## Data Augmentation + +To better align with the original implementation of PGGAN and StyleGAN (i.e., models that require progressive training), we support progressive resize in `transforms.py`, which downsamples images with the maximum resize factor of 2 at each time. diff --git a/ContraCLIP/models/genforce/datasets/__init__.py b/ContraCLIP/models/genforce/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..356bc45486b70d3acd896dd810c135bafc16542e --- /dev/null +++ b/ContraCLIP/models/genforce/datasets/__init__.py @@ -0,0 +1,7 @@ +# python3.7 +"""Collects datasets and data loaders.""" + +from .datasets import BaseDataset +from .dataloaders import IterDataLoader + +__all__ = ['BaseDataset', 'IterDataLoader'] diff --git a/ContraCLIP/models/genforce/datasets/dataloaders.py b/ContraCLIP/models/genforce/datasets/dataloaders.py new file mode 100644 index 0000000000000000000000000000000000000000..3a9b31c4545233e9b0c0e134e627fa34dffe806f --- /dev/null +++ b/ContraCLIP/models/genforce/datasets/dataloaders.py @@ -0,0 +1,128 @@ +# python3.7 +"""Contains the class of data loader.""" + +import argparse + +from torch.utils.data import DataLoader +from .distributed_sampler import DistributedSampler +from .datasets import BaseDataset + + +__all__ = ['IterDataLoader'] + + +class IterDataLoader(object): + """Iteration-based data loader.""" + + def __init__(self, + dataset, + batch_size, + shuffle=True, + num_workers=1, + current_iter=0, + repeat=1): + """Initializes the data loader. + + Args: + dataset: The dataset to load data from. + batch_size: The batch size on each GPU. + shuffle: Whether to shuffle the data. (default: True) + num_workers: Number of data workers for each GPU. (default: 1) + current_iter: The current number of iterations. (default: 0) + repeat: The repeating number of the whole dataloader. 
(default: 1) + """ + self._dataset = dataset + self.batch_size = batch_size + self.shuffle = shuffle + self.num_workers = num_workers + self._dataloader = None + self.iter_loader = None + self._iter = current_iter + self.repeat = repeat + self.build_dataloader() + + def build_dataloader(self): + """Builds data loader.""" + dist_sampler = DistributedSampler(self._dataset, + shuffle=self.shuffle, + current_iter=self._iter, + repeat=self.repeat) + + self._dataloader = DataLoader(self._dataset, + batch_size=self.batch_size, + shuffle=(dist_sampler is None), + num_workers=self.num_workers, + drop_last=self.shuffle, + pin_memory=True, + sampler=dist_sampler) + self.iter_loader = iter(self._dataloader) + + + def overwrite_param(self, batch_size=None, resolution=None): + """Overwrites some parameters for progressive training.""" + if (not batch_size) and (not resolution): + return + if (batch_size == self.batch_size) and ( + resolution == self.dataset.resolution): + return + if batch_size: + self.batch_size = batch_size + if resolution: + self._dataset.resolution = resolution + self.build_dataloader() + + @property + def iter(self): + """Returns the current iteration.""" + return self._iter + + @property + def dataset(self): + """Returns the dataset.""" + return self._dataset + + @property + def dataloader(self): + """Returns the data loader.""" + return self._dataloader + + def __next__(self): + try: + data = next(self.iter_loader) + self._iter += 1 + except StopIteration: + self._dataloader.sampler.__reset__(self._iter) + self.iter_loader = iter(self._dataloader) + data = next(self.iter_loader) + self._iter += 1 + return data + + def __len__(self): + return len(self._dataloader) + + +def dataloader_test(root_dir, test_num=10): + """Tests data loader.""" + res = 2 + bs = 2 + dataset = BaseDataset(root_dir=root_dir, resolution=res) + dataloader = IterDataLoader(dataset=dataset, + batch_size=bs, + shuffle=False) + for _ in range(test_num): + data_batch = next(dataloader) + image = data_batch['image'] + assert image.shape == (bs, 3, res, res) + res *= 2 + bs += 1 + dataloader.overwrite_param(batch_size=bs, resolution=res) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Test Data Loader.') + parser.add_argument('root_dir', type=str, + help='Root directory of the dataset.') + parser.add_argument('--test_num', type=int, default=10, + help='Number of tests. 
(default: %(default)s)') + args = parser.parse_args() + dataloader_test(args.root_dir, args.test_num) diff --git a/ContraCLIP/models/genforce/datasets/datasets.py b/ContraCLIP/models/genforce/datasets/datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..d9964afe96e62b11ae4ec71857714101ce7e42b9 --- /dev/null +++ b/ContraCLIP/models/genforce/datasets/datasets.py @@ -0,0 +1,239 @@ +# python3.7 +"""Contains the class of dataset.""" + +import os +import pickle +import string +import zipfile +import numpy as np +import cv2 +import lmdb + +import torch +from torch.utils.data import Dataset + +from .transforms import progressive_resize_image +from .transforms import crop_resize_image +from .transforms import resize_image +from .transforms import normalize_image + +try: + import turbojpeg + BASE_DIR = os.path.dirname(os.path.relpath(__file__)) + LIBRARY_NAME = 'libturbojpeg.so.0' + LIBRARY_PATH = os.path.join(BASE_DIR, LIBRARY_NAME) + jpeg = turbojpeg.TurboJPEG(LIBRARY_PATH) +except ImportError: + jpeg = None + +__all__ = ['BaseDataset'] + +_FORMATS_ALLOWED = ['dir', 'lmdb', 'list', 'zip'] + + +class ZipLoader(object): + """Defines a class to load zip file. + + This is a static class, which is used to solve the problem that different + data workers can not share the same memory. + """ + files = dict() + + @staticmethod + def get_zipfile(file_path): + """Fetches a zip file.""" + zip_files = ZipLoader.files + if file_path not in zip_files: + zip_files[file_path] = zipfile.ZipFile(file_path, 'r') + return zip_files[file_path] + + @staticmethod + def get_image(file_path, image_path): + """Decodes an image from a particular zip file.""" + zip_file = ZipLoader.get_zipfile(file_path) + image_str = zip_file.read(image_path) + image_np = np.frombuffer(image_str, np.uint8) + image = cv2.imdecode(image_np, cv2.IMREAD_COLOR) + return image + + +class LmdbLoader(object): + """Defines a class to load lmdb file. + + This is a static class, which is used to solve lmdb loading error + when num_workers > 0 + """ + files = dict() + + @staticmethod + def get_lmdbfile(file_path): + """Fetches a lmdb file""" + lmdb_files = LmdbLoader.files + if 'env' not in lmdb_files: + env = lmdb.open(file_path, + max_readers=1, + readonly=True, + lock=False, + readahead=False, + meminit=False) + with env.begin(write=False) as txn: + num_samples = txn.stat()['entries'] + cache_file = '_cache_' + ''.join( + c for c in file_path if c in string.ascii_letters) + if os.path.isfile(cache_file): + keys = pickle.load(open(cache_file, "rb")) + else: + with env.begin(write=False) as txn: + keys = [key for key, _ in txn.cursor()] + pickle.dump(keys, open(cache_file, "wb")) + lmdb_files['env'] = env + lmdb_files['num_samples'] = num_samples + lmdb_files['keys'] = keys + return lmdb_files + + @staticmethod + def get_image(file_path, idx): + """Decodes an image from a particular lmdb file""" + lmdb_files = LmdbLoader.get_lmdbfile(file_path) + env = lmdb_files['env'] + keys = lmdb_files['keys'] + with env.begin(write=False) as txn: + imagebuf = txn.get(keys[idx]) + image_np = np.frombuffer(imagebuf, np.uint8) + image = cv2.imdecode(image_np, cv2.IMREAD_COLOR) + return image + + +class BaseDataset(Dataset): + """Defines the base dataset class. + + This class supports loading data from a full-of-image folder, a lmdb + database, or an image list. Images will be pre-processed based on the given + `transform` function before fed into the data loader. 
+ + NOTE: The loaded data will be returned as a directory, where there must be + a key `image`. + """ + def __init__(self, + root_dir, + resolution, + data_format='dir', + image_list_path=None, + mirror=0.0, + progressive_resize=True, + crop_resize_resolution=-1, + transform=normalize_image, + transform_kwargs=None, + **_unused_kwargs): + """Initializes the dataset. + + Args: + root_dir: Root directory containing the dataset. + resolution: The resolution of the returned image. + data_format: Format the dataset is stored. Supports `dir`, `lmdb`, + and `list`. (default: `dir`) + image_list_path: Path to the image list. This field is required if + `data_format` is `list`. (default: None) + mirror: The probability to do mirror augmentation. (default: 0.0) + progressive_resize: Whether to resize images progressively. + (default: True) + crop_resize_resolution: The resolution of the output after crop + and resize. (default: -1) + transform: The transform function for pre-processing. + (default: `datasets.transforms.normalize_image()`) + transform_kwargs: The additional arguments for the `transform` + function. (default: None) + + Raises: + ValueError: If the input `data_format` is not supported. + NotImplementedError: If the input `data_format` is not implemented. + """ + if data_format.lower() not in _FORMATS_ALLOWED: + raise ValueError(f'Invalid data format `{data_format}`!\n' + f'Supported formats: {_FORMATS_ALLOWED}.') + + self.root_dir = root_dir + self.resolution = resolution + self.data_format = data_format.lower() + self.image_list_path = image_list_path + self.mirror = np.clip(mirror, 0.0, 1.0) + self.progressive_resize = progressive_resize + self.crop_resize_resolution = crop_resize_resolution + self.transform = transform + self.transform_kwargs = transform_kwargs or dict() + + if self.data_format == 'dir': + self.image_paths = sorted(os.listdir(self.root_dir)) + self.num_samples = len(self.image_paths) + elif self.data_format == 'lmdb': + lmdb_file = LmdbLoader.get_lmdbfile(self.root_dir) + self.num_samples = lmdb_file['num_samples'] + elif self.data_format == 'list': + self.metas = [] + assert os.path.isfile(self.image_list_path) + with open(self.image_list_path) as f: + for line in f: + fields = line.rstrip().split(' ') + if len(fields) == 1: + self.metas.append((fields[0], None)) + else: + assert len(fields) == 2 + self.metas.append((fields[0], int(fields[1]))) + self.num_samples = len(self.metas) + elif self.data_format == 'zip': + zip_file = ZipLoader.get_zipfile(self.root_dir) + image_paths = [f for f in zip_file.namelist() + if ('.jpg' in f or '.jpeg' in f or '.png' in f)] + self.image_paths = sorted(image_paths) + self.num_samples = len(self.image_paths) + else: + raise NotImplementedError(f'Not implemented data format ' + f'`{self.data_format}`!') + + def __len__(self): + return self.num_samples + + def __getitem__(self, idx): + data = dict() + + # Load data. 
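+ # The `dir` format tries turbojpeg first (if the shared library was loaded) and
+ # falls back to cv2.imread; `lmdb` and `zip` go through their static loader
+ # classes, while `list` reads with cv2. All of them return BGR arrays, which are
+ # converted to RGB further below.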
+ if self.data_format == 'dir': + image_path = self.image_paths[idx] + try: + in_file = open(os.path.join(self.root_dir, image_path), 'rb') + image = jpeg.decode(in_file.read()) + except: # pylint: disable=bare-except + image = cv2.imread(os.path.join(self.root_dir, image_path)) + elif self.data_format == 'lmdb': + image = LmdbLoader.get_image(self.root_dir, idx) + elif self.data_format == 'list': + image_path, label = self.metas[idx] + image = cv2.imread(os.path.join(self.root_dir, image_path)) + label = None if label is None else torch.LongTensor(label) + # data.update({'label': label}) + elif self.data_format == 'zip': + image_path = self.image_paths[idx] + image = ZipLoader.get_image(self.root_dir, image_path) + else: + raise NotImplementedError(f'Not implemented data format ' + f'`{self.data_format}`!') + + image = image[:, :, ::-1] # Converts BGR (cv2) to RGB. + + # Transform image. + if self.crop_resize_resolution > 0: + image = crop_resize_image(image, self.crop_resize_resolution) + if self.progressive_resize: + image = progressive_resize_image(image, self.resolution) + image = image.transpose(2, 0, 1).astype(np.float32) + if np.random.uniform() < self.mirror: + image = image[:, :, ::-1] # CHW + image = torch.FloatTensor(image.copy()) + if not self.progressive_resize: + image = resize_image(image, self.resolution) + + if self.transform is not None: + image = self.transform(image, **self.transform_kwargs) + data.update({'image': image}) + + return data diff --git a/ContraCLIP/models/genforce/datasets/distributed_sampler.py b/ContraCLIP/models/genforce/datasets/distributed_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..b62feb15113f20171b8797f2bdeff1eff9b4724d --- /dev/null +++ b/ContraCLIP/models/genforce/datasets/distributed_sampler.py @@ -0,0 +1,144 @@ +# python3.7 +"""Contains the distributed data sampler. + +This file is mostly borrowed from `torch/utils/data/distributed.py`. + +However, sometimes, initialize the data loader and data sampler can be time +consuming (since it will load a large amount of data at one time). To avoid +re-initializing the data loader again and again, we modified the sampler to +support loading the data for only one time and then repeating the data loader. +Please use the class member `repeat` to control how many times you want the +data load to repeat. After `repeat` times, the data will be re-loaded. + +NOTE: The number of repeat times should not be very large, especially when there +are too many samples in the dataset. We recommend to set `repeat = 500` for +datasets with ~50K samples. +""" + +# pylint: disable=line-too-long + +import math +from typing import TypeVar, Optional, Iterator + +import torch +from torch.utils.data import Sampler, Dataset +import torch.distributed as dist + + +T_co = TypeVar('T_co', covariant=True) + + +class DistributedSampler(Sampler): + r"""Sampler that restricts data loading to a subset of the dataset. + + It is especially useful in conjunction with + :class:`torch.nn.parallel.DistributedDataParallel`. In such a case, each + process can pass a :class:`~torch.utils.data.DistributedSampler` instance as a + :class:`~torch.utils.data.DataLoader` sampler, and load a subset of the + original dataset that is exclusive to it. + + .. note:: + Dataset is assumed to be of constant size. + + Arguments: + dataset: Dataset used for sampling. + num_replicas (int, optional): Number of processes participating in + distributed training. 
By default, :attr:`rank` is retrieved from the + current distributed group. + rank (int, optional): Rank of the current process within :attr:`num_replicas`. + By default, :attr:`rank` is retrieved from the current distributed + group. + shuffle (bool, optional): If ``True`` (default), sampler will shuffle the + indices. + seed (int, optional): random seed used to shuffle the sampler if + :attr:`shuffle=True`. This number should be identical across all + processes in the distributed group. Default: ``0``. + drop_last (bool, optional): if ``True``, then the sampler will drop the + tail of the data to make it evenly divisible across the number of + replicas. If ``False``, the sampler will add extra indices to make + the data evenly divisible across the replicas. Default: ``False``. + current_iter (int, optional): Number of current iteration. Default: ``0``. + repeat (int, optional): Repeating number of the whole dataloader. Default: ``1000``. + + .. warning:: + In distributed mode, calling the :meth:`set_epoch` method at + the beginning of each epoch **before** creating the :class:`DataLoader` iterator + is necessary to make shuffling work properly across multiple epochs. Otherwise, + the same ordering will be always used. + + """ + + def __init__(self, dataset: Dataset, num_replicas: Optional[int] = None, + rank: Optional[int] = None, shuffle: bool = True, + seed: int = 0, drop_last: bool = False, current_iter: int = 0, + repeat: int = 1000) -> None: + super().__init__(None) + if num_replicas is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + num_replicas = dist.get_world_size() + if rank is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + rank = dist.get_rank() + self.dataset = dataset + self.num_replicas = num_replicas + self.rank = rank + self.iter = current_iter + self.drop_last = drop_last + + # NOTE: self.dataset_length is `repeat X len(self.dataset)` + self.repeat = repeat + self.dataset_length = len(self.dataset) * self.repeat + + if self.drop_last and self.dataset_length % self.num_replicas != 0: + # Split to nearest available length that is evenly divisible. + # This is to ensure each rank receives the same amount of data when + # using this Sampler. + self.num_samples = math.ceil( + (self.dataset_length - self.num_replicas) / self.num_replicas + ) + else: + self.num_samples = math.ceil(self.dataset_length / self.num_replicas) + + + self.total_size = self.num_samples * self.num_replicas + self.shuffle = shuffle + self.seed = seed + self.__generate_indices__() + + def __generate_indices__(self) -> None: + g = torch.Generator() + indices_bank = [] + for iter_ in range(self.iter, self.iter + self.repeat): + g.manual_seed(self.seed + iter_) + indices = torch.randperm(len(self.dataset), generator=g).tolist() + indices_bank.extend(indices) + self.indices = indices_bank + + def __iter__(self) -> Iterator[T_co]: + if self.shuffle: + # deterministically shuffle based on iter and seed + indices = self.indices + else: + indices = list(range(self.dataset_length)) + + if not self.drop_last: + # add extra samples to make it evenly divisible + indices += indices[:(self.total_size - len(indices))] + else: + # remove tail of data to make it evenly divisible. 
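+ # (keep exactly `total_size` indices so that every rank receives `num_samples` items)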
+ indices = indices[:self.total_size] + + # subsample + indices = indices[self.rank:self.total_size:self.num_replicas] + return iter(indices) + + def __len__(self) -> int: + return self.num_samples + + def __reset__(self, iteration: int) -> None: + self.iter = iteration + self.__generate_indices__() + +# pylint: enable=line-too-long diff --git a/ContraCLIP/models/genforce/datasets/libturbojpeg.so.0 b/ContraCLIP/models/genforce/datasets/libturbojpeg.so.0 new file mode 100644 index 0000000000000000000000000000000000000000..defe7aecf701a012abe16715a3eb9cfd0d04b16e Binary files /dev/null and b/ContraCLIP/models/genforce/datasets/libturbojpeg.so.0 differ diff --git a/ContraCLIP/models/genforce/datasets/transforms.py b/ContraCLIP/models/genforce/datasets/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..bae766a4634ebb3c6e50f3727f59f55bb350ec5a --- /dev/null +++ b/ContraCLIP/models/genforce/datasets/transforms.py @@ -0,0 +1,201 @@ +"""Contains transform functions.""" + +import cv2 +import numpy as np +import PIL.Image + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +__all__ = [ + 'crop_resize_image', 'progressive_resize_image', 'resize_image', + 'normalize_image', 'normalize_latent_code', 'ImageResizing', + 'ImageNormalization', 'LatentCodeNormalization', +] + + +def crop_resize_image(image, size): + """Crops a square patch and then resizes it to the given size. + + Args: + image: The input image to crop and resize. + size: An integer, indicating the target size. + + Returns: + An image with target size. + + Raises: + TypeError: If the input `image` is not with type `numpy.ndarray`. + ValueError: If the input `image` is not with shape [H, W, C]. + """ + if not isinstance(image, np.ndarray): + raise TypeError(f'Input image should be with type `numpy.ndarray`, ' + f'but `{type(image)}` is received!') + if image.ndim != 3: + raise ValueError(f'Input image should be with shape [H, W, C], ' + f'but `{image.shape}` is received!') + + height, width, channel = image.shape + short_side = min(height, width) + image = image[(height - short_side) // 2:(height + short_side) // 2, + (width - short_side) // 2:(width + short_side) // 2] + pil_image = PIL.Image.fromarray(image) + pil_image = pil_image.resize((size, size), PIL.Image.ANTIALIAS) + image = np.asarray(pil_image) + assert image.shape == (size, size, channel) + return image + + +def progressive_resize_image(image, size): + """Resizes image to target size progressively. + + Different from normal resize, this function will reduce the image size + progressively. In each step, the maximum reduce factor is 2. + + NOTE: This function can only handle square images, and can only be used for + downsampling. + + Args: + image: The input (square) image to resize. + size: An integer, indicating the target size. + + Returns: + An image with target size. + + Raises: + TypeError: If the input `image` is not with type `numpy.ndarray`. + ValueError: If the input `image` is not with shape [H, W, C]. 
+ """ + if not isinstance(image, np.ndarray): + raise TypeError(f'Input image should be with type `numpy.ndarray`, ' + f'but `{type(image)}` is received!') + if image.ndim != 3: + raise ValueError(f'Input image should be with shape [H, W, C], ' + f'but `{image.shape}` is received!') + + height, width, channel = image.shape + assert height == width + assert height >= size + num_iters = int(np.log2(height) - np.log2(size)) + for _ in range(num_iters): + height = max(height // 2, size) + image = cv2.resize(image, (height, height), + interpolation=cv2.INTER_LINEAR) + assert image.shape == (size, size, channel) + return image + + +def resize_image(image, size): + """Resizes image to target size. + + NOTE: We use adaptive average pooing for image resizing. Instead of bilinear + interpolation, average pooling is able to acquire information from more + pixels, such that the resized results can be with higher quality. + + Args: + image: The input image tensor, with shape [C, H, W], to resize. + size: An integer or a tuple of integer, indicating the target size. + + Returns: + An image tensor with target size. + + Raises: + TypeError: If the input `image` is not with type `torch.Tensor`. + ValueError: If the input `image` is not with shape [C, H, W]. + """ + if not isinstance(image, torch.Tensor): + raise TypeError(f'Input image should be with type `torch.Tensor`, ' + f'but `{type(image)}` is received!') + if image.ndim != 3: + raise ValueError(f'Input image should be with shape [C, H, W], ' + f'but `{image.shape}` is received!') + + image = F.adaptive_avg_pool2d(image.unsqueeze(0), size).squeeze(0) + return image + + +def normalize_image(image, mean=127.5, std=127.5): + """Normalizes image by subtracting mean and dividing std. + + Args: + image: The input image tensor to normalize. + mean: The mean value to subtract from the input tensor. (default: 127.5) + std: The standard deviation to normalize the input tensor. (default: + 127.5) + + Returns: + A normalized image tensor. + + Raises: + TypeError: If the input `image` is not with type `torch.Tensor`. + """ + if not isinstance(image, torch.Tensor): + raise TypeError(f'Input image should be with type `torch.Tensor`, ' + f'but `{type(image)}` is received!') + out = (image - mean) / std + return out + + +def normalize_latent_code(latent_code, adjust_norm=True): + """Normalizes latent code. + + NOTE: The latent code will always be normalized along the last axis. + Meanwhile, if `adjust_norm` is set as `True`, the norm of the result will be + adjusted to `sqrt(latent_code.shape[-1])` in order to avoid too small value. + + Args: + latent_code: The input latent code tensor to normalize. + adjust_norm: Whether to adjust the norm of the output. (default: True) + + Returns: + A normalized latent code tensor. + + Raises: + TypeError: If the input `latent_code` is not with type `torch.Tensor`. 
+ """ + if not isinstance(latent_code, torch.Tensor): + raise TypeError(f'Input latent code should be with type ' + f'`torch.Tensor`, but `{type(latent_code)}` is ' + f'received!') + dim = latent_code.shape[-1] + norm = latent_code.pow(2).sum(-1, keepdim=True).pow(0.5) + out = latent_code / norm + if adjust_norm: + out = out * (dim ** 0.5) + return out + + +class ImageResizing(nn.Module): + """Implements the image resizing layer.""" + + def __init__(self, size): + super().__init__() + self.size = size + + def forward(self, image): + return resize_image(image, self.size) + + +class ImageNormalization(nn.Module): + """Implements the image normalization layer.""" + + def __init__(self, mean=127.5, std=127.5): + super().__init__() + self.mean = mean + self.std = std + + def forward(self, image): + return normalize_image(image, self.mean, self.std) + + +class LatentCodeNormalization(nn.Module): + """Implements the latent code normalization layer.""" + + def __init__(self, adjust_norm=True): + super().__init__() + self.adjust_norm = adjust_norm + + def forward(self, latent_code): + return normalize_latent_code(latent_code, self.adjust_norm) diff --git a/ContraCLIP/models/genforce/docs/synthesize_demo.ipynb b/ContraCLIP/models/genforce/docs/synthesize_demo.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..c9d5f01900d4861b878a21c210090627832e22bc --- /dev/null +++ b/ContraCLIP/models/genforce/docs/synthesize_demo.ipynb @@ -0,0 +1,206 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "synthesize_demo", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "qJDJLE3v0HNr" + }, + "source": [ + "# Fetch Codebase and Install Environment" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qy1nwGJV5JuG" + }, + "source": [ + "import os\n", + "os.chdir('/content')\n", + "CODE_DIR = 'GenForce'\n", + "!git clone https://github.com/genforce/genforce.git $CODE_DIR\n", + "os.chdir(f'./{CODE_DIR}')\n", + "!pip install -r requirements.txt > installation_output.txt" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qh5DFyyg0Ntm" + }, + "source": [ + "# Define Utility Functions" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qcSdJW5V0M-8" + }, + "source": [ + "import os\n", + "import subprocess\n", + "import io\n", + "import IPython.display\n", + "import numpy as np\n", + "import PIL.Image\n", + "\n", + "import torch\n", + "\n", + "from models import MODEL_ZOO\n", + "from models import build_generator\n", + "from utils.visualizer import fuse_images\n", + "\n", + "\n", + "def postprocess(images):\n", + " \"\"\"Post-processes images from `torch.Tensor` to `numpy.ndarray`.\"\"\"\n", + " images = images.detach().cpu().numpy()\n", + " images = (images + 1) * 255 / 2\n", + " images = np.clip(images + 0.5, 0, 255).astype(np.uint8)\n", + " images = images.transpose(0, 2, 3, 1)\n", + " return images\n", + "\n", + "\n", + "def build(model_name):\n", + " \"\"\"Builds generator and load pre-trained weights.\"\"\"\n", + " model_config = MODEL_ZOO[model_name].copy()\n", + " url = model_config.pop('url') # URL to download model if needed.\n", + "\n", + " # Build generator.\n", + " print(f'Building generator for model `{model_name}` ...')\n", + " generator = build_generator(**model_config)\n", + " print(f'Finish building 
generator.')\n", + "\n", + " # Load pre-trained weights.\n", + " os.makedirs('checkpoints', exist_ok=True)\n", + " checkpoint_path = os.path.join('checkpoints', model_name + '.pth')\n", + " print(f'Loading checkpoint from `{checkpoint_path}` ...')\n", + " if not os.path.exists(checkpoint_path):\n", + " print(f' Downloading checkpoint from `{url}` ...')\n", + " subprocess.call(['wget', '--quiet', '-O', checkpoint_path, url])\n", + " print(f' Finish downloading checkpoint.')\n", + " checkpoint = torch.load(checkpoint_path, map_location='cpu')\n", + " if 'generator_smooth' in checkpoint:\n", + " generator.load_state_dict(checkpoint['generator_smooth'])\n", + " else:\n", + " generator.load_state_dict(checkpoint['generator'])\n", + " generator = generator.cuda()\n", + " generator.eval()\n", + " print(f'Finish loading checkpoint.')\n", + " return generator\n", + "\n", + "\n", + "def synthesize(generator, num, synthesis_kwargs=None, batch_size=1, seed=0):\n", + " \"\"\"Synthesize images.\"\"\"\n", + " assert num > 0 and batch_size > 0\n", + " synthesis_kwargs = synthesis_kwargs or dict()\n", + "\n", + " # Set random seed.\n", + " np.random.seed(seed)\n", + " torch.manual_seed(seed)\n", + "\n", + " # Sample and synthesize.\n", + " outputs = []\n", + " for idx in range(0, num, batch_size):\n", + " batch = min(batch_size, num - idx)\n", + " code = torch.randn(batch, generator.z_space_dim).cuda()\n", + " with torch.no_grad():\n", + " images = generator(code, **synthesis_kwargs)['image']\n", + " images = postprocess(images)\n", + " outputs.append(images)\n", + " return np.concatenate(outputs, axis=0)\n", + "\n", + "\n", + "def imshow(images, viz_size=256, col=0, spacing=0):\n", + " \"\"\"Shows images in one figure.\"\"\"\n", + " fused_image = fuse_images(\n", + " images,\n", + " col=col,\n", + " image_size=viz_size,\n", + " row_spacing=spacing,\n", + " col_spacing=spacing\n", + " )\n", + " fused_image = np.asarray(fused_image, dtype=np.uint8)\n", + " data = io.BytesIO()\n", + " PIL.Image.fromarray(fused_image).save(data, 'jpeg')\n", + " im_data = data.getvalue()\n", + " disp = IPython.display.display(IPython.display.Image(im_data))\n", + " return disp" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rIrseINa879H" + }, + "source": [ + "# Select a Model" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RyoJmv-PtZo_" + }, + "source": [ + "#@title { display-mode: \"form\", run: \"auto\" }\n", + "\n", + "model_name = \"stylegan_diningroom256\" #@param ['pggan_celebahq1024', 'pggan_bedroom256', 'pggan_livingroom256', 'pggan_diningroom256', 'pggan_kitchen256', 'pggan_churchoutdoor256', 'pggan_tower256', 'pggan_bridge256', 'pggan_restaurant256', 'pggan_classroom256', 'pggan_conferenceroom256', 'pggan_person256', 'pggan_cat256', 'pggan_dog256', 'pggan_bird256', 'pggan_horse256', 'pggan_sheep256', 'pggan_cow256', 'pggan_car256', 'pggan_bicycle256', 'pggan_motorbike256', 'pggan_bus256', 'pggan_train256', 'pggan_boat256', 'pggan_airplane256', 'pggan_bottle256', 'pggan_chair256', 'pggan_pottedplant256', 'pggan_tvmonitor256', 'pggan_diningtable256', 'pggan_sofa256', 'stylegan_ffhq1024', 'stylegan_celebahq1024', 'stylegan_bedroom256', 'stylegan_cat256', 'stylegan_car512', 'stylegan_celeba_partial256', 'stylegan_ffhq256', 'stylegan_ffhq512', 'stylegan_livingroom256', 'stylegan_diningroom256', 'stylegan_kitchen256', 'stylegan_apartment256', 'stylegan_church256', 'stylegan_tower256', 'stylegan_bridge256', 'stylegan_restaurant256', 
'stylegan_classroom256', 'stylegan_conferenceroom256', 'stylegan_animeface512', 'stylegan_animeportrait512', 'stylegan_artface512', 'stylegan2_ffhq1024', 'stylegan2_church256', 'stylegan2_cat256', 'stylegan2_horse256', 'stylegan2_car512']\n", + "\n", + "generator = build(model_name)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RsGPMc5E8_jn" + }, + "source": [ + "# Synthesize" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jPkIXKxp4-7L" + }, + "source": [ + "#@title { display-mode: \"form\", run: \"auto\" }\n", + "\n", + "num_samples = 8 #@param {type:\"slider\", min:1, max:20, step:1}\n", + "noise_seed = 488 #@param {type:\"slider\", min:0, max:1000, step:1}\n", + "truncation = 1 #@param {type:\"slider\", min:0.0, max:1, step:0.02}\n", + "truncation_layers = 3 #@param {type:\"slider\", min:0, max:18, step:1}\n", + "randomize_noise = 'false' #@param ['true', 'false']\n", + "\n", + "synthesis_kwargs = dict(trunc_psi=1 - truncation,\n", + " trunc_layers=truncation_layers,\n", + " randomize_noise=randomize_noise)\n", + "images = synthesize(generator, num_samples, synthesis_kwargs, seed=noise_seed)\n", + "imshow(images)" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/ContraCLIP/models/genforce/metrics/README.md b/ContraCLIP/models/genforce/metrics/README.md new file mode 100644 index 0000000000000000000000000000000000000000..34a81ac363a59d38e1c177c3e1ee4983dfe7ac96 --- /dev/null +++ b/ContraCLIP/models/genforce/metrics/README.md @@ -0,0 +1,18 @@ +# Evaluation Metrics + +Frechet Inception Distance (FID) is commonly used to evaluate generative model. It employs an [Inception Model](https://arxiv.org/abs/1512.00567) (pretrained on ImageNet) to extract features from both real and synthesized images. + +## Inception Model + +For [PGGAN](https://github.com/tkarras/progressive_growing_of_gans), [StyleGAN](https://github.com/NVlabs/stylegan), etc, they use inception model from the [TensorFlow Models](https://github.com/tensorflow/models) repository, whose implementation is slightly different from that of `torchvision`. Hence, to make the evaluation metric comparable between different training frameworks (i.e., PyTorch and TensorFlow), we modify `torchvision/models/inception.py` as `inception.py`. The ported pre-trained weight is borrowed from [this repo](https://github.com/mseitzer/pytorch-fid). + +**NOTE:** We also support using the model from `torchvision` to compute the FID. However, please be aware that the FID value from `torchvision` is usually ~1.5 smaller than that from the TensorFlow model. + +Please use the following code to choose which model to use. + +```python +from metrics.inception import build_inception_model + +inception_model_tf = build_inception_model(align_tf=True) +inception_model_pth = build_inception_model(align_tf=False) +``` diff --git a/ContraCLIP/models/genforce/metrics/__init__.py b/ContraCLIP/models/genforce/metrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ContraCLIP/models/genforce/metrics/fid.py b/ContraCLIP/models/genforce/metrics/fid.py new file mode 100644 index 0000000000000000000000000000000000000000..96676b8f880cc78b7c9efc98b4d6d69aa6c399ba --- /dev/null +++ b/ContraCLIP/models/genforce/metrics/fid.py @@ -0,0 +1,59 @@ +# python3.7 +"""Contains the functions to compute Frechet Inception Distance (FID). 
+ +FID metric is introduced in paper + +GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash +Equilibrium. Heusel et al. NeurIPS 2017. + +See details at https://arxiv.org/pdf/1706.08500.pdf +""" + +import numpy as np +import scipy.linalg + +__all__ = ['extract_feature', 'compute_fid'] + + +def extract_feature(inception_model, images): + """Extracts feature from input images with given model. + + NOTE: The input images are assumed to be with pixel range [-1, 1]. + + Args: + inception_model: The model used to extract features. + images: The input image tensor to extract features from. + + Returns: + A `numpy.ndarray`, containing the extracted features. + """ + features = inception_model(images, output_logits=False) + features = features.detach().cpu().numpy() + assert features.ndim == 2 and features.shape[1] == 2048 + return features + + +def compute_fid(fake_features, real_features): + """Computes FID based on the features extracted from fake and real data. + + Given the mean and covariance (m_f, C_f) of fake data and (m_r, C_r) of real + data, the FID metric can be computed by + + d^2 = ||m_f - m_r||_2^2 + Tr(C_f + C_r - 2(C_f C_r)^0.5) + + Args: + fake_features: The features extracted from fake data. + real_features: The features extracted from real data. + + Returns: + A real number, suggesting the FID value. + """ + + m_f = np.mean(fake_features, axis=0) + C_f = np.cov(fake_features, rowvar=False) + m_r = np.mean(real_features, axis=0) + C_r = np.cov(real_features, rowvar=False) + + fid = np.sum((m_f - m_r) ** 2) + np.trace( + C_f + C_r - 2 * scipy.linalg.sqrtm(np.dot(C_f, C_r))) + return np.real(fid) diff --git a/ContraCLIP/models/genforce/metrics/inception.py b/ContraCLIP/models/genforce/metrics/inception.py new file mode 100644 index 0000000000000000000000000000000000000000..74b5a1cfd4f4cf70103e044d5473236a42cf34c8 --- /dev/null +++ b/ContraCLIP/models/genforce/metrics/inception.py @@ -0,0 +1,520 @@ +# python3.7 +"""Contains the Inception V3 model. + +This file is mostly borrowed from `torchvision/models/inception.py`. + +Inception model is widely used to compute FID or IS metric for evaluating +generative models. However, the pre-trained models from torchvision is slightly +different from the TensorFlow version. + +http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz + +In particular: + +(1) The number of classes in TensorFlow model is 1008 instead of 1000. +(2) The avg_pool() layers in TensorFlow model does not include the padded zero. +(3) The last Inception E Block in TensorFlow model use max_pool() instead of + avg_pool(). + +Hence, to algin the evaluation results with those from TensorFlow +implementation, we modified the inception model to support both versions. Please +use `align_tf` argument to control the version. 
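+
+For FID computation, `build_inception_model(align_tf=True)` below returns the
+1008-class model with the TensorFlow-ported weights provided by the pytorch-fid
+repository, while `align_tf=False` uses the standard torchvision weights.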
+""" + +# pylint: disable=line-too-long +# pylint: disable=missing-function-docstring +# pylint: disable=missing-class-docstring +# pylint: disable=super-with-arguments +# pylint: disable=consider-merging-isinstance +# pylint: disable=import-outside-toplevel +# pylint: disable=no-else-return + +from collections import namedtuple +import warnings +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.jit.annotations import Optional +from torch import Tensor +from torchvision.models.utils import load_state_dict_from_url + + +__all__ = ['build_inception_model', 'Inception3', 'inception_v3', 'InceptionOutputs', '_InceptionOutputs'] + +model_urls = { + # Inception v3 ported from TensorFlow + 'inception_v3_google': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth', + + # Inception v3 ported from http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz + # This model is provided by https://github.com/mseitzer/pytorch-fid + 'tf_inception_v3': 'https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth' +} + +InceptionOutputs = namedtuple('InceptionOutputs', ['logits', 'aux_logits']) +InceptionOutputs.__annotations__ = {'logits': torch.Tensor, 'aux_logits': Optional[torch.Tensor]} + +# Script annotations failed with _GoogleNetOutputs = namedtuple ... +# _InceptionOutputs set here for backwards compat +_InceptionOutputs = InceptionOutputs + + +def build_inception_model(align_tf=True): + """Builds Inception V3 model. + + This model is particular used for inference, such that `requires_grad` and + `mode` will both be set as `False`. + + Args: + align_tf: Whether to align the implementation with TensorFlow version. (default: True) + + Returns: + A `torch.nn.Module` with pre-trained weight. + """ + if align_tf: + num_classes = 1008 + model_url = model_urls['tf_inception_v3'] + else: + num_classes = 1000 + model_url = model_urls['inception_v3_google'] + model = Inception3(num_classes=num_classes, + aux_logits=False, + transform_input=False, + align_tf=align_tf) + state_dict = load_state_dict_from_url(model_url) + model.load_state_dict(state_dict, strict=False) + model.eval() + for param in model.parameters(): + param.requires_grad = False + return model + + +def inception_v3(pretrained=False, progress=True, **kwargs): + r"""Inception v3 model architecture from + `"Rethinking the Inception Architecture for Computer Vision" `_. + + .. note:: + **Important**: In contrast to the other models the inception_v3 expects tensors with a size of + N x 3 x 299 x 299, so ensure your images are sized accordingly. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + aux_logits (bool): If True, add an auxiliary branch that can improve training. + Default: *True* + transform_input (bool): If True, preprocesses the input according to the method with which it + was trained on ImageNet. 
Default: *False* + """ + if pretrained: + if 'transform_input' not in kwargs: + kwargs['transform_input'] = True + if 'aux_logits' in kwargs: + original_aux_logits = kwargs['aux_logits'] + kwargs['aux_logits'] = True + else: + original_aux_logits = True + model = Inception3(**kwargs) + state_dict = load_state_dict_from_url(model_urls['inception_v3_google'], + progress=progress) + model.load_state_dict(state_dict) + if not original_aux_logits: + model.aux_logits = False + del model.AuxLogits + return model + + return Inception3(**kwargs) + + +class Inception3(nn.Module): + + def __init__(self, num_classes=1000, aux_logits=True, transform_input=False, + inception_blocks=None, init_weights=True, align_tf=True): + super(Inception3, self).__init__() + if inception_blocks is None: + inception_blocks = [ + BasicConv2d, InceptionA, InceptionB, InceptionC, + InceptionD, InceptionE, InceptionAux + ] + assert len(inception_blocks) == 7 + conv_block = inception_blocks[0] + inception_a = inception_blocks[1] + inception_b = inception_blocks[2] + inception_c = inception_blocks[3] + inception_d = inception_blocks[4] + inception_e = inception_blocks[5] + inception_aux = inception_blocks[6] + + self.aux_logits = aux_logits + self.transform_input = transform_input + self.align_tf = align_tf + self.Conv2d_1a_3x3 = conv_block(3, 32, kernel_size=3, stride=2) + self.Conv2d_2a_3x3 = conv_block(32, 32, kernel_size=3) + self.Conv2d_2b_3x3 = conv_block(32, 64, kernel_size=3, padding=1) + self.Conv2d_3b_1x1 = conv_block(64, 80, kernel_size=1) + self.Conv2d_4a_3x3 = conv_block(80, 192, kernel_size=3) + self.Mixed_5b = inception_a(192, pool_features=32, align_tf=self.align_tf) + self.Mixed_5c = inception_a(256, pool_features=64, align_tf=self.align_tf) + self.Mixed_5d = inception_a(288, pool_features=64, align_tf=self.align_tf) + self.Mixed_6a = inception_b(288) + self.Mixed_6b = inception_c(768, channels_7x7=128, align_tf=self.align_tf) + self.Mixed_6c = inception_c(768, channels_7x7=160, align_tf=self.align_tf) + self.Mixed_6d = inception_c(768, channels_7x7=160, align_tf=self.align_tf) + self.Mixed_6e = inception_c(768, channels_7x7=192, align_tf=self.align_tf) + if aux_logits: + self.AuxLogits = inception_aux(768, num_classes) + self.Mixed_7a = inception_d(768) + self.Mixed_7b = inception_e(1280, align_tf=self.align_tf) + self.Mixed_7c = inception_e(2048, use_max_pool=self.align_tf) + self.fc = nn.Linear(2048, num_classes) + if init_weights: + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + import scipy.stats as stats + stddev = m.stddev if hasattr(m, 'stddev') else 0.1 + X = stats.truncnorm(-2, 2, scale=stddev) + values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype) + values = values.view(m.weight.size()) + with torch.no_grad(): + m.weight.copy_(values) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _transform_input(self, x): + if self.transform_input: + x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5 + x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5 + x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5 + x = torch.cat((x_ch0, x_ch1, x_ch2), 1) + return x + + def _forward(self, x, output_logits=False): + # Upsample if necessary + if x.shape[2] != 299 or x.shape[3] != 299: + x = F.interpolate(x, size=(299, 299), mode='bilinear', align_corners=False) + + # N x 3 x 299 x 299 + x = self.Conv2d_1a_3x3(x) + # N x 32 x 149 x 149 + 
x = self.Conv2d_2a_3x3(x) + # N x 32 x 147 x 147 + x = self.Conv2d_2b_3x3(x) + # N x 64 x 147 x 147 + x = F.max_pool2d(x, kernel_size=3, stride=2) + # N x 64 x 73 x 73 + x = self.Conv2d_3b_1x1(x) + # N x 80 x 73 x 73 + x = self.Conv2d_4a_3x3(x) + # N x 192 x 71 x 71 + x = F.max_pool2d(x, kernel_size=3, stride=2) + # N x 192 x 35 x 35 + x = self.Mixed_5b(x) + # N x 256 x 35 x 35 + x = self.Mixed_5c(x) + # N x 288 x 35 x 35 + x = self.Mixed_5d(x) + # N x 288 x 35 x 35 + x = self.Mixed_6a(x) + # N x 768 x 17 x 17 + x = self.Mixed_6b(x) + # N x 768 x 17 x 17 + x = self.Mixed_6c(x) + # N x 768 x 17 x 17 + x = self.Mixed_6d(x) + # N x 768 x 17 x 17 + x = self.Mixed_6e(x) + # N x 768 x 17 x 17 + aux_defined = self.training and self.aux_logits + if aux_defined: + aux = self.AuxLogits(x) + else: + aux = None + # N x 768 x 17 x 17 + x = self.Mixed_7a(x) + # N x 1280 x 8 x 8 + x = self.Mixed_7b(x) + # N x 2048 x 8 x 8 + x = self.Mixed_7c(x) + # N x 2048 x 8 x 8 + # Adaptive average pooling + x = F.adaptive_avg_pool2d(x, (1, 1)) + # N x 2048 x 1 x 1 + x = F.dropout(x, training=self.training) + # N x 2048 x 1 x 1 + x = torch.flatten(x, 1) + # N x 2048 + if output_logits: + x = self.fc(x) + # N x 1000 (num_classes) + return x, aux + + @torch.jit.unused + def eager_outputs(self, x, aux): + # type: (Tensor, Optional[Tensor]) -> InceptionOutputs + if self.training and self.aux_logits: + return InceptionOutputs(x, aux) + else: + return x + + def forward(self, x, output_logits=False): + x = self._transform_input(x) + x, aux = self._forward(x, output_logits) + aux_defined = self.training and self.aux_logits + if torch.jit.is_scripting(): + if not aux_defined: + warnings.warn("Scripted Inception3 always returns Inception3 Tuple") + return InceptionOutputs(x, aux) + else: + return self.eager_outputs(x, aux) + + +class InceptionA(nn.Module): + + def __init__(self, in_channels, pool_features, conv_block=None, align_tf=False): + super(InceptionA, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 64, kernel_size=1) + + self.branch5x5_1 = conv_block(in_channels, 48, kernel_size=1) + self.branch5x5_2 = conv_block(48, 64, kernel_size=5, padding=2) + + self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, padding=1) + + self.branch_pool = conv_block(in_channels, pool_features, kernel_size=1) + self.pool_include_padding = not align_tf + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + + branch5x5 = self.branch5x5_1(x) + branch5x5 = self.branch5x5_2(branch5x5) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, + count_include_pad=self.pool_include_padding) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionB(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionB, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch3x3 = conv_block(in_channels, 384, kernel_size=3, stride=2) + + self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1) + 
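+ # The last conv of this branch is strided (stride=2), halving the spatial size to
+ # match the strided branch3x3 and the max-pooled branch concatenated in forward().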
self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, stride=2) + + def _forward(self, x): + branch3x3 = self.branch3x3(x) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + + outputs = [branch3x3, branch3x3dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionC(nn.Module): + + def __init__(self, in_channels, channels_7x7, conv_block=None, align_tf=False): + super(InceptionC, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 192, kernel_size=1) + + c7 = channels_7x7 + self.branch7x7_1 = conv_block(in_channels, c7, kernel_size=1) + self.branch7x7_2 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7_3 = conv_block(c7, 192, kernel_size=(7, 1), padding=(3, 0)) + + self.branch7x7dbl_1 = conv_block(in_channels, c7, kernel_size=1) + self.branch7x7dbl_2 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_3 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7dbl_4 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_5 = conv_block(c7, 192, kernel_size=(1, 7), padding=(0, 3)) + + self.branch_pool = conv_block(in_channels, 192, kernel_size=1) + self.pool_include_padding = not align_tf + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + + branch7x7 = self.branch7x7_1(x) + branch7x7 = self.branch7x7_2(branch7x7) + branch7x7 = self.branch7x7_3(branch7x7) + + branch7x7dbl = self.branch7x7dbl_1(x) + branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, + count_include_pad=self.pool_include_padding) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionD(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionD, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch3x3_1 = conv_block(in_channels, 192, kernel_size=1) + self.branch3x3_2 = conv_block(192, 320, kernel_size=3, stride=2) + + self.branch7x7x3_1 = conv_block(in_channels, 192, kernel_size=1) + self.branch7x7x3_2 = conv_block(192, 192, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7x3_3 = conv_block(192, 192, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7x3_4 = conv_block(192, 192, kernel_size=3, stride=2) + + def _forward(self, x): + branch3x3 = self.branch3x3_1(x) + branch3x3 = self.branch3x3_2(branch3x3) + + branch7x7x3 = self.branch7x7x3_1(x) + branch7x7x3 = self.branch7x7x3_2(branch7x7x3) + branch7x7x3 = self.branch7x7x3_3(branch7x7x3) + branch7x7x3 = self.branch7x7x3_4(branch7x7x3) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + outputs = [branch3x3, branch7x7x3, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionE(nn.Module): + + def __init__(self, in_channels, conv_block=None, align_tf=False, use_max_pool=False): + super(InceptionE, self).__init__() + if conv_block is None: + conv_block = 
BasicConv2d + self.branch1x1 = conv_block(in_channels, 320, kernel_size=1) + + self.branch3x3_1 = conv_block(in_channels, 384, kernel_size=1) + self.branch3x3_2a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3_2b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch3x3dbl_1 = conv_block(in_channels, 448, kernel_size=1) + self.branch3x3dbl_2 = conv_block(448, 384, kernel_size=3, padding=1) + self.branch3x3dbl_3a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3dbl_3b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch_pool = conv_block(in_channels, 192, kernel_size=1) + self.pool_include_padding = not align_tf + self.use_max_pool = use_max_pool + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + + branch3x3 = self.branch3x3_1(x) + branch3x3 = [ + self.branch3x3_2a(branch3x3), + self.branch3x3_2b(branch3x3), + ] + branch3x3 = torch.cat(branch3x3, 1) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [ + self.branch3x3dbl_3a(branch3x3dbl), + self.branch3x3dbl_3b(branch3x3dbl), + ] + branch3x3dbl = torch.cat(branch3x3dbl, 1) + + if self.use_max_pool: + branch_pool = F.max_pool2d(x, kernel_size=3, stride=1, padding=1) + else: + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, + count_include_pad=self.pool_include_padding) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionAux(nn.Module): + + def __init__(self, in_channels, num_classes, conv_block=None): + super(InceptionAux, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.conv0 = conv_block(in_channels, 128, kernel_size=1) + self.conv1 = conv_block(128, 768, kernel_size=5) + self.conv1.stddev = 0.01 + self.fc = nn.Linear(768, num_classes) + self.fc.stddev = 0.001 + + def forward(self, x): + # N x 768 x 17 x 17 + x = F.avg_pool2d(x, kernel_size=5, stride=3) + # N x 768 x 5 x 5 + x = self.conv0(x) + # N x 128 x 5 x 5 + x = self.conv1(x) + # N x 768 x 1 x 1 + # Adaptive average pooling + x = F.adaptive_avg_pool2d(x, (1, 1)) + # N x 768 x 1 x 1 + x = torch.flatten(x, 1) + # N x 768 + x = self.fc(x) + # N x 1000 + return x + + +class BasicConv2d(nn.Module): + + def __init__(self, in_channels, out_channels, **kwargs): + super(BasicConv2d, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs) + self.bn = nn.BatchNorm2d(out_channels, eps=0.001) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + return F.relu(x, inplace=True) + +# pylint: enable=line-too-long +# pylint: enable=missing-function-docstring +# pylint: enable=missing-class-docstring +# pylint: enable=super-with-arguments +# pylint: enable=consider-merging-isinstance +# pylint: enable=import-outside-toplevel +# pylint: enable=no-else-return diff --git a/ContraCLIP/models/genforce/models/__init__.py b/ContraCLIP/models/genforce/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b6856a4032c2b3ac12033f7a0b987ada8c4fa0e1 --- /dev/null +++ b/ContraCLIP/models/genforce/models/__init__.py @@ -0,0 +1,130 @@ +# python3.7 +"""Collects all available models together.""" + +from .model_zoo import MODEL_ZOO +from .pggan_generator import PGGANGenerator +from .pggan_discriminator import PGGANDiscriminator +from .stylegan_generator import 
StyleGANGenerator +from .stylegan_discriminator import StyleGANDiscriminator +from .stylegan2_generator import StyleGAN2Generator +from .stylegan2_discriminator import StyleGAN2Discriminator +from .encoder import EncoderNet +from .perceptual_model import PerceptualModel + +__all__ = [ + 'MODEL_ZOO', 'PGGANGenerator', 'PGGANDiscriminator', 'StyleGANGenerator', + 'StyleGANDiscriminator', 'StyleGAN2Generator', 'StyleGAN2Discriminator', + 'EncoderNet', 'PerceptualModel', 'build_generator', 'build_discriminator', + 'build_encoder', 'build_perceptual', 'build_model' +] + +_GAN_TYPES_ALLOWED = ['pggan', 'stylegan', 'stylegan2'] +_MODULES_ALLOWED = ['generator', 'discriminator', 'encoder', 'perceptual'] + + +def build_generator(gan_type, resolution, **kwargs): + """Builds generator by GAN type. + + Args: + gan_type: GAN type to which the generator belong. + resolution: Synthesis resolution. + **kwargs: Additional arguments to build the generator. + + Raises: + ValueError: If the `gan_type` is not supported. + NotImplementedError: If the `gan_type` is not implemented. + """ + if gan_type not in _GAN_TYPES_ALLOWED: + raise ValueError(f'Invalid GAN type: `{gan_type}`!\n' + f'Types allowed: {_GAN_TYPES_ALLOWED}.') + if gan_type == 'pggan': + return PGGANGenerator(resolution, **kwargs) + if gan_type == 'stylegan': + return StyleGANGenerator(resolution, **kwargs) + if gan_type == 'stylegan2': + return StyleGAN2Generator(resolution, **kwargs) + raise NotImplementedError(f'Unsupported GAN type `{gan_type}`!') + + +def build_discriminator(gan_type, resolution, **kwargs): + """Builds discriminator by GAN type. + + Args: + gan_type: GAN type to which the discriminator belong. + resolution: Synthesis resolution. + **kwargs: Additional arguments to build the discriminator. + + Raises: + ValueError: If the `gan_type` is not supported. + NotImplementedError: If the `gan_type` is not implemented. + """ + if gan_type not in _GAN_TYPES_ALLOWED: + raise ValueError(f'Invalid GAN type: `{gan_type}`!\n' + f'Types allowed: {_GAN_TYPES_ALLOWED}.') + + if gan_type == 'pggan': + return PGGANDiscriminator(resolution, **kwargs) + if gan_type == 'stylegan': + return StyleGANDiscriminator(resolution, **kwargs) + if gan_type == 'stylegan2': + return StyleGAN2Discriminator(resolution, **kwargs) + raise NotImplementedError(f'Unsupported GAN type `{gan_type}`!') + + +def build_encoder(gan_type, resolution, **kwargs): + """Builds encoder by GAN type. + + Args: + gan_type: GAN type to which the encoder belong. + resolution: Input resolution for encoder. + **kwargs: Additional arguments to build the encoder. + + Raises: + ValueError: If the `gan_type` is not supported. + NotImplementedError: If the `gan_type` is not implemented. + """ + if gan_type not in _GAN_TYPES_ALLOWED: + raise ValueError(f'Invalid GAN type: `{gan_type}`!\n' + f'Types allowed: {_GAN_TYPES_ALLOWED}.') + + if gan_type in ['stylegan', 'stylegan2']: + return EncoderNet(resolution, **kwargs) + + raise NotImplementedError(f'Unsupported GAN type `{gan_type}` for encoder!') + + +def build_perceptual(**kwargs): + """Builds perceptual model. + + Args: + **kwargs: Additional arguments to build the encoder. + """ + return PerceptualModel(**kwargs) + + +def build_model(gan_type, module, resolution, **kwargs): + """Builds a GAN module (generator/discriminator/etc). + + Args: + gan_type: GAN type to which the model belong. + module: GAN module to build, such as generator or discrimiantor. + resolution: Synthesis resolution. 
+ **kwargs: Additional arguments to build the discriminator. + + Raises: + ValueError: If the `module` is not supported. + NotImplementedError: If the `module` is not implemented. + """ + if module not in _MODULES_ALLOWED: + raise ValueError(f'Invalid module: `{module}`!\n' + f'Modules allowed: {_MODULES_ALLOWED}.') + + if module == 'generator': + return build_generator(gan_type, resolution, **kwargs) + if module == 'discriminator': + return build_discriminator(gan_type, resolution, **kwargs) + if module == 'encoder': + return build_encoder(gan_type, resolution, **kwargs) + if module == 'perceptual': + return build_perceptual(**kwargs) + raise NotImplementedError(f'Unsupported module `{module}`!') diff --git a/ContraCLIP/models/genforce/models/encoder.py b/ContraCLIP/models/genforce/models/encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..6527ab37ed3854832a6b2ee0f949519ee39cd91f --- /dev/null +++ b/ContraCLIP/models/genforce/models/encoder.py @@ -0,0 +1,538 @@ +# python 3.7 +"""Contains the implementation of encoder network for GAN inversion. + +The network structure is primarily based on ResNet. The implementation is +modified from `torchvision.models.resnet`. +""" + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +__all__ = ['EncoderNet'] + +# Resolutions allowed. +_RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024] + +# Final resolution. +_FINAL_RES = 4 + +class BasicBlock(nn.Module): + """Implementation of ResNet BasicBlock.""" + + expansion = 1 + + def __init__(self, + inplanes, + planes, + base_width=64, + stride=1, + groups=1, + dilation=1, + norm_layer=None, + downsample=None): + super().__init__() + if base_width != 64: + raise ValueError(f'BasicBlock of ResNet only supports ' + f'`base_width=64`, but {base_width} received!') + if stride not in [1, 2]: + raise ValueError(f'BasicBlock of ResNet only supports `stride=1` ' + f'and `stride=2`, but {stride} received!') + if groups != 1: + raise ValueError(f'BasicBlock of ResNet only supports `groups=1`, ' + f'but {groups} received!') + if dilation != 1: + raise ValueError(f'BasicBlock of ResNet only supports ' + f'`dilation=1`, but {dilation} received!') + + self.stride = stride + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self.conv1 = nn.Conv2d(in_channels=inplanes, + out_channels=planes, + kernel_size=3, + stride=stride, + padding=1, + groups=1, + dilation=1, + bias=False) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = nn.Conv2d(in_channels=planes, + out_channels=planes, + kernel_size=3, + stride=1, + padding=1, + groups=1, + dilation=1, + bias=False) + self.bn2 = norm_layer(planes) + self.downsample = downsample + + def forward(self, x): + identity = self.downsample(x) if self.downsample is not None else x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out + identity) + + return out + + +class Bottleneck(nn.Module): + """Implementation of ResNet Bottleneck.""" + + expansion = 4 + + def __init__(self, + inplanes, + planes, + base_width=64, + stride=1, + groups=1, + dilation=1, + norm_layer=None, + downsample=None): + super().__init__() + if stride not in [1, 2]: + raise ValueError(f'Bottlenet of ResNet only supports `stride=1` ' + f'and `stride=2`, but {stride} received!') + + width = int(planes * (base_width / 64)) * groups + self.stride = stride + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self.conv1 = 
nn.Conv2d(in_channels=inplanes, + out_channels=width, + kernel_size=1, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=False) + self.bn1 = norm_layer(width) + self.conv2 = nn.Conv2d(in_channels=width, + out_channels=width, + kernel_size=3, + stride=stride, + padding=dilation, + groups=groups, + dilation=dilation, + bias=False) + self.bn2 = norm_layer(width) + self.conv3 = nn.Conv2d(in_channels=width, + out_channels=planes * self.expansion, + kernel_size=1, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=False) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + + def forward(self, x): + identity = self.downsample(x) if self.downsample is not None else x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + out = self.relu(out + identity) + + return out + + +class FPN(nn.Module): + """Implementation of Feature Pyramid Network (FPN). + + The input of this module is a pyramid of features with reducing resolutions. + Then, this module fuses these multi-level features from `top_level` to + `bottom_level`. In particular, starting from the `top_level`, each feature + is convoluted, upsampled, and fused into its previous feature (which is also + convoluted). + + Args: + pyramid_channels: A list of integers, each of which indicates the number + of channels of the feature from a particular level. + out_channels: Number of channels for each output. + + Returns: + A list of feature maps, each of which has `out_channels` channels. + """ + + def __init__(self, pyramid_channels, out_channels): + super().__init__() + assert isinstance(pyramid_channels, (list, tuple)) + self.num_levels = len(pyramid_channels) + + self.lateral_conv_list = nn.ModuleList() + self.feature_conv_list = nn.ModuleList() + for i in range(self.num_levels): + in_channels = pyramid_channels[i] + self.lateral_conv_list.append(nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + padding=1, + bias=True)) + self.feature_conv_list.append(nn.Conv2d(in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + padding=1, + bias=True)) + + def forward(self, inputs): + if len(inputs) != self.num_levels: + raise ValueError('Number of inputs and `num_levels` mismatch!') + + # Project all related features to `out_channels`. + laterals = [] + for i in range(self.num_levels): + laterals.append(self.lateral_conv_list[i](inputs[i])) + + # Fusion, starting from `top_level`. + for i in range(self.num_levels - 1, 0, -1): + scale_factor = laterals[i - 1].shape[2] // laterals[i].shape[2] + laterals[i - 1] = (laterals[i - 1] + + F.interpolate(laterals[i], + mode='nearest', + scale_factor=scale_factor)) + + # Get outputs. + outputs = [] + for i, lateral in enumerate(laterals): + outputs.append(self.feature_conv_list[i](lateral)) + + return outputs + + +class SAM(nn.Module): + """Implementation of Spatial Alignment Module (SAM). + + The input of this module is a pyramid of features with reducing resolutions. + Then this module downsamples all levels of feature to the minimum resolution + and fuses it with the smallest feature map. + + Args: + pyramid_channels: A list of integers, each of which indicates the number + of channels of the feature from a particular level. + out_channels: Number of channels for each output. + + Returns: + A list of feature maps, each of which has `out_channels` channels. 
+ """ + + def __init__(self, pyramid_channels, out_channels): + super().__init__() + assert isinstance(pyramid_channels, (list, tuple)) + self.num_levels = len(pyramid_channels) + + self.fusion_conv_list = nn.ModuleList() + for i in range(self.num_levels): + in_channels = pyramid_channels[i] + self.fusion_conv_list.append(nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + padding=1, + bias=True)) + + def forward(self, inputs): + if len(inputs) != self.num_levels: + raise ValueError('Number of inputs and `num_levels` mismatch!') + + output_res = inputs[-1].shape[2:] + for i in range(self.num_levels - 1, -1, -1): + if i != self.num_levels - 1: + inputs[i] = F.adaptive_avg_pool2d(inputs[i], output_res) + inputs[i] = self.fusion_conv_list[i](inputs[i]) + if i != self.num_levels - 1: + inputs[i] = inputs[i] + inputs[-1] + + return inputs + + +class CodeHead(nn.Module): + """Implementation of the task-head to produce inverted codes.""" + + def __init__(self, in_channels, out_channels, norm_layer=nn.BatchNorm2d): + super().__init__() + self.fc = nn.Linear(in_channels, out_channels, bias=True) + if norm_layer is None: + self.norm = nn.Identity() + else: + self.norm = norm_layer(out_channels) + + def forward(self, x): + if x.ndim > 2: + x = x.flatten(start_dim=1) + latent = self.fc(x) + latent = latent.unsqueeze(2).unsqueeze(3) + latent = self.norm(latent) + + return latent.flatten(start_dim=1) + +class EncoderNet(nn.Module): + """Define the ResNet-based encoder network for GAN inversion. + + On top of the backbone, there are several task-heads to produce inverted + codes. Please use `latent_dim` and `num_latents_per_head` to define the + structure. + + Settings for the encoder network: + + (1) resolution: The resolution of the output image. + (2) latent_dim: Dimension of the latent space. A number (one code will be + produced), or a list of numbers regarding layer-wise latent codes. + (3) num_latents_per_head: Number of latents that is produced by each head. + (4) image_channels: Number of channels of the output image. (default: 3) + + ResNet-related settings: + + (1) network_depth: Depth of the network, like 18 for ResNet18. (default: 18) + (2) inplanes: Number of channels of the first convolutional layer. + (default: 64) + (3) groups: Groups of the convolution, used in ResNet. (default: 1) + (4) width_per_group: Number of channels per group, used in ResNet. + (default: 64) + (5) replace_stride_with_dilation: Wether to replace stride with dilation, + used in ResNet. (default: None) + (6) norm_layer: Normalization layer used in the encoder. + (default: nn.BatchNorm2d) + (7) max_channels: Maximum number of channels in each layer. (default: 512) + + Task-head related settings: + + (1) use_fpn: Whether to use Feature Pyramid Network (FPN) before outputing + the latent code. (default: True) + (2) fpn_channels: Number of channels used in FPN. (default: 512) + (3) use_sam: Whether to use Spatial Alignment Module (SAM) before outputing + the latent code. (default: True) + (4) sam_channels: Number of channels used in SAM. 
(default: 512) + """ + + arch_settings = { + 18: (BasicBlock, [2, 2, 2, 2]), + 34: (BasicBlock, [3, 4, 6, 3]), + 50: (Bottleneck, [3, 4, 6, 3]), + 101: (Bottleneck, [3, 4, 23, 3]), + 152: (Bottleneck, [3, 8, 36, 3]) + } + + def __init__(self, + resolution, + latent_dim, + num_latents_per_head, + image_channels=3, + network_depth=18, + inplanes=64, + groups=1, + width_per_group=64, + replace_stride_with_dilation=None, + norm_layer=nn.BatchNorm2d, + max_channels=512, + use_fpn=True, + fpn_channels=512, + use_sam=True, + sam_channels=512): + super().__init__() + + if resolution not in _RESOLUTIONS_ALLOWED: + raise ValueError(f'Invalid resolution: `{resolution}`!\n' + f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.') + if network_depth not in self.arch_settings: + raise ValueError(f'Invalid network depth: `{network_depth}`!\n' + f'Options allowed: ' + f'{list(self.arch_settings.keys())}.') + if isinstance(latent_dim, int): + latent_dim = [latent_dim] + assert isinstance(latent_dim, (list, tuple)) + assert isinstance(num_latents_per_head, (list, tuple)) + assert sum(num_latents_per_head) == len(latent_dim) + + self.resolution = resolution + self.latent_dim = latent_dim + self.num_latents_per_head = num_latents_per_head + self.num_heads = len(self.num_latents_per_head) + self.image_channels = image_channels + self.inplanes = inplanes + self.network_depth = network_depth + self.groups = groups + self.dilation = 1 + self.base_width = width_per_group + self.replace_stride_with_dilation = replace_stride_with_dilation + if norm_layer is None or norm_layer == nn.BatchNorm2d: + norm_layer = nn.SyncBatchNorm + self.norm_layer = norm_layer + self.max_channels = max_channels + self.use_fpn = use_fpn + self.fpn_channels = fpn_channels + self.use_sam = use_sam + self.sam_channels = sam_channels + + block_fn, num_blocks_per_stage = self.arch_settings[network_depth] + + self.num_stages = int(np.log2(resolution // _FINAL_RES)) - 1 + for i in range(4, self.num_stages): + num_blocks_per_stage.append(1) + if replace_stride_with_dilation is None: + replace_stride_with_dilation = [False] * self.num_stages + + # Backbone. + self.conv1 = nn.Conv2d(in_channels=self.image_channels, + out_channels=self.inplanes, + kernel_size=7, + stride=2, + padding=3, + bias=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.stage_channels = [self.inplanes] + for i in range(1, self.num_stages + 1): + channels = min(self.max_channels, self.inplanes * (2 ** (i - 1))) + num_blocks = num_blocks_per_stage[i - 1] + stride = 1 if i == 1 else 2 + dilate = replace_stride_with_dilation[i - 1] + self.add_module(f'layer{i}', + self._make_stage(block_fn=block_fn, + planes=channels, + num_blocks=num_blocks, + stride=stride, + dilate=dilate)) + self.stage_channels.append(channels) + + if self.num_heads > len(self.stage_channels): + raise ValueError(f'Number of task heads is larger than number of ' + f'stages! Please reduce the number of heads.') + + # Task-head. 
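# (Editor's note, not part of the original file: each task head emits the
#  codes counted in `num_latents_per_head`, and together the heads must
#  cover every entry of `latent_dim`; see the assert above. As a purely
#  illustrative configuration, assumed rather than prescribed by this
#  repository, a 256x256 encoder targeting 14 layer-wise 512-dim codes
#  could use latent_dim=[512] * 14 with num_latents_per_head=[4, 4, 6],
#  i.e. three heads producing 4, 4 and 6 codes respectively.)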
+ if self.num_heads == 1: + self.use_fpn = False + self.use_sam = False + + if self.use_fpn: + fpn_pyramid_channels = self.stage_channels[-self.num_heads:] + self.fpn = FPN(pyramid_channels=fpn_pyramid_channels, + out_channels=self.fpn_channels) + if self.use_sam: + if use_fpn: + sam_pyramid_channels = [self.fpn_channels] * self.num_heads + else: + sam_pyramid_channels = self.stage_channels[-self.num_heads:] + self.sam = SAM(pyramid_channels=sam_pyramid_channels, + out_channels=self.sam_channels) + + self.head_list = nn.ModuleList() + for head_idx in range(self.num_heads): + # Parse in_channels. + if self.use_sam: + in_channels = self.sam_channels + elif self.use_fpn: + in_channels = self.fpn_channels + else: + in_channels = self.stage_channels[head_idx - self.num_heads] + in_channels = in_channels * _FINAL_RES * _FINAL_RES + + # Parse out_channels. + start_latent_idx = sum(self.num_latents_per_head[:head_idx]) + end_latent_idx = sum(self.num_latents_per_head[:head_idx + 1]) + out_channels = sum(self.latent_dim[start_latent_idx:end_latent_idx]) + + self.head_list.append(CodeHead(in_channels=in_channels, + out_channels=out_channels, + norm_layer=self.norm_layer)) + + def _make_stage(self, block_fn, planes, num_blocks, stride=1, dilate=False): + norm_layer = self.norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block_fn.expansion: + downsample = nn.Sequential( + nn.Conv2d(in_channels=self.inplanes, + out_channels=planes * block_fn.expansion, + kernel_size=1, + stride=stride, + padding=0, + dilation=1, + groups=1, + bias=False), + norm_layer(planes * block_fn.expansion), + ) + + blocks = [] + blocks.append(block_fn(inplanes=self.inplanes, + planes=planes, + base_width=self.base_width, + stride=stride, + groups=self.groups, + dilation=previous_dilation, + norm_layer=norm_layer, + downsample=downsample)) + self.inplanes = planes * block_fn.expansion + for _ in range(1, num_blocks): + blocks.append(block_fn(inplanes=self.inplanes, + planes=planes, + base_width=self.base_width, + stride=1, + groups=self.groups, + dilation=self.dilation, + norm_layer=norm_layer, + downsample=None)) + + return nn.Sequential(*blocks) + + def _forward_impl(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + features = [x] + for i in range(1, self.num_stages + 1): + x = getattr(self, f'layer{i}')(x) + features.append(x) + features = features[-self.num_heads:] + + if self.use_fpn: + features = self.fpn(features) + if self.use_sam: + features = self.sam(features) + else: + final_size = features[-1].shape[2:] + for i in range(self.num_heads - 1): + features[i] = F.adaptive_avg_pool2d(features[i], final_size) + + outputs = [] + for head_idx in range(self.num_heads): + codes = self.head_list[head_idx](features[head_idx]) + start_latent_idx = sum(self.num_latents_per_head[:head_idx]) + end_latent_idx = sum(self.num_latents_per_head[:head_idx + 1]) + split_size = self.latent_dim[start_latent_idx:end_latent_idx] + outputs.extend(torch.split(codes, split_size, dim=1)) + max_dim = max(self.latent_dim) + for i, dim in enumerate(self.latent_dim): + if dim < max_dim: + outputs[i] = F.pad(outputs[i], (0, max_dim - dim)) + outputs[i] = outputs[i].unsqueeze(1) + + return torch.cat(outputs, dim=1) + + def forward(self, x): + return self._forward_impl(x) diff --git a/ContraCLIP/models/genforce/models/model_zoo.py b/ContraCLIP/models/genforce/models/model_zoo.py new file mode 100644 
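Minimal usage sketch for the `EncoderNet` defined above (an editor's illustration, not part of the patch): the resolution, latent dimensions and head split are assumed example values, the import path depends on how the package is laid out, and a CUDA device is assumed because the encoder's default normalization layer is `nn.SyncBatchNorm`.

    import torch
    from models.genforce.models.encoder import EncoderNet  # import path assumed

    # 256x256 encoder producing 14 layer-wise 512-dim codes split over 3 heads.
    encoder = EncoderNet(resolution=256,
                         latent_dim=[512] * 14,
                         num_latents_per_head=[4, 4, 6]).cuda().eval()
    with torch.no_grad():
        codes = encoder(torch.randn(2, 3, 256, 256, device='cuda'))
    print(codes.shape)  # torch.Size([2, 14, 512])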
index 0000000000000000000000000000000000000000..064da0c1ebbe55d50fffd985ef73ac710d00a486 --- /dev/null +++ b/ContraCLIP/models/genforce/models/model_zoo.py @@ -0,0 +1,333 @@ +# python3.7 +"""Model zoo.""" + +# pylint: disable=line-too-long + +MODEL_ZOO = { + # PGGAN official. + 'pggan_celebahq1024': dict( + gan_type='pggan', + resolution=1024, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EW_3jQ6E7xlKvCSHYrbmkQQBAB8tgIv5W5evdT6-GuXiWw?e=gRifVa&download=1', + ), + 'pggan_bedroom256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EUZQWGz2GT5Bh_GJLalP63IBvCsXDTOxDFIC_ZBsmoEacA?e=VNXiDb&download=1', + ), + 'pggan_livingroom256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Efzh6qQv6QtCm0YN1lulH-YByqdE3AqlI-E6US_hXMuiig?e=ppdyB2&download=1', + ), + 'pggan_diningroom256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EcLb3_hGUkdClompZo27xk0BNmotgbFqdIeu-ZOGJsBMRg?e=xjYpN3&download=1', + ), + 'pggan_kitchen256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ESCyg6hpNn1LlHVX_un1wLsBZAORUNkW9MO2kU1X5kafAQ?e=09TbGC&download=1', + ), + 'pggan_church256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EQ8cKujs2TVGjCL_j6bsnk8BqD9REF2ME2lBnpbTPsqIvA?e=zH55fT&download=1', + ), + 'pggan_tower256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EeyBJvgRVGJClKr1KKYDF_cBT1FDepRU1-GLqYNh8W9-fQ?e=nrpa5N&download=1', + ), + 'pggan_bridge256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EZ2QScfPy19PiDERLJQ3gPMBP4WmvZHwhNFLzfaP2YD8hQ?e=bef1U9&download=1', + ), + 'pggan_restaurant256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ERvJ4pz8jgtMrcuJXUfcOQEBDugZ099_TetCQs-9-ILCVg?e=qYsVdQ&download=1', + ), + 'pggan_classroom256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EUU9SCOPUxhMoUS4Ceo9kl0BQkVK7d69lA-JeOP-zOWvXw?e=YIB4no&download=1', + ), + 'pggan_conferenceroom256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EX8AF0_6NoJAl5vKFewHWnsBk0r4PK4WsqsMrJyj84TrqQ?e=oNQIZS&download=1', + ), + 'pggan_person256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EWu4SqR42YpCoqsVJOcM2cMBcdfXA0j5wZ2hno9X0R9ydQ?e=KuDRns&download=1', + ), + 'pggan_cat256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EQdveyUNOMtAue52n6BxoHoB6Yup5-PTvBDmyfUn7Un4Hw?e=7acGbT&download=1', + ), + 'pggan_dog256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ESaKyXA5fGlOvXJYDDFbT2kB9c0HlXh9n_wnyhiP05nhow?e=d4aKDV&download=1', + ), + 'pggan_bird256': dict( + gan_type='pggan', + resolution=256, + 
url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ef2p4Pd3AKVCmSm00YikCIABhylh2dLPaFjPfPVn3RiTXA?e=9bRitp&download=1', + ), + 'pggan_horse256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EXwCPdv6XqJFtuvFFoswRScBmLJbhKzaC5D_iovl1GFOTw?e=WDdD77&download=1', + ), + 'pggan_sheep256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ER6J5EKjAUNFtm9VwLf-uUsBZ5dnqxeKsPxY9ijiPtMhcQ?e=OKtfva&download=1', + ), + 'pggan_cow256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ERZLxw7N7xJPm72FyePTbpcByzrr0pH-Fg7qyLt5tYGXwQ?e=ovIPCl&download=1', + ), + 'pggan_car256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EfGc2we47aFDtAY1548pRvsByIju-uXRbkZEFpJotuPKZw?e=DQqVj8&download=1', + ), + 'pggan_bicycle256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ed1dN_FgwmdBgeNWhaRUry8BgwT88-n2ppicSDPx-f7f_Q?e=bxTxnf&download=1', + ), + 'pggan_motorbike256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EV3yQdeJXIdPjZbMO0mp2-MBJbKuuBdypzBL4gnedO57Dw?e=tXdvtD&download=1', + ), + 'pggan_bus256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ed7-OYLnq0RCqRlM8qK8wZ8B87dz_NUxIKBrvyFUwRCEbg?e=VP5bmX&download=1', + ), + 'pggan_train256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EedE2cozKOVAkhvbdLd4SfwBknFW8vWZnKiqgeIBbAvCCA?e=BrLpTl&download=1', + ), + 'pggan_boat256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Eb39waqQFr9Bp4wO0rC5NHwB0Vz2NGCuqbRPucguBIkDrg?e=lddSyL&download=1', + ), + 'pggan_airplane256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ee6FzIx3KjNDhxrS5mDvpCEB3iQ7TgErmKhbwbV-eF07iw?e=xflPXa&download=1', + ), + 'pggan_bottle256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EWhoy2AFCTZGtEG1UoayWjcB9Kdc_wreJ8p4RlBB93nbNg?e=DMZceU&download=1', + ), + 'pggan_chair256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EbQRTfwdostBhXG30Uacn7ABsEUFa-tEW3oxiM5zDYQbRw?e=FkB7T0&download=1', + ), + 'pggan_pottedplant256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EWg7hnoGATBOuJvXWr4m7CQBJL9o7nqnD6nOMRhtH2SKXg?e=Zi3hjD&download=1', + ), + 'pggan_tvmonitor256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EVXwttoJVtBMuhHNDdK3cMwBdMiZARJV38PMTsL6whnFlA?e=RbG0ru&download=1', + ), + 'pggan_diningtable256': dict( + gan_type='pggan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EXVzBkbmTCVImMtuHLCTBeMBXZmv0RWyx5KXQQAe7-7D5w?e=6RYSnm&download=1', + ), + 'pggan_sofa256': dict( + gan_type='pggan', + resolution=256, + 
url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EaADQYDXwY9NrzbiUFcRYRgBOu1GdJMG8YgNZZmbNjbn-Q?e=DqKrXG&download=1', + ), + + # StyleGAN official. + 'stylegan_ffhq1024': dict( + gan_type='stylegan', + resolution=1024, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EdfMxgb0hU9BoXwiR3dqYDEBowCSEF1IcsW3n4kwfoZ9OQ?e=VwIV58&download=1', + ), + 'stylegan_celebahq1024': dict( + gan_type='stylegan', + resolution=1024, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EcCdXHddE7FOvyfmqeOyc9ABqVuWh8PQYFnV6JM1CXvFig?e=1nUYZ5&download=1', + ), + 'stylegan_bedroom256': dict( + gan_type='stylegan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ea6RBPddjcRNoFMXm8AyEBcBUHdlRNtjtclNKFe89amjBw?e=Og8Vff&download=1', + ), + 'stylegan_cat256': dict( + gan_type='stylegan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EVjX8u9HuehLip3z0hRfIHcB7QtoFkTB7NiRDb8nrKOl2w?e=lHcp1B&download=1', + ), + 'stylegan_car512': dict( + gan_type='stylegan', + resolution=512, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EcRJNNzzUzJGjI2X53S9HjkBhXkKT5JRd6Q3IIhCY1AyRw?e=FvMRNj&download=1', + ), + + # StyleGAN ours. + 'stylegan_celeba_partial256': dict( + gan_type='stylegan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ET2etKNzMS9JmHj5j60fqMcBRJfQfYNvqUrujaIXxCvKDQ?e=QReLE6&download=1', + ), + 'stylegan_ffhq256': dict( + gan_type='stylegan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ES-NAUCC2qdHg87BftvlBiQBVpbJ8-005Q4TNr5KrOxQEw?e=00AnWt&download=1', + ), + 'stylegan_ffhq512': dict( + gan_type='stylegan', + resolution=512, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EZYrrwOiEgVOg-PfGv7QTegBzFQ9yq2v7o1WxNq5JJ9KNA?e=SZU8PI&download=1', + ), + 'stylegan_livingroom256': dict( + gan_type='stylegan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EfFCYLHjqbFDmjOvCCFJgDcBZ1QYgETfZJxp4ZTHjLxZBg?e=InVd0n&download=1', + ), + 'stylegan_diningroom256': dict( + gan_type='stylegan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ERsUza_hSFRIm4iZCag7P0kBQ9EIdfQKByw4QYt_ay97lg?e=Cimh7S&download=1', + ), + 'stylegan_kitchen256': dict( + gan_type='stylegan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ERcYvoingQNKix35lUs0vUkBQQkAZMp1rtDxjwNlOJAoaA?e=a1Tcwr&download=1', + ), + 'stylegan_apartment256': dict( + gan_type='stylegan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EfurPNSB2BRFtXdqGkmDD6YBwyKN8YK2v7nKwnJQdsbf6A?e=w3oYa4&download=1', + ), + 'stylegan_church256': dict( + gan_type='stylegan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ETMgG1_d06tAlbUkJD1qA9IBaLZ9zJKPkG2kO-4jxhVV5w?e=Dbkb7o&download=1', + ), + 'stylegan_tower256': dict( + gan_type='stylegan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ebm9QMgqB2VDqyIE5rFhreEBgZ_RyKcRf8bQ333K453u3w?e=if8sDj&download=1', + ), + 'stylegan_bridge256': dict( + gan_type='stylegan', + resolution=256, + 
url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Ed9QM6OP9sVHnazSp4cqPSEBb-ALfBPXRxP1hD7FsTYh8w?e=3vv06p&download=1', + ), + 'stylegan_restaurant256': dict( + gan_type='stylegan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/ESDhYr01WtlEvBNFrVpFezcB2l9lF1rBYuHFoeNpBr5B7A?e=uFWFNh&download=1', + ), + 'stylegan_classroom256': dict( + gan_type='stylegan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EbWnI3oto9NPk-lxwZlWqPQB2atWpGiTWMIT59MzF9ij9Q?e=KvcNBg&download=1', + ), + 'stylegan_conferenceroom256': dict( + gan_type='stylegan', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Eb1gVi3pGa9PgJ4XYYu_6yABQZ0ZcGDak4FEHaTHaeYFzw?e=0BeE8t&download=1', + ), + + # StyleGAN third-party. + 'stylegan_animeface512': dict( + gan_type='stylegan', + resolution=512, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EWDWflY6lBpGgX0CGQpd2Z4B5wTEVamTOA9JRYne7zdCvA?e=tOzgYA&download=1', + ), + 'stylegan_animeportrait512': dict( + gan_type='stylegan', + resolution=512, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EXBvhTBi-v5NsnQtrxhFEKsBin4xg-Dud9Jr62AEwFTIxg?e=bMGK7r&download=1', + ), + 'stylegan_artface512': dict( + gan_type='stylegan', + resolution=512, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/Eca0OiGqhyZMmoPbKahSBWQBWvcAH4q2CE3zdZJflp2jkQ?e=h4rWAm&download=1', + ), + + # StyleGAN2 official. + 'stylegan2_ffhq1024': dict( + gan_type='stylegan2', + resolution=1024, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EX0DNWiBvl5FuOQTF4oMPBYBNSalcxTK0AbLwBn9Y3vfgg?e=Q0sZit&download=1', + ), + 'stylegan2_church256': dict( + gan_type='stylegan2', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EQzDtJUdQ4ROunMGn2sZouEBmNeFX4QWvxjermVE5cZvNA?e=tQ7r9r&download=1', + ), + 'stylegan2_cat256': dict( + gan_type='stylegan2', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EUKXeBwUUbZJr6kup7PW4ekBx2-vmTp8FjcGb10v8bgJxQ?e=nkerMF&download=1', + ), + 'stylegan2_horse256': dict( + gan_type='stylegan2', + resolution=256, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EconoT6tb69OuAIqfXRtGlsBZz4vBx01UmmFO-JAS356Jg?e=bcSCC4&download=1', + ), + 'stylegan2_car512': dict( + gan_type='stylegan2', + resolution=512, + url='https://mycuhk-my.sharepoint.com/:u:/g/personal/1155082926_link_cuhk_edu_hk/EYSnUsxU8KJFuMHhZm-JLWoB0nHxdlbrLHNZ_Qkoe3b9LA?e=Ycjp5A&download=1', + ), + + ############ + ############ StyleGANPORT + ############ + 'stylegan2_afhqdog512': dict( + gan_type='stylegan2', + resolution=512, + url='http://eecs.qmul.ac.uk/~jo001/gan_checkpoints/stylegan2_afhqdog512.pth', + ), + 'stylegan2_afhqcat512': dict( + gan_type='stylegan2', + resolution=512, + url='http://eecs.qmul.ac.uk/~jo001/gan_checkpoints/stylegan2_afhqcat512.pth', + ), + # TODO: update this one to EECs server + 'stylegan2_afhqv2512': dict( + gan_type='stylegan2', + resolution=512, + url='', + ), + # TODO: ditto above + 'stylegan2_metfaces1024': dict( + gan_type='stylegan2', + resolution=512, + url='', + ), + ############ + ############ + ############ + +} + +# pylint: enable=line-too-long diff --git a/ContraCLIP/models/genforce/models/perceptual_model.py 
b/ContraCLIP/models/genforce/models/perceptual_model.py new file mode 100644 index 0000000000000000000000000000000000000000..1f21496f7b52dbf102a197fda2a12b8e3770a87e --- /dev/null +++ b/ContraCLIP/models/genforce/models/perceptual_model.py @@ -0,0 +1,148 @@ +# python 3.7 +"""Contains the VGG16 model for perceptual feature extraction. + +This file is particularly used for computing perceptual loss and hence is highly +recommended to use with pre-trained weights. + +The PyTorch weights can be downloaded from + +https://drive.google.com/file/d/1qQ-r7MYZ8ZcjQQFe17eQfJbOAuE3eS0y/view?usp=sharing + +which is converted from the Keras model + +https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5 + +The variable mapping is shown below + +pth_to_tf_var_mapping = { + 'layer0.weight': 'block1_conv1_W_1:0', # [64, 3, 3, 3] + 'layer0.bias': 'block1_conv1_b_1:0', # [64] + 'layer2.weight': 'block1_conv2_W_1:0', # [64, 64, 3, 3] + 'layer2.bias': 'block1_conv2_b_1:0', # [64] + 'layer5.weight': 'block2_conv1_W_1:0', # [128, 64, 3, 3] + 'layer5.bias': 'block2_conv1_b_1:0', # [128] + 'layer7.weight': 'block2_conv2_W_1:0', # [128, 128, 3, 3] + 'layer7.bias': 'block2_conv2_b_1:0', # [128] + 'layer10.weight': 'block3_conv1_W_1:0', # [256, 128, 3, 3] + 'layer10.bias': 'block3_conv1_b_1:0', # [256] + 'layer12.weight': 'block3_conv2_W_1:0', # [256, 256, 3, 3] + 'layer12.bias': 'block3_conv2_b_1:0', # [256] + 'layer14.weight': 'block3_conv3_W_1:0', # [256, 256, 3, 3] + 'layer14.bias': 'block3_conv3_b_1:0', # [256] + 'layer17.weight': 'block4_conv1_W_1:0', # [512, 256, 3, 3] + 'layer17.bias': 'block4_conv1_b_1:0', # [512] + 'layer19.weight': 'block4_conv2_W_1:0', # [512, 512, 3, 3] + 'layer19.bias': 'block4_conv2_b_1:0', # [512] + 'layer21.weight': 'block4_conv3_W_1:0', # [512, 512, 3, 3] + 'layer21.bias': 'block4_conv3_b_1:0', # [512] + 'layer24.weight': 'block5_conv1_W_1:0', # [512, 512, 3, 3] + 'layer24.bias': 'block5_conv1_b_1:0', # [512] + 'layer26.weight': 'block5_conv2_W_1:0', # [512, 512, 3, 3] + 'layer26.bias': 'block5_conv2_b_1:0', # [512] + 'layer28.weight': 'block5_conv3_W_1:0', # [512, 512, 3, 3] + 'layer28.bias': 'block5_conv3_b_1:0', # [512] +} +""" + +import os +import warnings +from collections import OrderedDict +import numpy as np + +import torch +import torch.nn as nn + +__all__ = ['PerceptualModel'] + +_MEAN_STATS = (103.939, 116.779, 123.68) + + +class PerceptualModel(nn.Module): + """Defines the VGG16 structure as the perceptual network. + + This model takes `RGB` images with data format `NCHW` as the raw inputs, and + outputs the perceptual feature. This following operations will be performed + to preprocess the inputs to match the preprocessing during the model + training: + (1) Shift pixel range to [0, 255]. + (2) Change channel order to `BGR`. + (3) Subtract the statistical mean. + + NOTE: The three fully connected layers on top of the model are dropped. + """ + + def __init__(self, + output_layer_idx=23, + min_val=-1.0, + max_val=1.0, + pretrained_weight_path=None): + """Defines the network structure. + + Args: + output_layer_idx: Index of layer whose output will be used as the + perceptual feature. (default: 23, which is the `block4_conv3` + layer activated by `ReLU` function) + min_val: Minimum value of the raw input. (default: -1.0) + max_val: Maximum value of the raw input. (default: 1.0) + pretrained_weight_path: Path to the pretrained weights. 
+ (default: None) + """ + super().__init__() + self.vgg16 = nn.Sequential(OrderedDict({ + 'layer0': nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1), + 'layer1': nn.ReLU(inplace=True), + 'layer2': nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), + 'layer3': nn.ReLU(inplace=True), + 'layer4': nn.MaxPool2d(kernel_size=2, stride=2), + 'layer5': nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), + 'layer6': nn.ReLU(inplace=True), + 'layer7': nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1), + 'layer8': nn.ReLU(inplace=True), + 'layer9': nn.MaxPool2d(kernel_size=2, stride=2), + 'layer10': nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1), + 'layer11': nn.ReLU(inplace=True), + 'layer12': nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1), + 'layer13': nn.ReLU(inplace=True), + 'layer14': nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1), + 'layer15': nn.ReLU(inplace=True), + 'layer16': nn.MaxPool2d(kernel_size=2, stride=2), + 'layer17': nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1), + 'layer18': nn.ReLU(inplace=True), + 'layer19': nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1), + 'layer20': nn.ReLU(inplace=True), + 'layer21': nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1), + 'layer22': nn.ReLU(inplace=True), + 'layer23': nn.MaxPool2d(kernel_size=2, stride=2), + 'layer24': nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1), + 'layer25': nn.ReLU(inplace=True), + 'layer26': nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1), + 'layer27': nn.ReLU(inplace=True), + 'layer28': nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1), + 'layer29': nn.ReLU(inplace=True), + 'layer30': nn.MaxPool2d(kernel_size=2, stride=2), + })) + self.output_layer_idx = output_layer_idx + self.min_val = min_val + self.max_val = max_val + self.mean = torch.from_numpy(np.array(_MEAN_STATS)).view(1, 3, 1, 1) + self.mean = self.mean.type(torch.FloatTensor) + + self.pretrained_weight_path = pretrained_weight_path + if os.path.isfile(self.pretrained_weight_path): + self.vgg16.load_state_dict( + torch.load(self.pretrained_weight_path, map_location='cpu')) + else: + warnings.warn('No pre-trained weights found for perceptual model!') + + def forward(self, x): + x = (x - self.min_val) * 255.0 / (self.max_val - self.min_val) + x = x.flip(1) # RGB to BGR + x = x - self.mean.to(x) + # TODO: Resize image? + for idx, layer in enumerate(self.vgg16.children()): + if idx == self.output_layer_idx: + break + x = layer(x) + # x = x.permute(0, 2, 3, 1) + x = x.flatten(start_dim=1) + return x diff --git a/ContraCLIP/models/genforce/models/pggan_discriminator.py b/ContraCLIP/models/genforce/models/pggan_discriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..c1bc97fa513eb3075d0b6bd1df775236cacf396a --- /dev/null +++ b/ContraCLIP/models/genforce/models/pggan_discriminator.py @@ -0,0 +1,402 @@ +# python3.7 +"""Contains the implementation of discriminator described in PGGAN. + +Paper: https://arxiv.org/pdf/1710.10196.pdf + +Official TensorFlow implementation: +https://github.com/tkarras/progressive_growing_of_gans +""" + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +__all__ = ['PGGANDiscriminator'] + +# Resolutions allowed. +_RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024] + +# Initial resolution. +_INIT_RES = 4 + +# Default gain factor for weight scaling. +_WSCALE_GAIN = np.sqrt(2.0) + + +class PGGANDiscriminator(nn.Module): + """Defines the discriminator network in PGGAN. 
+ + NOTE: The discriminator takes images with `RGB` channel order and pixel + range [-1, 1] as inputs. + + Settings for the network: + + (1) resolution: The resolution of the input image. + (2) image_channels: Number of channels of the input image. (default: 3) + (3) label_size: Size of the additional label for conditional generation. + (default: 0) + (4) fused_scale: Whether to fused `conv2d` and `downsample` together, + resulting in `conv2d` with strides. (default: False) + (5) use_wscale: Whether to use weight scaling. (default: True) + (6) minibatch_std_group_size: Group size for the minibatch standard + deviation layer. 0 means disable. (default: 16) + (7) fmaps_base: Factor to control number of feature maps for each layer. + (default: 16 << 10) + (8) fmaps_max: Maximum number of feature maps in each layer. (default: 512) + """ + + def __init__(self, + resolution, + image_channels=3, + label_size=0, + fused_scale=False, + use_wscale=True, + minibatch_std_group_size=16, + fmaps_base=16 << 10, + fmaps_max=512): + """Initializes with basic settings. + + Raises: + ValueError: If the `resolution` is not supported. + """ + super().__init__() + + if resolution not in _RESOLUTIONS_ALLOWED: + raise ValueError(f'Invalid resolution: `{resolution}`!\n' + f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.') + + self.init_res = _INIT_RES + self.init_res_log2 = int(np.log2(self.init_res)) + self.resolution = resolution + self.final_res_log2 = int(np.log2(self.resolution)) + self.image_channels = image_channels + self.label_size = label_size + self.fused_scale = fused_scale + self.use_wscale = use_wscale + self.minibatch_std_group_size = minibatch_std_group_size + self.fmaps_base = fmaps_base + self.fmaps_max = fmaps_max + + # Level of detail (used for progressive training). + self.register_buffer('lod', torch.zeros(())) + self.pth_to_tf_var_mapping = {'lod': 'lod'} + + for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1): + res = 2 ** res_log2 + block_idx = self.final_res_log2 - res_log2 + + # Input convolution layer for each resolution. + self.add_module( + f'input{block_idx}', + ConvBlock(in_channels=self.image_channels, + out_channels=self.get_nf(res), + kernel_size=1, + padding=0, + use_wscale=self.use_wscale)) + self.pth_to_tf_var_mapping[f'input{block_idx}.weight'] = ( + f'FromRGB_lod{block_idx}/weight') + self.pth_to_tf_var_mapping[f'input{block_idx}.bias'] = ( + f'FromRGB_lod{block_idx}/bias') + + # Convolution block for each resolution (except the last one). + if res != self.init_res: + self.add_module( + f'layer{2 * block_idx}', + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.get_nf(res), + use_wscale=self.use_wscale)) + tf_layer0_name = 'Conv0' + self.add_module( + f'layer{2 * block_idx + 1}', + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.get_nf(res // 2), + downsample=True, + fused_scale=self.fused_scale, + use_wscale=self.use_wscale)) + tf_layer1_name = 'Conv1_down' if self.fused_scale else 'Conv1' + + # Convolution block for last resolution. 
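# (Editor's note: at the initial 4x4 resolution the block below applies the
#  minibatch standard-deviation layer before its 3x3 convolution, and the
#  following "layer" is a DenseBlock operating on the flattened 4x4 feature
#  map instead of a strided convolution; DenseBlock is defined later in this
#  file.)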
+ else: + self.add_module( + f'layer{2 * block_idx}', + ConvBlock( + in_channels=self.get_nf(res), + out_channels=self.get_nf(res), + use_wscale=self.use_wscale, + minibatch_std_group_size=self.minibatch_std_group_size)) + tf_layer0_name = 'Conv' + self.add_module( + f'layer{2 * block_idx + 1}', + DenseBlock(in_channels=self.get_nf(res) * res * res, + out_channels=self.get_nf(res // 2), + use_wscale=self.use_wscale)) + tf_layer1_name = 'Dense0' + + self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.weight'] = ( + f'{res}x{res}/{tf_layer0_name}/weight') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.bias'] = ( + f'{res}x{res}/{tf_layer0_name}/bias') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.weight'] = ( + f'{res}x{res}/{tf_layer1_name}/weight') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.bias'] = ( + f'{res}x{res}/{tf_layer1_name}/bias') + + # Final dense block. + self.add_module( + f'layer{2 * block_idx + 2}', + DenseBlock(in_channels=self.get_nf(res // 2), + out_channels=1 + self.label_size, + use_wscale=self.use_wscale, + wscale_gain=1.0, + activation_type='linear')) + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.weight'] = ( + f'{res}x{res}/Dense1/weight') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.bias'] = ( + f'{res}x{res}/Dense1/bias') + + self.downsample = DownsamplingLayer() + + def get_nf(self, res): + """Gets number of feature maps according to current resolution.""" + return min(self.fmaps_base // res, self.fmaps_max) + + def forward(self, image, lod=None, **_unused_kwargs): + expected_shape = (self.image_channels, self.resolution, self.resolution) + if image.ndim != 4 or image.shape[1:] != expected_shape: + raise ValueError(f'The input tensor should be with shape ' + f'[batch_size, channel, height, width], where ' + f'`channel` equals to {self.image_channels}, ' + f'`height`, `width` equal to {self.resolution}!\n' + f'But `{image.shape}` is received!') + + lod = self.lod.cpu().tolist() if lod is None else lod + if lod + self.init_res_log2 > self.final_res_log2: + raise ValueError(f'Maximum level-of-detail (lod) is ' + f'{self.final_res_log2 - self.init_res_log2}, ' + f'but `{lod}` is received!') + + lod = self.lod.cpu().tolist() + for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1): + block_idx = current_lod = self.final_res_log2 - res_log2 + if current_lod <= lod < current_lod + 1: + x = self.__getattr__(f'input{block_idx}')(image) + elif current_lod - 1 < lod < current_lod: + alpha = lod - np.floor(lod) + x = (self.__getattr__(f'input{block_idx}')(image) * alpha + + x * (1 - alpha)) + if lod < current_lod + 1: + x = self.__getattr__(f'layer{2 * block_idx}')(x) + x = self.__getattr__(f'layer{2 * block_idx + 1}')(x) + if lod > current_lod: + image = self.downsample(image) + x = self.__getattr__(f'layer{2 * block_idx + 2}')(x) + return x + + +class MiniBatchSTDLayer(nn.Module): + """Implements the minibatch standard deviation layer.""" + + def __init__(self, group_size=16, epsilon=1e-8): + super().__init__() + self.group_size = group_size + self.epsilon = epsilon + + def forward(self, x): + if self.group_size <= 1: + return x + group_size = min(self.group_size, x.shape[0]) # [NCHW] + y = x.view(group_size, -1, x.shape[1], x.shape[2], x.shape[3]) # [GMCHW] + y = y - torch.mean(y, dim=0, keepdim=True) # [GMCHW] + y = torch.mean(y ** 2, dim=0) # [MCHW] + y = torch.sqrt(y + self.epsilon) # [MCHW] + y = torch.mean(y, dim=[1, 2, 3], keepdim=True) # [M111] + y = y.repeat(group_size, 1, x.shape[2], x.shape[3]) # 
[N1HW] + return torch.cat([x, y], dim=1) + + +class DownsamplingLayer(nn.Module): + """Implements the downsampling layer. + + Basically, this layer can be used to downsample feature maps with average + pooling. + """ + + def __init__(self, scale_factor=2): + super().__init__() + self.scale_factor = scale_factor + + def forward(self, x): + if self.scale_factor <= 1: + return x + return F.avg_pool2d(x, + kernel_size=self.scale_factor, + stride=self.scale_factor, + padding=0) + + +class ConvBlock(nn.Module): + """Implements the convolutional block. + + Basically, this block executes minibatch standard deviation layer (if + needed), convolutional layer, activation layer, and downsampling layer ( + if needed) in sequence. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + add_bias=True, + downsample=False, + fused_scale=False, + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + activation_type='lrelu', + minibatch_std_group_size=0): + """Initializes with block settings. + + Args: + in_channels: Number of channels of the input tensor. + out_channels: Number of channels of the output tensor. + kernel_size: Size of the convolutional kernels. (default: 3) + stride: Stride parameter for convolution operation. (default: 1) + padding: Padding parameter for convolution operation. (default: 1) + add_bias: Whether to add bias onto the convolutional result. + (default: True) + downsample: Whether to downsample the result after convolution. + (default: False) + fused_scale: Whether to fused `conv2d` and `downsample` together, + resulting in `conv2d` with strides. (default: False) + use_wscale: Whether to use weight scaling. (default: True) + wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN) + activation_type: Type of activation. Support `linear` and `lrelu`. + (default: `lrelu`) + minibatch_std_group_size: Group size for the minibatch standard + deviation layer. 0 means disable. (default: 0) + + Raises: + NotImplementedError: If the `activation_type` is not supported. 
+ """ + super().__init__() + + if minibatch_std_group_size > 1: + in_channels = in_channels + 1 + self.mbstd = MiniBatchSTDLayer(group_size=minibatch_std_group_size) + else: + self.mbstd = nn.Identity() + + if downsample and not fused_scale: + self.downsample = DownsamplingLayer() + else: + self.downsample = nn.Identity() + + if downsample and fused_scale: + self.use_stride = True + self.stride = 2 + self.padding = 1 + else: + self.use_stride = False + self.stride = stride + self.padding = padding + + weight_shape = (out_channels, in_channels, kernel_size, kernel_size) + fan_in = kernel_size * kernel_size * in_channels + wscale = wscale_gain / np.sqrt(fan_in) + if use_wscale: + self.weight = nn.Parameter(torch.randn(*weight_shape)) + self.wscale = wscale + else: + self.weight = nn.Parameter(torch.randn(*weight_shape) * wscale) + self.wscale = 1.0 + + if add_bias: + self.bias = nn.Parameter(torch.zeros(out_channels)) + else: + self.bias = None + + if activation_type == 'linear': + self.activate = nn.Identity() + elif activation_type == 'lrelu': + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + else: + raise NotImplementedError(f'Not implemented activation function: ' + f'`{activation_type}`!') + + def forward(self, x): + x = self.mbstd(x) + weight = self.weight * self.wscale + if self.use_stride: + weight = F.pad(weight, (1, 1, 1, 1, 0, 0, 0, 0), 'constant', 0.0) + weight = (weight[:, :, 1:, 1:] + weight[:, :, :-1, 1:] + + weight[:, :, 1:, :-1] + weight[:, :, :-1, :-1]) * 0.25 + x = F.conv2d(x, + weight=weight, + bias=self.bias, + stride=self.stride, + padding=self.padding) + x = self.activate(x) + x = self.downsample(x) + return x + + +class DenseBlock(nn.Module): + """Implements the dense block. + + Basically, this block executes fully-connected layer, and activation layer. + """ + + def __init__(self, + in_channels, + out_channels, + add_bias=True, + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + activation_type='lrelu'): + """Initializes with block settings. + + Args: + in_channels: Number of channels of the input tensor. + out_channels: Number of channels of the output tensor. + add_bias: Whether to add bias onto the fully-connected result. + (default: True) + use_wscale: Whether to use weight scaling. (default: True) + wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN) + activation_type: Type of activation. Support `linear` and `lrelu`. + (default: `lrelu`) + + Raises: + NotImplementedError: If the `activation_type` is not supported. 
+ """ + super().__init__() + weight_shape = (out_channels, in_channels) + wscale = wscale_gain / np.sqrt(in_channels) + if use_wscale: + self.weight = nn.Parameter(torch.randn(*weight_shape)) + self.wscale = wscale + else: + self.weight = nn.Parameter(torch.randn(*weight_shape) * wscale) + self.wscale = 1.0 + + if add_bias: + self.bias = nn.Parameter(torch.zeros(out_channels)) + else: + self.bias = None + + if activation_type == 'linear': + self.activate = nn.Identity() + elif activation_type == 'lrelu': + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + else: + raise NotImplementedError(f'Not implemented activation function: ' + f'`{activation_type}`!') + + def forward(self, x): + if x.ndim != 2: + x = x.view(x.shape[0], -1) + x = F.linear(x, weight=self.weight * self.wscale, bias=self.bias) + x = self.activate(x) + return x diff --git a/ContraCLIP/models/genforce/models/pggan_generator.py b/ContraCLIP/models/genforce/models/pggan_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..5e5c10d8e79eddd1ac9d96644eebc383eb51a470 --- /dev/null +++ b/ContraCLIP/models/genforce/models/pggan_generator.py @@ -0,0 +1,335 @@ +# python3.7 +"""Contains the implementation of generator described in PGGAN. + +Paper: https://arxiv.org/pdf/1710.10196.pdf + +Official TensorFlow implementation: +https://github.com/tkarras/progressive_growing_of_gans +""" + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +__all__ = ['PGGANGenerator'] + +# Resolutions allowed. +_RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024] + +# Initial resolution. +_INIT_RES = 4 + +# Default gain factor for weight scaling. +_WSCALE_GAIN = np.sqrt(2.0) + + +class PGGANGenerator(nn.Module): + """Defines the generator network in PGGAN. + + NOTE: The synthesized images are with `RGB` channel order and pixel range + [-1, 1]. + + Settings for the network: + + (1) resolution: The resolution of the output image. + (2) z_space_dim: The dimension of the latent space, Z. (default: 512) + (3) image_channels: Number of channels of the output image. (default: 3) + (4) final_tanh: Whether to use `tanh` to control the final pixel range. + (default: False) + (5) label_size: Size of the additional label for conditional generation. + (default: 0) + (6) fused_scale: Whether to fused `upsample` and `conv2d` together, + resulting in `conv2d_transpose`. (default: False) + (7) use_wscale: Whether to use weight scaling. (default: True) + (8) fmaps_base: Factor to control number of feature maps for each layer. + (default: 16 << 10) + (9) fmaps_max: Maximum number of feature maps in each layer. (default: 512) + """ + + def __init__(self, + resolution, + z_space_dim=512, + image_channels=3, + final_tanh=False, + label_size=0, + fused_scale=False, + use_wscale=True, + fmaps_base=16 << 10, + fmaps_max=512, + latent_is_w=False): + """Initializes with basic settings. + + Raises: + ValueError: If the `resolution` is not supported. 
+ """ + super().__init__() + + if resolution not in _RESOLUTIONS_ALLOWED: + raise ValueError(f'Invalid resolution: `{resolution}`!\n' + f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.') + + self.init_res = _INIT_RES + self.init_res_log2 = int(np.log2(self.init_res)) + self.resolution = resolution + self.final_res_log2 = int(np.log2(self.resolution)) + self.z_space_dim = z_space_dim + self.image_channels = image_channels + self.final_tanh = final_tanh + self.label_size = label_size + self.fused_scale = fused_scale + self.use_wscale = use_wscale + self.fmaps_base = fmaps_base + self.fmaps_max = fmaps_max + self.latent_is_w = latent_is_w + + # Number of convolutional layers. + self.num_layers = (self.final_res_log2 - self.init_res_log2 + 1) * 2 + + # Level of detail (used for progressive training). + self.register_buffer('lod', torch.zeros(())) + self.pth_to_tf_var_mapping = {'lod': 'lod'} + + for res_log2 in range(self.init_res_log2, self.final_res_log2 + 1): + res = 2 ** res_log2 + block_idx = res_log2 - self.init_res_log2 + + # First convolution layer for each resolution. + if res == self.init_res: + self.add_module( + f'layer{2 * block_idx}', + ConvBlock(in_channels=self.z_space_dim + self.label_size, + out_channels=self.get_nf(res), + kernel_size=self.init_res, + padding=self.init_res - 1, + use_wscale=self.use_wscale)) + tf_layer_name = 'Dense' + else: + self.add_module( + f'layer{2 * block_idx}', + ConvBlock(in_channels=self.get_nf(res // 2), + out_channels=self.get_nf(res), + upsample=True, + fused_scale=self.fused_scale, + use_wscale=self.use_wscale)) + tf_layer_name = 'Conv0_up' if self.fused_scale else 'Conv0' + self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.weight'] = ( + f'{res}x{res}/{tf_layer_name}/weight') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.bias'] = ( + f'{res}x{res}/{tf_layer_name}/bias') + + # Second convolution layer for each resolution. + self.add_module( + f'layer{2 * block_idx + 1}', + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.get_nf(res), + use_wscale=self.use_wscale)) + tf_layer_name = 'Conv' if res == self.init_res else 'Conv1' + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.weight'] = ( + f'{res}x{res}/{tf_layer_name}/weight') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.bias'] = ( + f'{res}x{res}/{tf_layer_name}/bias') + + # Output convolution layer for each resolution. 
+ self.add_module( + f'output{block_idx}', + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.image_channels, + kernel_size=1, + padding=0, + use_wscale=self.use_wscale, + wscale_gain=1.0, + activation_type='linear')) + self.pth_to_tf_var_mapping[f'output{block_idx}.weight'] = ( + f'ToRGB_lod{self.final_res_log2 - res_log2}/weight') + self.pth_to_tf_var_mapping[f'output{block_idx}.bias'] = ( + f'ToRGB_lod{self.final_res_log2 - res_log2}/bias') + + self.upsample = UpsamplingLayer() + self.final_activate = nn.Tanh() if self.final_tanh else nn.Identity() + + def get_nf(self, res): + """Gets number of feature maps according to current resolution.""" + return min(self.fmaps_base // res, self.fmaps_max) + + def forward(self, z, label=None, lod=None, latent_is_w=False, **_unused_kwargs): + if z.ndim != 2 or z.shape[1] != self.z_space_dim: + raise ValueError(f'Input latent code should be with shape ' + f'[batch_size, latent_dim], where ' + f'`latent_dim` equals to {self.z_space_dim}!\n' + f'But `{z.shape}` is received!') + z = self.layer0.pixel_norm(z) + if self.label_size: + if label is None: + raise ValueError(f'Model requires an additional label ' + f'(with size {self.label_size}) as input, ' + f'but no label is received!') + if label.ndim != 2 or label.shape != (z.shape[0], self.label_size): + raise ValueError(f'Input label should be with shape ' + f'[batch_size, label_size], where ' + f'`batch_size` equals to that of ' + f'latent codes ({z.shape[0]}) and ' + f'`label_size` equals to {self.label_size}!\n' + f'But `{label.shape}` is received!') + z = torch.cat((z, label), dim=1) + + lod = self.lod.cpu().tolist() if lod is None else lod + if lod + self.init_res_log2 > self.final_res_log2: + raise ValueError(f'Maximum level-of-detail (lod) is ' + f'{self.final_res_log2 - self.init_res_log2}, ' + f'but `{lod}` is received!') + + x = z.view(z.shape[0], self.z_space_dim + self.label_size, 1, 1) + for res_log2 in range(self.init_res_log2, self.final_res_log2 + 1): + current_lod = self.final_res_log2 - res_log2 + if lod < current_lod + 1: + block_idx = res_log2 - self.init_res_log2 + x = self.__getattr__(f'layer{2 * block_idx}')(x) + x = self.__getattr__(f'layer{2 * block_idx + 1}')(x) + if current_lod - 1 < lod <= current_lod: + image = self.__getattr__(f'output{block_idx}')(x) + elif current_lod < lod < current_lod + 1: + alpha = np.ceil(lod) - lod + image = (self.__getattr__(f'output{block_idx}')(x) * alpha + + self.upsample(image) * (1 - alpha)) + elif lod >= current_lod + 1: + image = self.upsample(image) + image = self.final_activate(image) + + return image + + +class PixelNormLayer(nn.Module): + """Implements pixel-wise feature vector normalization layer.""" + + def __init__(self, epsilon=1e-8): + super().__init__() + self.eps = epsilon + + def forward(self, x): + norm = torch.sqrt(torch.mean(x ** 2, dim=1, keepdim=True) + self.eps) + return x / norm + + +class UpsamplingLayer(nn.Module): + """Implements the upsampling layer. + + Basically, this layer can be used to upsample feature maps with nearest + neighbor interpolation. + """ + + def __init__(self, scale_factor=2): + super().__init__() + self.scale_factor = scale_factor + + def forward(self, x): + if self.scale_factor <= 1: + return x + return F.interpolate(x, scale_factor=self.scale_factor, mode='nearest') + + +class ConvBlock(nn.Module): + """Implements the convolutional block. 
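A minimal synthesis sketch for the `PGGANGenerator` defined in this file; the import path is an assumption (relative to the ContraCLIP root) and the snippet is illustrative only:

```python
import torch
from models.genforce.models.pggan_generator import PGGANGenerator

G = PGGANGenerator(resolution=16)
z = torch.randn(2, 512)            # [batch_size, z_space_dim]
with torch.no_grad():
    images = G(z)                  # `lod` defaults to the registered buffer (0 = final resolution)
print(images.shape)                # torch.Size([2, 3, 16, 16]), RGB order
```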
+ + Basically, this block executes pixel-wise normalization layer, upsampling + layer (if needed), convolutional layer, and activation layer in sequence. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + add_bias=True, + upsample=False, + fused_scale=False, + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + activation_type='lrelu'): + """Initializes with block settings. + + Args: + in_channels: Number of channels of the input tensor. + out_channels: Number of channels of the output tensor. + kernel_size: Size of the convolutional kernels. (default: 3) + stride: Stride parameter for convolution operation. (default: 1) + padding: Padding parameter for convolution operation. (default: 1) + add_bias: Whether to add bias onto the convolutional result. + (default: True) + upsample: Whether to upsample the input tensor before convolution. + (default: False) + fused_scale: Whether to fused `upsample` and `conv2d` together, + resulting in `conv2d_transpose`. (default: False) + use_wscale: Whether to use weight scaling. (default: True) + wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN) + activation_type: Type of activation. Support `linear` and `lrelu`. + (default: `lrelu`) + + Raises: + NotImplementedError: If the `activation_type` is not supported. + """ + super().__init__() + + self.pixel_norm = PixelNormLayer() + + if upsample and not fused_scale: + self.upsample = UpsamplingLayer() + else: + self.upsample = nn.Identity() + + if upsample and fused_scale: + self.use_conv2d_transpose = True + weight_shape = (in_channels, out_channels, kernel_size, kernel_size) + self.stride = 2 + self.padding = 1 + else: + self.use_conv2d_transpose = False + weight_shape = (out_channels, in_channels, kernel_size, kernel_size) + self.stride = stride + self.padding = padding + + fan_in = kernel_size * kernel_size * in_channels + wscale = wscale_gain / np.sqrt(fan_in) + if use_wscale: + self.weight = nn.Parameter(torch.randn(*weight_shape)) + self.wscale = wscale + else: + self.weight = nn.Parameter(torch.randn(*weight_shape) * wscale) + self.wscale = 1.0 + + if add_bias: + self.bias = nn.Parameter(torch.zeros(out_channels)) + else: + self.bias = None + + if activation_type == 'linear': + self.activate = nn.Identity() + elif activation_type == 'lrelu': + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + else: + raise NotImplementedError(f'Not implemented activation function: ' + f'`{activation_type}`!') + + def forward(self, x): + x = self.pixel_norm(x) + x = self.upsample(x) + weight = self.weight * self.wscale + if self.use_conv2d_transpose: + weight = F.pad(weight, (1, 1, 1, 1, 0, 0, 0, 0), 'constant', 0.0) + weight = (weight[:, :, 1:, 1:] + weight[:, :, :-1, 1:] + + weight[:, :, 1:, :-1] + weight[:, :, :-1, :-1]) + x = F.conv_transpose2d(x, + weight=weight, + bias=self.bias, + stride=self.stride, + padding=self.padding) + else: + x = F.conv2d(x, + weight=weight, + bias=self.bias, + stride=self.stride, + padding=self.padding) + x = self.activate(x) + return x diff --git a/ContraCLIP/models/genforce/models/stylegan2_discriminator.py b/ContraCLIP/models/genforce/models/stylegan2_discriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..9f5e43104f40f46ff0220df36164cdbae9fcff32 --- /dev/null +++ b/ContraCLIP/models/genforce/models/stylegan2_discriminator.py @@ -0,0 +1,468 @@ +# python3.7 +"""Contains the implementation of discriminator described in StyleGAN2. 
+ +Compared to that of StyleGAN, the discriminator in StyleGAN2 mainly adds skip +connections, increases model size and disables progressive growth. This script +ONLY supports config F in the original paper. + +Paper: https://arxiv.org/pdf/1912.04958.pdf + +Official TensorFlow implementation: https://github.com/NVlabs/stylegan2 +""" + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +__all__ = ['StyleGAN2Discriminator'] + +# Resolutions allowed. +_RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024] + +# Initial resolution. +_INIT_RES = 4 + +# Architectures allowed. +_ARCHITECTURES_ALLOWED = ['resnet', 'skip', 'origin'] + +# Default gain factor for weight scaling. +_WSCALE_GAIN = 1.0 + + +class StyleGAN2Discriminator(nn.Module): + """Defines the discriminator network in StyleGAN2. + + NOTE: The discriminator takes images with `RGB` channel order and pixel + range [-1, 1] as inputs. + + Settings for the network: + + (1) resolution: The resolution of the input image. + (2) image_channels: Number of channels of the input image. (default: 3) + (3) label_size: Size of the additional label for conditional generation. + (default: 0) + (4) architecture: Type of architecture. Support `origin`, `skip`, and + `resnet`. (default: `resnet`) + (5) use_wscale: Whether to use weight scaling. (default: True) + (6) minibatch_std_group_size: Group size for the minibatch standard + deviation layer. 0 means disable. (default: 4) + (7) minibatch_std_channels: Number of new channels after the minibatch + standard deviation layer. (default: 1) + (8) fmaps_base: Factor to control number of feature maps for each layer. + (default: 32 << 10) + (9) fmaps_max: Maximum number of feature maps in each layer. (default: 512) + """ + + def __init__(self, + resolution, + image_channels=3, + label_size=0, + architecture='resnet', + use_wscale=True, + minibatch_std_group_size=4, + minibatch_std_channels=1, + fmaps_base=32 << 10, + fmaps_max=512): + """Initializes with basic settings. + + Raises: + ValueError: If the `resolution` is not supported, or `architecture` + is not supported. + """ + super().__init__() + + if resolution not in _RESOLUTIONS_ALLOWED: + raise ValueError(f'Invalid resolution: `{resolution}`!\n' + f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.') + if architecture not in _ARCHITECTURES_ALLOWED: + raise ValueError(f'Invalid architecture: `{architecture}`!\n' + f'Architectures allowed: ' + f'{_ARCHITECTURES_ALLOWED}.') + + self.init_res = _INIT_RES + self.init_res_log2 = int(np.log2(self.init_res)) + self.resolution = resolution + self.final_res_log2 = int(np.log2(self.resolution)) + self.image_channels = image_channels + self.label_size = label_size + self.architecture = architecture + self.use_wscale = use_wscale + self.minibatch_std_group_size = minibatch_std_group_size + self.minibatch_std_channels = minibatch_std_channels + self.fmaps_base = fmaps_base + self.fmaps_max = fmaps_max + + self.pth_to_tf_var_mapping = {} + for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1): + res = 2 ** res_log2 + block_idx = self.final_res_log2 - res_log2 + + # Input convolution layer for each resolution (if needed). 
+ if res_log2 == self.final_res_log2 or self.architecture == 'skip': + self.add_module( + f'input{block_idx}', + ConvBlock(in_channels=self.image_channels, + out_channels=self.get_nf(res), + kernel_size=1, + use_wscale=self.use_wscale)) + self.pth_to_tf_var_mapping[f'input{block_idx}.weight'] = ( + f'{res}x{res}/FromRGB/weight') + self.pth_to_tf_var_mapping[f'input{block_idx}.bias'] = ( + f'{res}x{res}/FromRGB/bias') + + # Convolution block for each resolution (except the last one). + if res != self.init_res: + self.add_module( + f'layer{2 * block_idx}', + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.get_nf(res), + use_wscale=self.use_wscale)) + tf_layer0_name = 'Conv0' + self.add_module( + f'layer{2 * block_idx + 1}', + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.get_nf(res // 2), + scale_factor=2, + use_wscale=self.use_wscale)) + tf_layer1_name = 'Conv1_down' + + if self.architecture == 'resnet': + layer_name = f'skip_layer{block_idx}' + self.add_module( + layer_name, + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.get_nf(res // 2), + kernel_size=1, + add_bias=False, + scale_factor=2, + use_wscale=self.use_wscale, + activation_type='linear')) + self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = ( + f'{res}x{res}/Skip/weight') + + # Convolution block for last resolution. + else: + self.add_module( + f'layer{2 * block_idx}', + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.get_nf(res), + use_wscale=self.use_wscale, + minibatch_std_group_size=minibatch_std_group_size, + minibatch_std_channels=minibatch_std_channels)) + tf_layer0_name = 'Conv' + self.add_module( + f'layer{2 * block_idx + 1}', + DenseBlock(in_channels=self.get_nf(res) * res * res, + out_channels=self.get_nf(res // 2), + use_wscale=self.use_wscale)) + tf_layer1_name = 'Dense0' + + self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.weight'] = ( + f'{res}x{res}/{tf_layer0_name}/weight') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.bias'] = ( + f'{res}x{res}/{tf_layer0_name}/bias') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.weight'] = ( + f'{res}x{res}/{tf_layer1_name}/weight') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.bias'] = ( + f'{res}x{res}/{tf_layer1_name}/bias') + + # Final dense block. 
+ self.add_module( + f'layer{2 * block_idx + 2}', + DenseBlock(in_channels=self.get_nf(res // 2), + out_channels=max(self.label_size, 1), + use_wscale=self.use_wscale, + activation_type='linear')) + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.weight'] = ( + f'Output/weight') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.bias'] = ( + f'Output/bias') + + if self.architecture == 'skip': + self.downsample = DownsamplingLayer() + + def get_nf(self, res): + """Gets number of feature maps according to current resolution.""" + return min(self.fmaps_base // res, self.fmaps_max) + + def forward(self, image, label=None, **_unused_kwargs): + expected_shape = (self.image_channels, self.resolution, self.resolution) + if image.ndim != 4 or image.shape[1:] != expected_shape: + raise ValueError(f'The input tensor should be with shape ' + f'[batch_size, channel, height, width], where ' + f'`channel` equals to {self.image_channels}, ' + f'`height`, `width` equal to {self.resolution}!\n' + f'But `{image.shape}` is received!') + if self.label_size: + if label is None: + raise ValueError(f'Model requires an additional label ' + f'(with size {self.label_size}) as inputs, ' + f'but no label is received!') + batch_size = image.shape[0] + if label.ndim != 2 or label.shape != (batch_size, self.label_size): + raise ValueError(f'Input label should be with shape ' + f'[batch_size, label_size], where ' + f'`batch_size` equals to that of ' + f'images ({image.shape[0]}) and ' + f'`label_size` equals to {self.label_size}!\n' + f'But `{label.shape}` is received!') + + x = self.input0(image) + for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1): + block_idx = self.final_res_log2 - res_log2 + if self.architecture == 'skip' and block_idx > 0: + image = self.downsample(image) + x = x + self.__getattr__(f'input{block_idx}')(image) + if self.architecture == 'resnet' and res_log2 != self.init_res_log2: + residual = self.__getattr__(f'skip_layer{block_idx}')(x) + x = self.__getattr__(f'layer{2 * block_idx}')(x) + x = self.__getattr__(f'layer{2 * block_idx + 1}')(x) + if self.architecture == 'resnet' and res_log2 != self.init_res_log2: + x = (x + residual) / np.sqrt(2.0) + x = self.__getattr__(f'layer{2 * block_idx + 2}')(x) + + if self.label_size: + x = torch.sum(x * label, dim=1, keepdim=True) + return x + + +class MiniBatchSTDLayer(nn.Module): + """Implements the minibatch standard deviation layer.""" + + def __init__(self, group_size=4, new_channels=1, epsilon=1e-8): + super().__init__() + self.group_size = group_size + self.new_channels = new_channels + self.epsilon = epsilon + + def forward(self, x): + if self.group_size <= 1: + return x + ng = min(self.group_size, x.shape[0]) + nc = self.new_channels + temp_c = x.shape[1] // nc # [NCHW] + y = x.view(ng, -1, nc, temp_c, x.shape[2], x.shape[3]) # [GMncHW] + y = y - torch.mean(y, dim=0, keepdim=True) # [GMncHW] + y = torch.mean(y ** 2, dim=0) # [MncHW] + y = torch.sqrt(y + self.epsilon) # [MncHW] + y = torch.mean(y, dim=[2, 3, 4], keepdim=True) # [Mn111] + y = torch.mean(y, dim=2) # [Mn11] + y = y.repeat(ng, 1, x.shape[2], x.shape[3]) # [NnHW] + return torch.cat([x, y], dim=1) + + +class DownsamplingLayer(nn.Module): + """Implements the downsampling layer. + + This layer can also be used as filtering by setting `scale_factor` as 1. 
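A minimal scoring sketch for the `StyleGAN2Discriminator` defined in this file (import path assumed; the defaults give the unconditional, `resnet`-architecture config-F discriminator):

```python
import torch
from models.genforce.models.stylegan2_discriminator import StyleGAN2Discriminator

D = StyleGAN2Discriminator(resolution=64)
images = torch.randn(4, 3, 64, 64)       # RGB, pixel range nominally [-1, 1]
with torch.no_grad():
    logits = D(images)
print(logits.shape)                      # torch.Size([4, 1]) when label_size == 0
```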
+ """ + + def __init__(self, scale_factor=2, kernel=(1, 3, 3, 1), extra_padding=0): + super().__init__() + assert scale_factor >= 1 + self.scale_factor = scale_factor + + if extra_padding != 0: + assert scale_factor == 1 + + if kernel is None: + kernel = np.ones((scale_factor), dtype=np.float32) + else: + kernel = np.array(kernel, dtype=np.float32) + assert kernel.ndim == 1 + kernel = np.outer(kernel, kernel) + kernel = kernel / np.sum(kernel) + assert kernel.ndim == 2 + assert kernel.shape[0] == kernel.shape[1] + kernel = kernel[np.newaxis, np.newaxis] + self.register_buffer('kernel', torch.from_numpy(kernel)) + self.kernel = self.kernel.flip(0, 1) + padding = kernel.shape[2] - scale_factor + extra_padding + self.padding = ((padding + 1) // 2, padding // 2, + (padding + 1) // 2, padding // 2) + + def forward(self, x): + assert x.ndim == 4 + channels = x.shape[1] + x = x.view(-1, 1, x.shape[2], x.shape[3]) + x = F.pad(x, self.padding, mode='constant', value=0) + x = F.conv2d(x, self.kernel, stride=self.scale_factor) + x = x.view(-1, channels, x.shape[2], x.shape[3]) + return x + + +class ConvBlock(nn.Module): + """Implements the convolutional block. + + Basically, this block executes minibatch standard deviation layer (if + needed), filtering layer (if needed), convolutional layer, and activation + layer in sequence. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size=3, + add_bias=True, + scale_factor=1, + filtering_kernel=(1, 3, 3, 1), + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + lr_mul=1.0, + activation_type='lrelu', + minibatch_std_group_size=0, + minibatch_std_channels=1): + """Initializes with block settings. + + Args: + in_channels: Number of channels of the input tensor. + out_channels: Number of channels of the output tensor. + kernel_size: Size of the convolutional kernels. (default: 3) + add_bias: Whether to add bias onto the convolutional result. + (default: True) + scale_factor: Scale factor for downsampling. `1` means skip + downsampling. (default: 1) + filtering_kernel: Kernel used for filtering before downsampling. + (default: (1, 3, 3, 1)) + use_wscale: Whether to use weight scaling. (default: True) + wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN) + lr_mul: Learning multiplier for both weight and bias. (default: 1.0) + activation_type: Type of activation. Support `linear` and `lrelu`. + (default: `lrelu`) + minibatch_std_group_size: Group size for the minibatch standard + deviation layer. 0 means disable. (default: 0) + minibatch_std_channels: Number of new channels after the minibatch + standard deviation layer. (default: 1) + + Raises: + NotImplementedError: If the `activation_type` is not supported. + """ + super().__init__() + + if minibatch_std_group_size > 1: + in_channels = in_channels + minibatch_std_channels + self.mbstd = MiniBatchSTDLayer(group_size=minibatch_std_group_size, + new_channels=minibatch_std_channels) + else: + self.mbstd = nn.Identity() + + if scale_factor > 1: + extra_padding = kernel_size - scale_factor + self.filter = DownsamplingLayer(scale_factor=1, + kernel=filtering_kernel, + extra_padding=extra_padding) + self.stride = scale_factor + self.padding = 0 # Padding is done in `DownsamplingLayer`. 
+ else: + self.filter = nn.Identity() + assert kernel_size % 2 == 1 + self.stride = 1 + self.padding = kernel_size // 2 + + weight_shape = (out_channels, in_channels, kernel_size, kernel_size) + fan_in = kernel_size * kernel_size * in_channels + wscale = wscale_gain / np.sqrt(fan_in) + if use_wscale: + self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul) + self.wscale = wscale * lr_mul + else: + self.weight = nn.Parameter( + torch.randn(*weight_shape) * wscale / lr_mul) + self.wscale = lr_mul + + if add_bias: + self.bias = nn.Parameter(torch.zeros(out_channels)) + else: + self.bias = None + self.bscale = lr_mul + + if activation_type == 'linear': + self.activate = nn.Identity() + self.activate_scale = 1.0 + elif activation_type == 'lrelu': + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + self.activate_scale = np.sqrt(2.0) + else: + raise NotImplementedError(f'Not implemented activation function: ' + f'`{activation_type}`!') + + def forward(self, x): + x = self.mbstd(x) + x = self.filter(x) + weight = self.weight * self.wscale + bias = self.bias * self.bscale if self.bias is not None else None + x = F.conv2d(x, + weight=weight, + bias=bias, + stride=self.stride, + padding=self.padding) + x = self.activate(x) * self.activate_scale + return x + + +class DenseBlock(nn.Module): + """Implements the dense block. + + Basically, this block executes fully-connected layer and activation layer. + """ + + def __init__(self, + in_channels, + out_channels, + add_bias=True, + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + lr_mul=1.0, + activation_type='lrelu'): + """Initializes with block settings. + + Args: + in_channels: Number of channels of the input tensor. + out_channels: Number of channels of the output tensor. + add_bias: Whether to add bias onto the fully-connected result. + (default: True) + use_wscale: Whether to use weight scaling. (default: True) + wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN) + lr_mul: Learning multiplier for both weight and bias. (default: 1.0) + activation_type: Type of activation. Support `linear` and `lrelu`. + (default: `lrelu`) + + Raises: + NotImplementedError: If the `activation_type` is not supported. 
+ """ + super().__init__() + weight_shape = (out_channels, in_channels) + wscale = wscale_gain / np.sqrt(in_channels) + if use_wscale: + self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul) + self.wscale = wscale * lr_mul + else: + self.weight = nn.Parameter( + torch.randn(*weight_shape) * wscale / lr_mul) + self.wscale = lr_mul + + if add_bias: + self.bias = nn.Parameter(torch.zeros(out_channels)) + else: + self.bias = None + self.bscale = lr_mul + + if activation_type == 'linear': + self.activate = nn.Identity() + self.activate_scale = 1.0 + elif activation_type == 'lrelu': + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + self.activate_scale = np.sqrt(2.0) + else: + raise NotImplementedError(f'Not implemented activation function: ' + f'`{activation_type}`!') + + def forward(self, x): + if x.ndim != 2: + x = x.view(x.shape[0], -1) + bias = self.bias * self.bscale if self.bias is not None else None + x = F.linear(x, weight=self.weight * self.wscale, bias=bias) + x = self.activate(x) * self.activate_scale + return x diff --git a/ContraCLIP/models/genforce/models/stylegan2_generator.py b/ContraCLIP/models/genforce/models/stylegan2_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..4348965b4c3ae9e02b7086aac5cfd7be370ba6a6 --- /dev/null +++ b/ContraCLIP/models/genforce/models/stylegan2_generator.py @@ -0,0 +1,1081 @@ +# python3.7 +"""Contains the implementation of generator described in StyleGAN2. + +Compared to that of StyleGAN, the generator in StyleGAN2 mainly introduces style +demodulation, adds skip connections, increases model size, and disables +progressive growth. This script ONLY supports config F in the original paper. + +Paper: https://arxiv.org/pdf/1912.04958.pdf + +Official TensorFlow implementation: https://github.com/NVlabs/stylegan2 +""" + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .sync_op import all_gather + +__all__ = ['StyleGAN2Generator'] + +# Resolutions allowed. +_RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024] + +# Initial resolution. +_INIT_RES = 4 + +# Architectures allowed. +_ARCHITECTURES_ALLOWED = ['resnet', 'skip', 'origin'] + +# Default gain factor for weight scaling. +_WSCALE_GAIN = 1.0 + + +class StyleGAN2Generator(nn.Module): + """Defines the generator network in StyleGAN2. + + NOTE: The synthesized images are with `RGB` channel order and pixel range + [-1, 1]. + + Settings for the mapping network: + + (1) z_space_dim: Dimension of the input latent space, Z. (default: 512) + (2) w_space_dim: Dimension of the outout latent space, W. (default: 512) + (3) label_size: Size of the additional label for conditional generation. + (default: 0) + (4)mapping_layers: Number of layers of the mapping network. (default: 8) + (5) mapping_fmaps: Number of hidden channels of the mapping network. + (default: 512) + (6) mapping_lr_mul: Learning rate multiplier for the mapping network. + (default: 0.01) + (7) repeat_w: Repeat w-code for different layers. + + Settings for the synthesis network: + + (1) resolution: The resolution of the output image. + (2) image_channels: Number of channels of the output image. (default: 3) + (3) final_tanh: Whether to use `tanh` to control the final pixel range. + (default: False) + (4) const_input: Whether to use a constant in the first convolutional layer. + (default: True) + (5) architecture: Type of architecture. Support `origin`, `skip`, and + `resnet`. 
(default: `resnet`) + (6) fused_modulate: Whether to fuse `style_modulate` and `conv2d` together. + (default: True) + (7) demodulate: Whether to perform style demodulation. (default: True) + (8) use_wscale: Whether to use weight scaling. (default: True) + (9) noise_type: Type of noise added to the convolutional results at each + layer. (default: `spatial`) + (10) fmaps_base: Factor to control number of feature maps for each layer. + (default: 32 << 10) + (11) fmaps_max: Maximum number of feature maps in each layer. (default: 512) + """ + + def __init__(self, + resolution, + z_space_dim=512, + w_space_dim=512, + label_size=0, + mapping_layers=8, + mapping_fmaps=512, + mapping_lr_mul=0.01, + repeat_w=True, + image_channels=3, + final_tanh=False, + const_input=True, + architecture='skip', + fused_modulate=True, + demodulate=True, + use_wscale=True, + noise_type='spatial', + fmaps_base=32 << 10, + fmaps_max=512, + latent_is_w=False): + """Initializes with basic settings. + + Raises: + ValueError: If the `resolution` is not supported, or `architecture` + is not supported. + """ + super().__init__() + + if resolution not in _RESOLUTIONS_ALLOWED: + raise ValueError(f'Invalid resolution: `{resolution}`!\n' + f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.') + if architecture not in _ARCHITECTURES_ALLOWED: + raise ValueError(f'Invalid architecture: `{architecture}`!\n' + f'Architectures allowed: ' + f'{_ARCHITECTURES_ALLOWED}.') + + self.init_res = _INIT_RES + self.resolution = resolution + self.z_space_dim = z_space_dim + self.w_space_dim = w_space_dim + self.label_size = label_size + self.mapping_layers = mapping_layers + self.mapping_fmaps = mapping_fmaps + self.mapping_lr_mul = mapping_lr_mul + self.repeat_w = repeat_w + self.image_channels = image_channels + self.final_tanh = final_tanh + self.const_input = const_input + self.architecture = architecture + self.fused_modulate = fused_modulate + self.demodulate = demodulate + self.use_wscale = use_wscale + self.noise_type = noise_type + self.fmaps_base = fmaps_base + self.fmaps_max = fmaps_max + self.latent_is_w = latent_is_w + + self.num_layers = int(np.log2(self.resolution // self.init_res * 2)) * 2 + + if self.repeat_w: + self.mapping_space_dim = self.w_space_dim + else: + self.mapping_space_dim = self.w_space_dim * self.num_layers + self.mapping = MappingModule(input_space_dim=self.z_space_dim, + hidden_space_dim=self.mapping_fmaps, + final_space_dim=self.mapping_space_dim, + label_size=self.label_size, + num_layers=self.mapping_layers, + use_wscale=self.use_wscale, + lr_mul=self.mapping_lr_mul) + + self.truncation = TruncationModule(w_space_dim=self.w_space_dim, + num_layers=self.num_layers, + repeat_w=self.repeat_w) + + self.synthesis = SynthesisModule(resolution=self.resolution, + init_resolution=self.init_res, + w_space_dim=self.w_space_dim, + image_channels=self.image_channels, + final_tanh=self.final_tanh, + const_input=self.const_input, + architecture=self.architecture, + fused_modulate=self.fused_modulate, + demodulate=self.demodulate, + use_wscale=self.use_wscale, + noise_type=self.noise_type, + fmaps_base=self.fmaps_base, + fmaps_max=self.fmaps_max) + + self.pth_to_tf_var_mapping = {} + for key, val in self.mapping.pth_to_tf_var_mapping.items(): + self.pth_to_tf_var_mapping[f'mapping.{key}'] = val + for key, val in self.truncation.pth_to_tf_var_mapping.items(): + self.pth_to_tf_var_mapping[f'truncation.{key}'] = val + for key, val in self.synthesis.pth_to_tf_var_mapping.items(): + self.pth_to_tf_var_mapping[f'synthesis.{key}'] = val 
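With the mapping, truncation, and synthesis sub-modules wired up above, generating images only needs a batch of Z codes. A minimal end-to-end sketch (import path assumed; `eval()` skips the style-mixing and `w_avg`-update branches that are guarded by `self.training`):

```python
import torch
from models.genforce.models.stylegan2_generator import StyleGAN2Generator

G = StyleGAN2Generator(resolution=64).eval()
z = torch.randn(2, 512)                          # [batch_size, z_space_dim]
with torch.no_grad():
    images = G(z, trunc_psi=0.7, trunc_layers=8)
print(images.shape)                              # torch.Size([2, 3, 64, 64])
```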
+ + def set_space_of_latent(self, space_of_latent='w'): + """Sets the space to which the latent code belong. + + This function is particually used for choosing how to inject the latent + code into the convolutional layers. The original generator will take a + W-Space code and apply it for style modulation after an affine + transformation. But, sometimes, it may need to directly feed an already + affine-transformed code into the convolutional layer, e.g., when + training an encoder for GAN inversion. We term the transformed space as + Style Space (or Y-Space). This function is designed to tell the + convolutional layers how to use the input code. + + Args: + space_of_latent: The space to which the latent code belong. Case + insensitive. (default: 'w') + """ + for module in self.modules(): + if isinstance(module, ModulateConvBlock): + setattr(module, 'space_of_latent', space_of_latent) + + def forward(self, + z, + label=None, + w_moving_decay=0.995, + style_mixing_prob=0.9, + trunc_psi=None, + trunc_layers=None, + randomize_noise=False, + **_unused_kwargs): + # TODO: add comment + if self.latent_is_w: + w = z + else: + mapping_results = self.mapping(z, label) + w = mapping_results['w'] + + if self.training and w_moving_decay < 1: + batch_w_avg = all_gather(w).mean(dim=0) + self.truncation.w_avg.copy_( + self.truncation.w_avg * w_moving_decay + + batch_w_avg * (1 - w_moving_decay)) + + if self.training and style_mixing_prob > 0: + new_z = torch.randn_like(z) + new_w = self.mapping(new_z, label)['w'] + if np.random.uniform() < style_mixing_prob: + mixing_cutoff = np.random.randint(1, self.num_layers) + w = self.truncation(w) + new_w = self.truncation(new_w) + w[:, :mixing_cutoff] = new_w[:, :mixing_cutoff] + + wp = self.truncation(w, trunc_psi, trunc_layers) + synthesis_results = self.synthesis(wp, randomize_noise) + + return synthesis_results['image'] + + def get_w(self, z, truncation, trunc_layers=18, label=None): + mapping_results = self.mapping(z, label) + w = mapping_results['w'] + wp = self.truncation(w, truncation, trunc_layers) + return wp + + +class MappingModule(nn.Module): + """Implements the latent space mapping module. + + Basically, this module executes several dense layers in sequence. 
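`get_w` above exposes the layer-wise W+ codes (mapping followed by truncation) without running the synthesis network, which is convenient for methods that edit latent codes in W/W+ space. A sketch under the same assumed import:

```python
import torch
from models.genforce.models.stylegan2_generator import StyleGAN2Generator

G = StyleGAN2Generator(resolution=64).eval()
z = torch.randn(2, 512)
with torch.no_grad():
    wp = G.get_w(z, truncation=0.7, trunc_layers=8)
print(wp.shape)     # torch.Size([2, 10, 512]); the second dim equals G.num_layers at 64x64
```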
+ """ + + def __init__(self, + input_space_dim=512, + hidden_space_dim=512, + final_space_dim=512, + label_size=0, + num_layers=8, + normalize_input=True, + use_wscale=True, + lr_mul=0.01): + super().__init__() + + self.input_space_dim = input_space_dim + self.hidden_space_dim = hidden_space_dim + self.final_space_dim = final_space_dim + self.label_size = label_size + self.num_layers = num_layers + self.normalize_input = normalize_input + self.use_wscale = use_wscale + self.lr_mul = lr_mul + + self.norm = PixelNormLayer() if self.normalize_input else nn.Identity() + + self.pth_to_tf_var_mapping = {} + for i in range(num_layers): + dim_mul = 2 if label_size else 1 + in_channels = (input_space_dim * dim_mul if i == 0 else + hidden_space_dim) + out_channels = (final_space_dim if i == (num_layers - 1) else + hidden_space_dim) + self.add_module(f'dense{i}', + DenseBlock(in_channels=in_channels, + out_channels=out_channels, + use_wscale=self.use_wscale, + lr_mul=self.lr_mul)) + self.pth_to_tf_var_mapping[f'dense{i}.weight'] = f'Dense{i}/weight' + self.pth_to_tf_var_mapping[f'dense{i}.bias'] = f'Dense{i}/bias' + if label_size: + self.label_weight = nn.Parameter( + torch.randn(label_size, input_space_dim)) + self.pth_to_tf_var_mapping[f'label_weight'] = f'LabelConcat/weight' + + def forward(self, z, label=None): + if z.ndim != 2 or z.shape[1] != self.input_space_dim: + raise ValueError(f'Input latent code should be with shape ' + f'[batch_size, input_dim], where ' + f'`input_dim` equals to {self.input_space_dim}!\n' + f'But `{z.shape}` is received!') + if self.label_size: + if label is None: + raise ValueError(f'Model requires an additional label ' + f'(with size {self.label_size}) as input, ' + f'but no label is received!') + if label.ndim != 2 or label.shape != (z.shape[0], self.label_size): + raise ValueError(f'Input label should be with shape ' + f'[batch_size, label_size], where ' + f'`batch_size` equals to that of ' + f'latent codes ({z.shape[0]}) and ' + f'`label_size` equals to {self.label_size}!\n' + f'But `{label.shape}` is received!') + embedding = torch.matmul(label, self.label_weight) + z = torch.cat((z, embedding), dim=1) + + z = self.norm(z) + w = z + for i in range(self.num_layers): + w = self.__getattr__(f'dense{i}')(w) + results = { + 'z': z, + 'label': label, + 'w': w, + } + if self.label_size: + results['embedding'] = embedding + return results + + +class TruncationModule(nn.Module): + """Implements the truncation module. + + Truncation is executed as follows: + + For layers in range [0, truncation_layers), the truncated w-code is computed + as + + w_new = w_avg + (w - w_avg) * truncation_psi + + To disable truncation, please set + (1) truncation_psi = 1.0 (None) OR + (2) truncation_layers = 0 (None) + + NOTE: The returned tensor is layer-wise style codes. 
+ """ + + def __init__(self, w_space_dim, num_layers, repeat_w=True): + super().__init__() + + self.num_layers = num_layers + self.w_space_dim = w_space_dim + self.repeat_w = repeat_w + + if self.repeat_w: + self.register_buffer('w_avg', torch.zeros(w_space_dim)) + else: + self.register_buffer('w_avg', torch.zeros(num_layers * w_space_dim)) + self.pth_to_tf_var_mapping = {'w_avg': 'dlatent_avg'} + + def forward(self, w, trunc_psi=None, trunc_layers=None): + if w.ndim == 2: + if self.repeat_w and w.shape[1] == self.w_space_dim: + w = w.view(-1, 1, self.w_space_dim) + wp = w.repeat(1, self.num_layers, 1) + else: + assert w.shape[1] == self.w_space_dim * self.num_layers + wp = w.view(-1, self.num_layers, self.w_space_dim) + else: + wp = w + assert wp.ndim == 3 + assert wp.shape[1:] == (self.num_layers, self.w_space_dim) + + trunc_psi = 1.0 if trunc_psi is None else trunc_psi + trunc_layers = 0 if trunc_layers is None else trunc_layers + if trunc_psi < 1.0 and trunc_layers > 0: + layer_idx = np.arange(self.num_layers).reshape(1, -1, 1) + coefs = np.ones_like(layer_idx, dtype=np.float32) + coefs[layer_idx < trunc_layers] *= trunc_psi + coefs = torch.from_numpy(coefs).to(wp) + w_avg = self.w_avg.view(1, -1, self.w_space_dim) + wp = w_avg + (wp - w_avg) * coefs + return wp + + +class SynthesisModule(nn.Module): + """Implements the image synthesis module. + + Basically, this module executes several convolutional layers in sequence. + """ + + def __init__(self, + resolution=1024, + init_resolution=4, + w_space_dim=512, + image_channels=3, + final_tanh=False, + const_input=True, + architecture='skip', + fused_modulate=True, + demodulate=True, + use_wscale=True, + noise_type='spatial', + fmaps_base=32 << 10, + fmaps_max=512): + super().__init__() + + self.init_res = init_resolution + self.init_res_log2 = int(np.log2(self.init_res)) + self.resolution = resolution + self.final_res_log2 = int(np.log2(self.resolution)) + self.w_space_dim = w_space_dim + self.image_channels = image_channels + self.final_tanh = final_tanh + self.const_input = const_input + self.architecture = architecture + self.fused_modulate = fused_modulate + self.demodulate = demodulate + self.use_wscale = use_wscale + self.noise_type = noise_type + self.fmaps_base = fmaps_base + self.fmaps_max = fmaps_max + + self.num_layers = (self.final_res_log2 - self.init_res_log2 + 1) * 2 + + self.pth_to_tf_var_mapping = {} + for res_log2 in range(self.init_res_log2, self.final_res_log2 + 1): + res = 2 ** res_log2 + block_idx = res_log2 - self.init_res_log2 + + # First convolution layer for each resolution. 
+ if res == self.init_res: + if self.const_input: + self.add_module(f'early_layer', + InputBlock(init_resolution=self.init_res, + channels=self.get_nf(res))) + self.pth_to_tf_var_mapping[f'early_layer.const'] = ( + f'{res}x{res}/Const/const') + else: + self.add_module(f'early_layer', + DenseBlock(in_channels=self.w_space_dim, + out_channels=self.get_nf(res), + use_wscale=self.use_wscale)) + self.pth_to_tf_var_mapping[f'early_layer.weight'] = ( + f'{res}x{res}/Dense/weight') + self.pth_to_tf_var_mapping[f'early_layer.bias'] = ( + f'{res}x{res}/Dense/bias') + else: + layer_name = f'layer{2 * block_idx - 1}' + self.add_module( + layer_name, + ModulateConvBlock(in_channels=self.get_nf(res // 2), + out_channels=self.get_nf(res), + resolution=res, + w_space_dim=self.w_space_dim, + scale_factor=2, + fused_modulate=self.fused_modulate, + demodulate=self.demodulate, + use_wscale=self.use_wscale, + noise_type=self.noise_type)) + self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = ( + f'{res}x{res}/Conv0_up/weight') + self.pth_to_tf_var_mapping[f'{layer_name}.bias'] = ( + f'{res}x{res}/Conv0_up/bias') + self.pth_to_tf_var_mapping[f'{layer_name}.style.weight'] = ( + f'{res}x{res}/Conv0_up/mod_weight') + self.pth_to_tf_var_mapping[f'{layer_name}.style.bias'] = ( + f'{res}x{res}/Conv0_up/mod_bias') + self.pth_to_tf_var_mapping[f'{layer_name}.noise_strength'] = ( + f'{res}x{res}/Conv0_up/noise_strength') + self.pth_to_tf_var_mapping[f'{layer_name}.noise'] = ( + f'noise{2 * block_idx - 1}') + + if self.architecture == 'resnet': + layer_name = f'layer{2 * block_idx - 1}' + self.add_module( + layer_name, + ConvBlock(in_channels=self.get_nf(res // 2), + out_channels=self.get_nf(res), + kernel_size=1, + add_bias=False, + scale_factor=2, + use_wscale=self.use_wscale, + activation_type='linear')) + self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = ( + f'{res}x{res}/Skip/weight') + + # Second convolution layer for each resolution. + layer_name = f'layer{2 * block_idx}' + self.add_module( + layer_name, + ModulateConvBlock(in_channels=self.get_nf(res), + out_channels=self.get_nf(res), + resolution=res, + w_space_dim=self.w_space_dim, + fused_modulate=self.fused_modulate, + demodulate=self.demodulate, + use_wscale=self.use_wscale, + noise_type=self.noise_type)) + tf_layer_name = 'Conv' if res == self.init_res else 'Conv1' + self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = ( + f'{res}x{res}/{tf_layer_name}/weight') + self.pth_to_tf_var_mapping[f'{layer_name}.bias'] = ( + f'{res}x{res}/{tf_layer_name}/bias') + self.pth_to_tf_var_mapping[f'{layer_name}.style.weight'] = ( + f'{res}x{res}/{tf_layer_name}/mod_weight') + self.pth_to_tf_var_mapping[f'{layer_name}.style.bias'] = ( + f'{res}x{res}/{tf_layer_name}/mod_bias') + self.pth_to_tf_var_mapping[f'{layer_name}.noise_strength'] = ( + f'{res}x{res}/{tf_layer_name}/noise_strength') + self.pth_to_tf_var_mapping[f'{layer_name}.noise'] = ( + f'noise{2 * block_idx}') + + # Output convolution layer for each resolution (if needed). 
+ if res_log2 == self.final_res_log2 or self.architecture == 'skip': + layer_name = f'output{block_idx}' + self.add_module( + layer_name, + ModulateConvBlock(in_channels=self.get_nf(res), + out_channels=image_channels, + resolution=res, + w_space_dim=self.w_space_dim, + kernel_size=1, + fused_modulate=self.fused_modulate, + demodulate=False, + use_wscale=self.use_wscale, + add_noise=False, + activation_type='linear')) + self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = ( + f'{res}x{res}/ToRGB/weight') + self.pth_to_tf_var_mapping[f'{layer_name}.bias'] = ( + f'{res}x{res}/ToRGB/bias') + self.pth_to_tf_var_mapping[f'{layer_name}.style.weight'] = ( + f'{res}x{res}/ToRGB/mod_weight') + self.pth_to_tf_var_mapping[f'{layer_name}.style.bias'] = ( + f'{res}x{res}/ToRGB/mod_bias') + + if self.architecture == 'skip': + self.upsample = UpsamplingLayer() + self.final_activate = nn.Tanh() if final_tanh else nn.Identity() + + def get_nf(self, res): + """Gets number of feature maps according to current resolution.""" + return min(self.fmaps_base // res, self.fmaps_max) + + def forward(self, wp, randomize_noise=False): + results = {'wp': wp} + x = self.early_layer(wp[:, 0]) + if self.architecture == 'origin': + for layer_idx in range(self.num_layers - 1): + x, style = self.__getattr__(f'layer{layer_idx}')( + x, wp[:, layer_idx], randomize_noise) + results[f'style{layer_idx:02d}'] = style + image, style = self.__getattr__(f'output{layer_idx // 2}')( + x, wp[:, layer_idx + 1]) + results[f'output_style{layer_idx // 2}'] = style + elif self.architecture == 'skip': + for layer_idx in range(self.num_layers - 1): + x, style = self.__getattr__(f'layer{layer_idx}')( + x, wp[:, layer_idx], randomize_noise) + results[f'style{layer_idx:02d}'] = style + if layer_idx % 2 == 0: + temp, style = self.__getattr__(f'output{layer_idx // 2}')( + x, wp[:, layer_idx + 1]) + results[f'output_style{layer_idx // 2}'] = style + if layer_idx == 0: + image = temp + else: + image = temp + self.upsample(image) + elif self.architecture == 'resnet': + x, style = self.layer0(x) + results[f'style00'] = style + for layer_idx in range(1, self.num_layers - 1, 2): + residual = self.__getattr__(f'skip_layer{layer_idx // 2}')(x) + x, style = self.__getattr__(f'layer{layer_idx}')( + x, wp[:, layer_idx], randomize_noise) + results[f'style{layer_idx:02d}'] = style + x, style = self.__getattr__(f'layer{layer_idx + 1}')( + x, wp[:, layer_idx + 1], randomize_noise) + results[f'style{layer_idx + 1:02d}'] = style + x = (x + residual) / np.sqrt(2.0) + image, style = self.__getattr__(f'output{layer_idx // 2 + 1}')( + x, wp[:, layer_idx + 2]) + results[f'output_style{layer_idx // 2}'] = style + results['image'] = self.final_activate(image) + return results + + +class PixelNormLayer(nn.Module): + """Implements pixel-wise feature vector normalization layer.""" + + def __init__(self, dim=1, epsilon=1e-8): + super().__init__() + self.dim = dim + self.eps = epsilon + + def forward(self, x): + norm = torch.sqrt( + torch.mean(x ** 2, dim=self.dim, keepdim=True) + self.eps) + return x / norm + + +class UpsamplingLayer(nn.Module): + """Implements the upsampling layer. + + This layer can also be used as filtering by setting `scale_factor` as 1. 
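The synthesis module above returns a dictionary holding the image together with the per-layer style codes, which is what the `style{...}` bookkeeping in the loops is for. A shape-level sketch, with the generator instantiated as in the earlier examples:

```python
import torch
from models.genforce.models.stylegan2_generator import StyleGAN2Generator

G = StyleGAN2Generator(resolution=64).eval()
wp = torch.randn(1, G.num_layers, 512)           # layer-wise (W+) codes
with torch.no_grad():
    out = G.synthesis(wp)
print(out['image'].shape)                                 # torch.Size([1, 3, 64, 64])
print([k for k in out if k.startswith('style')][:3])      # ['style00', 'style01', 'style02']
```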
+ """ + + def __init__(self, + scale_factor=2, + kernel=(1, 3, 3, 1), + extra_padding=0, + kernel_gain=None): + super().__init__() + assert scale_factor >= 1 + self.scale_factor = scale_factor + + if extra_padding != 0: + assert scale_factor == 1 + + if kernel is None: + kernel = np.ones((scale_factor), dtype=np.float32) + else: + kernel = np.array(kernel, dtype=np.float32) + assert kernel.ndim == 1 + kernel = np.outer(kernel, kernel) + kernel = kernel / np.sum(kernel) + if kernel_gain is None: + kernel = kernel * (scale_factor ** 2) + else: + assert kernel_gain > 0 + kernel = kernel * (kernel_gain ** 2) + assert kernel.ndim == 2 + assert kernel.shape[0] == kernel.shape[1] + kernel = kernel[np.newaxis, np.newaxis] + self.register_buffer('kernel', torch.from_numpy(kernel)) + self.kernel = self.kernel.flip(0, 1) + + self.upsample_padding = (0, scale_factor - 1, # Width padding. + 0, 0, # Width. + 0, scale_factor - 1, # Height padding. + 0, 0, # Height. + 0, 0, # Channel. + 0, 0) # Batch size. + + padding = kernel.shape[2] - scale_factor + extra_padding + self.padding = ((padding + 1) // 2 + scale_factor - 1, padding // 2, + (padding + 1) // 2 + scale_factor - 1, padding // 2) + + def forward(self, x): + assert x.ndim == 4 + channels = x.shape[1] + if self.scale_factor > 1: + x = x.view(-1, channels, x.shape[2], 1, x.shape[3], 1) + x = F.pad(x, self.upsample_padding, mode='constant', value=0) + x = x.view(-1, channels, x.shape[2] * self.scale_factor, + x.shape[4] * self.scale_factor) + x = x.view(-1, 1, x.shape[2], x.shape[3]) + x = F.pad(x, self.padding, mode='constant', value=0) + x = F.conv2d(x, self.kernel, stride=1) + x = x.view(-1, channels, x.shape[2], x.shape[3]) + return x + + +class InputBlock(nn.Module): + """Implements the input block. + + Basically, this block starts from a const input, which is with shape + `(channels, init_resolution, init_resolution)`. + """ + + def __init__(self, init_resolution, channels): + super().__init__() + self.const = nn.Parameter( + torch.randn(1, channels, init_resolution, init_resolution)) + + def forward(self, w): + x = self.const.repeat(w.shape[0], 1, 1, 1) + return x + + +class ConvBlock(nn.Module): + """Implements the convolutional block (no style modulation). + + Basically, this block executes, convolutional layer, filtering layer (if + needed), and activation layer in sequence. + + NOTE: This block is particularly used for skip-connection branch in the + `resnet` structure. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size=3, + add_bias=True, + scale_factor=1, + filtering_kernel=(1, 3, 3, 1), + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + lr_mul=1.0, + activation_type='lrelu'): + """Initializes with block settings. + + Args: + in_channels: Number of channels of the input tensor. + out_channels: Number of channels of the output tensor. + kernel_size: Size of the convolutional kernels. (default: 3) + add_bias: Whether to add bias onto the convolutional result. + (default: True) + scale_factor: Scale factor for upsampling. `1` means skip + upsampling. (default: 1) + filtering_kernel: Kernel used for filtering after upsampling. + (default: (1, 3, 3, 1)) + use_wscale: Whether to use weight scaling. (default: True) + wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN) + lr_mul: Learning multiplier for both weight and bias. (default: 1.0) + activation_type: Type of activation. Support `linear` and `lrelu`. + (default: `lrelu`) + + Raises: + NotImplementedError: If the `activation_type` is not supported. 
+ """ + super().__init__() + + if scale_factor > 1: + self.use_conv2d_transpose = True + extra_padding = scale_factor - kernel_size + self.filter = UpsamplingLayer(scale_factor=1, + kernel=filtering_kernel, + extra_padding=extra_padding, + kernel_gain=scale_factor) + self.stride = scale_factor + self.padding = 0 # Padding is done in `UpsamplingLayer`. + else: + self.use_conv2d_transpose = False + assert kernel_size % 2 == 1 + self.stride = 1 + self.padding = kernel_size // 2 + + weight_shape = (out_channels, in_channels, kernel_size, kernel_size) + fan_in = kernel_size * kernel_size * in_channels + wscale = wscale_gain / np.sqrt(fan_in) + if use_wscale: + self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul) + self.wscale = wscale * lr_mul + else: + self.weight = nn.Parameter( + torch.randn(*weight_shape) * wscale / lr_mul) + self.wscale = lr_mul + + if add_bias: + self.bias = nn.Parameter(torch.zeros(out_channels)) + else: + self.bias = None + self.bscale = lr_mul + + if activation_type == 'linear': + self.activate = nn.Identity() + self.activate_scale = 1.0 + elif activation_type == 'lrelu': + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + self.activate_scale = np.sqrt(2.0) + else: + raise NotImplementedError(f'Not implemented activation function: ' + f'`{activation_type}`!') + + def forward(self, x): + weight = self.weight * self.wscale + bias = self.bias * self.bscale if self.bias is not None else None + if self.use_conv2d_transpose: + weight = weight.permute(1, 0, 2, 3).flip(2, 3) + x = F.conv_transpose2d(x, + weight=weight, + bias=bias, + stride=self.scale_factor, + padding=self.padding) + x = self.filter(x) + else: + x = F.conv2d(x, + weight=weight, + bias=bias, + stride=self.stride, + padding=self.padding) + x = self.activate(x) * self.activate_scale + return x + + +class ModulateConvBlock(nn.Module): + """Implements the convolutional block with style modulation.""" + + def __init__(self, + in_channels, + out_channels, + resolution, + w_space_dim, + kernel_size=3, + add_bias=True, + scale_factor=1, + filtering_kernel=(1, 3, 3, 1), + fused_modulate=True, + demodulate=True, + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + lr_mul=1.0, + add_noise=True, + noise_type='spatial', + activation_type='lrelu', + epsilon=1e-8): + """Initializes with block settings. + + Args: + in_channels: Number of channels of the input tensor. + out_channels: Number of channels of the output tensor. + resolution: Resolution of the output tensor. + w_space_dim: Dimension of W space for style modulation. + kernel_size: Size of the convolutional kernels. (default: 3) + add_bias: Whether to add bias onto the convolutional result. + (default: True) + scale_factor: Scale factor for upsampling. `1` means skip + upsampling. (default: 1) + filtering_kernel: Kernel used for filtering after upsampling. + (default: (1, 3, 3, 1)) + fused_modulate: Whether to fuse `style_modulate` and `conv2d` + together. (default: True) + demodulate: Whether to perform style demodulation. (default: True) + use_wscale: Whether to use weight scaling. (default: True) + wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN) + lr_mul: Learning multiplier for both weight and bias. (default: 1.0) + add_noise: Whether to add noise onto the output tensor. (default: + True) + noise_type: Type of noise added to the feature map after the + convolution (if needed). Support `spatial` and `channel`. + (default: `spatial`) + activation_type: Type of activation. Support `linear` and `lrelu`. 
+ (default: `lrelu`) + epsilon: Small number to avoid `divide by zero`. (default: 1e-8) + + Raises: + NotImplementedError: If the `activation_type` is not supported. + """ + super().__init__() + + self.in_c = in_channels + self.out_c = out_channels + self.res = resolution + self.w_space_dim = w_space_dim + self.ksize = kernel_size + self.eps = epsilon + self.space_of_latent = 'w' + + if scale_factor > 1: + self.use_conv2d_transpose = True + extra_padding = scale_factor - kernel_size + self.filter = UpsamplingLayer(scale_factor=1, + kernel=filtering_kernel, + extra_padding=extra_padding, + kernel_gain=scale_factor) + self.stride = scale_factor + self.padding = 0 # Padding is done in `UpsamplingLayer`. + else: + self.use_conv2d_transpose = False + assert kernel_size % 2 == 1 + self.stride = 1 + self.padding = kernel_size // 2 + + weight_shape = (out_channels, in_channels, kernel_size, kernel_size) + fan_in = kernel_size * kernel_size * in_channels + wscale = wscale_gain / np.sqrt(fan_in) + if use_wscale: + self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul) + self.wscale = wscale * lr_mul + else: + self.weight = nn.Parameter( + torch.randn(*weight_shape) * wscale / lr_mul) + self.wscale = lr_mul + + self.style = DenseBlock(in_channels=w_space_dim, + out_channels=in_channels, + additional_bias=1.0, + use_wscale=use_wscale, + activation_type='linear') + + self.fused_modulate = fused_modulate + self.demodulate = demodulate + + if add_bias: + self.bias = nn.Parameter(torch.zeros(out_channels)) + else: + self.bias = None + self.bscale = lr_mul + + if activation_type == 'linear': + self.activate = nn.Identity() + self.activate_scale = 1.0 + elif activation_type == 'lrelu': + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + self.activate_scale = np.sqrt(2.0) + else: + raise NotImplementedError(f'Not implemented activation function: ' + f'`{activation_type}`!') + + self.add_noise = add_noise + if self.add_noise: + self.noise_type = noise_type.lower() + if self.noise_type == 'spatial': + self.register_buffer('noise', + torch.randn(1, 1, self.res, self.res)) + elif self.noise_type == 'channel': + self.register_buffer('noise', + torch.randn(1, self.channels, 1, 1)) + else: + raise NotImplementedError(f'Not implemented noise type: ' + f'`{self.noise_type}`!') + self.noise_strength = nn.Parameter(torch.zeros(())) + + def forward_style(self, w): + """Gets style code from the given input. + + More specifically, if the input is from W-Space, it will be projected by + an affine transformation. If it is from the Style Space (Y-Space), no + operation is required. + + NOTE: For codes from Y-Space, we use slicing to make sure the dimension + is correct, in case that the code is padded before fed into this layer. + """ + if self.space_of_latent == 'w': + if w.ndim != 2 or w.shape[1] != self.w_space_dim: + raise ValueError(f'The input tensor should be with shape ' + f'[batch_size, w_space_dim], where ' + f'`w_space_dim` equals to ' + f'{self.w_space_dim}!\n' + f'But `{w.shape}` is received!') + style = self.style(w) + elif self.space_of_latent == 'y': + if w.ndim != 2 or w.shape[1] < self.in_c: + raise ValueError(f'The input tensor should be with shape ' + f'[batch_size, y_space_dim], where ' + f'`y_space_dim` equals to {self.in_c}!\n' + f'But `{w.shape}` is received!') + style = w[:, :self.in_c] + return style + + def forward(self, x, w, randomize_noise=False): + batch = x.shape[0] + + weight = self.weight * self.wscale + weight = weight.permute(2, 3, 1, 0) + + # Style modulation. 
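+        # The per-sample style vector (one scalar per input channel, produced
+        # by the affine `self.style` projection) scales the shared kernel,
+        # which is equivalent to modulating the input feature maps; the
+        # demodulation step below then rescales every output filter to
+        # roughly unit norm, as in StyleGAN2.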
+ style = self.forward_style(w) + _weight = weight.view(1, self.ksize, self.ksize, self.in_c, self.out_c) + _weight = _weight * style.view(batch, 1, 1, self.in_c, 1) + + # Style demodulation. + if self.demodulate: + _weight_norm = torch.sqrt( + torch.sum(_weight ** 2, dim=[1, 2, 3]) + self.eps) + _weight = _weight / _weight_norm.view(batch, 1, 1, 1, self.out_c) + + if self.fused_modulate: + x = x.view(1, batch * self.in_c, x.shape[2], x.shape[3]) + weight = _weight.permute(1, 2, 3, 0, 4).reshape( + self.ksize, self.ksize, self.in_c, batch * self.out_c) + else: + x = x * style.view(batch, self.in_c, 1, 1) + + if self.use_conv2d_transpose: + weight = weight.flip(0, 1) + if self.fused_modulate: + weight = weight.view( + self.ksize, self.ksize, self.in_c, batch, self.out_c) + weight = weight.permute(0, 1, 4, 3, 2) + weight = weight.reshape( + self.ksize, self.ksize, self.out_c, batch * self.in_c) + weight = weight.permute(3, 2, 0, 1) + else: + weight = weight.permute(2, 3, 0, 1) + x = F.conv_transpose2d(x, + weight=weight, + bias=None, + stride=self.stride, + padding=self.padding, + groups=(batch if self.fused_modulate else 1)) + x = self.filter(x) + else: + weight = weight.permute(3, 2, 0, 1) + x = F.conv2d(x, + weight=weight, + bias=None, + stride=self.stride, + padding=self.padding, + groups=(batch if self.fused_modulate else 1)) + + if self.fused_modulate: + x = x.view(batch, self.out_c, self.res, self.res) + elif self.demodulate: + x = x / _weight_norm.view(batch, self.out_c, 1, 1) + + if self.add_noise: + if randomize_noise: + if self.noise_type == 'spatial': + noise = torch.randn(x.shape[0], 1, self.res, self.res).to(x) + elif self.noise_type == 'channel': + noise = torch.randn(x.shape[0], self.channels, 1, 1).to(x) + else: + noise = self.noise + x = x + noise * self.noise_strength.view(1, 1, 1, 1) + + bias = self.bias * self.bscale if self.bias is not None else None + if bias is not None: + x = x + bias.view(1, -1, 1, 1) + x = self.activate(x) * self.activate_scale + return x, style + + +class DenseBlock(nn.Module): + """Implements the dense block. + + Basically, this block executes fully-connected layer and activation layer. + + NOTE: This layer supports adding an additional bias beyond the trainable + bias parameter. This is specially used for the mapping from the w code to + the style code. + """ + + def __init__(self, + in_channels, + out_channels, + add_bias=True, + additional_bias=0, + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + lr_mul=1.0, + activation_type='lrelu'): + """Initializes with block settings. + + Args: + in_channels: Number of channels of the input tensor. + out_channels: Number of channels of the output tensor. + add_bias: Whether to add bias onto the fully-connected result. + (default: True) + additional_bias: The additional bias, which is independent from the + bias parameter. (default: 0.0) + use_wscale: Whether to use weight scaling. (default: True) + wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN) + lr_mul: Learning multiplier for both weight and bias. (default: 1.0) + activation_type: Type of activation. Support `linear` and `lrelu`. + (default: `lrelu`) + + Raises: + NotImplementedError: If the `activation_type` is not supported. 
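+
+        NOTE: With `additional_bias=1.0` and a `linear` activation (the
+        setting used for the style projection in `ModulateConvBlock`), the
+        learned projection is offset by +1, so a zero projection corresponds
+        to a style scale of 1, i.e. no modulation.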
+ """ + super().__init__() + weight_shape = (out_channels, in_channels) + wscale = wscale_gain / np.sqrt(in_channels) + if use_wscale: + self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul) + self.wscale = wscale * lr_mul + else: + self.weight = nn.Parameter( + torch.randn(*weight_shape) * wscale / lr_mul) + self.wscale = lr_mul + + if add_bias: + self.bias = nn.Parameter(torch.zeros(out_channels)) + else: + self.bias = None + self.bscale = lr_mul + self.additional_bias = additional_bias + + if activation_type == 'linear': + self.activate = nn.Identity() + self.activate_scale = 1.0 + elif activation_type == 'lrelu': + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + self.activate_scale = np.sqrt(2.0) + else: + raise NotImplementedError(f'Not implemented activation function: ' + f'`{activation_type}`!') + + def forward(self, x): + if x.ndim != 2: + x = x.view(x.shape[0], -1) + bias = self.bias * self.bscale if self.bias is not None else None + x = F.linear(x, weight=self.weight * self.wscale, bias=bias) + x = self.activate(x + self.additional_bias) * self.activate_scale + return x + + def get_w(self, z, truncation, trunc_layers=18, label=None): + mapping_results = self.mapping(z, label) + w = mapping_results['w'] + wp = self.truncation(w, trunc_psi, trunc_layers) + return wp diff --git a/ContraCLIP/models/genforce/models/stylegan_discriminator.py b/ContraCLIP/models/genforce/models/stylegan_discriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..7fb95b59b9e2bbd8fd7b1dfaa661ef94fdf5ce8b --- /dev/null +++ b/ContraCLIP/models/genforce/models/stylegan_discriminator.py @@ -0,0 +1,530 @@ +# python3.7 +"""Contains the implementation of discriminator described in StyleGAN. + +Paper: https://arxiv.org/pdf/1812.04948.pdf + +Official TensorFlow implementation: https://github.com/NVlabs/stylegan +""" + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +__all__ = ['StyleGANDiscriminator'] + +# Resolutions allowed. +_RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024] + +# Initial resolution. +_INIT_RES = 4 + +# Fused-scale options allowed. +_FUSED_SCALE_ALLOWED = [True, False, 'auto'] + +# Minimal resolution for `auto` fused-scale strategy. +_AUTO_FUSED_SCALE_MIN_RES = 128 + +# Default gain factor for weight scaling. +_WSCALE_GAIN = np.sqrt(2.0) + + +class StyleGANDiscriminator(nn.Module): + """Defines the discriminator network in StyleGAN. + + NOTE: The discriminator takes images with `RGB` channel order and pixel + range [-1, 1] as inputs. + + Settings for the network: + + (1) resolution: The resolution of the input image. + (2) image_channels: Number of channels of the input image. (default: 3) + (3) label_size: Size of the additional label for conditional generation. + (default: 0) + (4) fused_scale: Whether to fused `conv2d` and `downsample` together, + resulting in `conv2d` with strides. (default: `auto`) + (5) use_wscale: Whether to use weight scaling. (default: True) + (6) minibatch_std_group_size: Group size for the minibatch standard + deviation layer. 0 means disable. (default: 4) + (7) minibatch_std_channels: Number of new channels after the minibatch + standard deviation layer. (default: 1) + (8) fmaps_base: Factor to control number of feature maps for each layer. + (default: 16 << 10) + (9) fmaps_max: Maximum number of feature maps in each layer. 
(default: 512) + """ + + def __init__(self, + resolution, + image_channels=3, + label_size=0, + fused_scale='auto', + use_wscale=True, + minibatch_std_group_size=4, + minibatch_std_channels=1, + fmaps_base=16 << 10, + fmaps_max=512): + """Initializes with basic settings. + + Raises: + ValueError: If the `resolution` is not supported, or `fused_scale` + is not supported. + """ + super().__init__() + + if resolution not in _RESOLUTIONS_ALLOWED: + raise ValueError(f'Invalid resolution: `{resolution}`!\n' + f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.') + if fused_scale not in _FUSED_SCALE_ALLOWED: + raise ValueError(f'Invalid fused-scale option: `{fused_scale}`!\n' + f'Options allowed: {_FUSED_SCALE_ALLOWED}.') + + self.init_res = _INIT_RES + self.init_res_log2 = int(np.log2(self.init_res)) + self.resolution = resolution + self.final_res_log2 = int(np.log2(self.resolution)) + self.image_channels = image_channels + self.label_size = label_size + self.fused_scale = fused_scale + self.use_wscale = use_wscale + self.minibatch_std_group_size = minibatch_std_group_size + self.minibatch_std_channels = minibatch_std_channels + self.fmaps_base = fmaps_base + self.fmaps_max = fmaps_max + + # Level of detail (used for progressive training). + self.register_buffer('lod', torch.zeros(())) + self.pth_to_tf_var_mapping = {'lod': 'lod'} + + for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1): + res = 2 ** res_log2 + block_idx = self.final_res_log2 - res_log2 + + # Input convolution layer for each resolution. + self.add_module( + f'input{block_idx}', + ConvBlock(in_channels=self.image_channels, + out_channels=self.get_nf(res), + kernel_size=1, + padding=0, + use_wscale=self.use_wscale)) + self.pth_to_tf_var_mapping[f'input{block_idx}.weight'] = ( + f'FromRGB_lod{block_idx}/weight') + self.pth_to_tf_var_mapping[f'input{block_idx}.bias'] = ( + f'FromRGB_lod{block_idx}/bias') + + # Convolution block for each resolution (except the last one). + if res != self.init_res: + if self.fused_scale == 'auto': + fused_scale = (res >= _AUTO_FUSED_SCALE_MIN_RES) + else: + fused_scale = self.fused_scale + self.add_module( + f'layer{2 * block_idx}', + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.get_nf(res), + use_wscale=self.use_wscale)) + tf_layer0_name = 'Conv0' + self.add_module( + f'layer{2 * block_idx + 1}', + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.get_nf(res // 2), + downsample=True, + fused_scale=fused_scale, + use_wscale=self.use_wscale)) + tf_layer1_name = 'Conv1_down' + + # Convolution block for last resolution. + else: + self.add_module( + f'layer{2 * block_idx}', + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.get_nf(res), + use_wscale=self.use_wscale, + minibatch_std_group_size=minibatch_std_group_size, + minibatch_std_channels=minibatch_std_channels)) + tf_layer0_name = 'Conv' + self.add_module( + f'layer{2 * block_idx + 1}', + DenseBlock(in_channels=self.get_nf(res) * res * res, + out_channels=self.get_nf(res // 2), + use_wscale=self.use_wscale)) + tf_layer1_name = 'Dense0' + + self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.weight'] = ( + f'{res}x{res}/{tf_layer0_name}/weight') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.bias'] = ( + f'{res}x{res}/{tf_layer0_name}/bias') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.weight'] = ( + f'{res}x{res}/{tf_layer1_name}/weight') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.bias'] = ( + f'{res}x{res}/{tf_layer1_name}/bias') + + # Final dense block. 
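+        # This head maps the final feature vector of the 4x4 block to a
+        # single realness score, or to `label_size` scores which `forward`
+        # projects onto the input label for conditional discrimination.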
+ self.add_module( + f'layer{2 * block_idx + 2}', + DenseBlock(in_channels=self.get_nf(res // 2), + out_channels=max(self.label_size, 1), + use_wscale=self.use_wscale, + wscale_gain=1.0, + activation_type='linear')) + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.weight'] = ( + f'{res}x{res}/Dense1/weight') + self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.bias'] = ( + f'{res}x{res}/Dense1/bias') + + self.downsample = DownsamplingLayer() + + def get_nf(self, res): + """Gets number of feature maps according to current resolution.""" + return min(self.fmaps_base // res, self.fmaps_max) + + def forward(self, image, label=None, lod=None, **_unused_kwargs): + expected_shape = (self.image_channels, self.resolution, self.resolution) + if image.ndim != 4 or image.shape[1:] != expected_shape: + raise ValueError(f'The input tensor should be with shape ' + f'[batch_size, channel, height, width], where ' + f'`channel` equals to {self.image_channels}, ' + f'`height`, `width` equal to {self.resolution}!\n' + f'But `{image.shape}` is received!') + + lod = self.lod.cpu().tolist() if lod is None else lod + if lod + self.init_res_log2 > self.final_res_log2: + raise ValueError(f'Maximum level-of-detail (lod) is ' + f'{self.final_res_log2 - self.init_res_log2}, ' + f'but `{lod}` is received!') + + if self.label_size: + if label is None: + raise ValueError(f'Model requires an additional label ' + f'(with size {self.label_size}) as input, ' + f'but no label is received!') + batch_size = image.shape[0] + if label.ndim != 2 or label.shape != (batch_size, self.label_size): + raise ValueError(f'Input label should be with shape ' + f'[batch_size, label_size], where ' + f'`batch_size` equals to that of ' + f'images ({image.shape[0]}) and ' + f'`label_size` equals to {self.label_size}!\n' + f'But `{label.shape}` is received!') + + for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1): + block_idx = current_lod = self.final_res_log2 - res_log2 + if current_lod <= lod < current_lod + 1: + x = self.__getattr__(f'input{block_idx}')(image) + elif current_lod - 1 < lod < current_lod: + alpha = lod - np.floor(lod) + x = (self.__getattr__(f'input{block_idx}')(image) * alpha + + x * (1 - alpha)) + if lod < current_lod + 1: + x = self.__getattr__(f'layer{2 * block_idx}')(x) + x = self.__getattr__(f'layer{2 * block_idx + 1}')(x) + if lod > current_lod: + image = self.downsample(image) + x = self.__getattr__(f'layer{2 * block_idx + 2}')(x) + + if self.label_size: + x = torch.sum(x * label, dim=1, keepdim=True) + + return x + + +class MiniBatchSTDLayer(nn.Module): + """Implements the minibatch standard deviation layer.""" + + def __init__(self, group_size=4, new_channels=1, epsilon=1e-8): + super().__init__() + self.group_size = group_size + self.new_channels = new_channels + self.epsilon = epsilon + + def forward(self, x): + if self.group_size <= 1: + return x + ng = min(self.group_size, x.shape[0]) + nc = self.new_channels + temp_c = x.shape[1] // nc # [NCHW] + y = x.view(ng, -1, nc, temp_c, x.shape[2], x.shape[3]) # [GMncHW] + y = y - torch.mean(y, dim=0, keepdim=True) # [GMncHW] + y = torch.mean(y ** 2, dim=0) # [MncHW] + y = torch.sqrt(y + self.epsilon) # [MncHW] + y = torch.mean(y, dim=[2, 3, 4], keepdim=True) # [Mn111] + y = torch.mean(y, dim=2) # [Mn11] + y = y.repeat(ng, 1, x.shape[2], x.shape[3]) # [NnHW] + return torch.cat([x, y], dim=1) + + +class DownsamplingLayer(nn.Module): + """Implements the downsampling layer. 
+ + Basically, this layer can be used to downsample feature maps with average + pooling. + """ + + def __init__(self, scale_factor=2): + super().__init__() + self.scale_factor = scale_factor + + def forward(self, x): + if self.scale_factor <= 1: + return x + return F.avg_pool2d(x, + kernel_size=self.scale_factor, + stride=self.scale_factor, + padding=0) + + +class Blur(torch.autograd.Function): + """Defines blur operation with customized gradient computation.""" + + @staticmethod + def forward(ctx, x, kernel): + ctx.save_for_backward(kernel) + y = F.conv2d(input=x, + weight=kernel, + bias=None, + stride=1, + padding=1, + groups=x.shape[1]) + return y + + @staticmethod + def backward(ctx, dy): + kernel, = ctx.saved_tensors + dx = BlurBackPropagation.apply(dy, kernel) + return dx, None, None + + +class BlurBackPropagation(torch.autograd.Function): + """Defines the back propagation of blur operation. + + NOTE: This is used to speed up the backward of gradient penalty. + """ + + @staticmethod + def forward(ctx, dy, kernel): + ctx.save_for_backward(kernel) + dx = F.conv2d(input=dy, + weight=kernel.flip((2, 3)), + bias=None, + stride=1, + padding=1, + groups=dy.shape[1]) + return dx + + @staticmethod + def backward(ctx, ddx): + kernel, = ctx.saved_tensors + ddy = F.conv2d(input=ddx, + weight=kernel, + bias=None, + stride=1, + padding=1, + groups=ddx.shape[1]) + return ddy, None, None + + +class BlurLayer(nn.Module): + """Implements the blur layer.""" + + def __init__(self, + channels, + kernel=(1, 2, 1), + normalize=True): + super().__init__() + kernel = np.array(kernel, dtype=np.float32).reshape(1, -1) + kernel = kernel.T.dot(kernel) + if normalize: + kernel = kernel / np.sum(kernel) + kernel = kernel[np.newaxis, np.newaxis] + kernel = np.tile(kernel, [channels, 1, 1, 1]) + self.register_buffer('kernel', torch.from_numpy(kernel)) + + def forward(self, x): + return Blur.apply(x, self.kernel) + + +class ConvBlock(nn.Module): + """Implements the convolutional block. + + Basically, this block executes minibatch standard deviation layer (if + needed), convolutional layer, activation layer, and downsampling layer ( + if needed) in sequence. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + add_bias=True, + downsample=False, + fused_scale=False, + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + lr_mul=1.0, + activation_type='lrelu', + minibatch_std_group_size=0, + minibatch_std_channels=1): + """Initializes with block settings. + + Args: + in_channels: Number of channels of the input tensor. + out_channels: Number of channels of the output tensor. + kernel_size: Size of the convolutional kernels. (default: 3) + stride: Stride parameter for convolution operation. (default: 1) + padding: Padding parameter for convolution operation. (default: 1) + add_bias: Whether to add bias onto the convolutional result. + (default: True) + downsample: Whether to downsample the result after convolution. + (default: False) + fused_scale: Whether to fused `conv2d` and `downsample` together, + resulting in `conv2d` with strides. (default: False) + use_wscale: Whether to use weight scaling. (default: True) + wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN) + lr_mul: Learning multiplier for both weight and bias. (default: 1.0) + activation_type: Type of activation. Support `linear` and `lrelu`. + (default: `lrelu`) + minibatch_std_group_size: Group size for the minibatch standard + deviation layer. 0 means disable. 
(default: 0) + minibatch_std_channels: Number of new channels after the minibatch + standard deviation layer. (default: 1) + + Raises: + NotImplementedError: If the `activation_type` is not supported. + """ + super().__init__() + + if minibatch_std_group_size > 1: + in_channels = in_channels + minibatch_std_channels + self.mbstd = MiniBatchSTDLayer(group_size=minibatch_std_group_size, + new_channels=minibatch_std_channels) + else: + self.mbstd = nn.Identity() + + if downsample: + self.blur = BlurLayer(channels=in_channels) + else: + self.blur = nn.Identity() + + if downsample and not fused_scale: + self.downsample = DownsamplingLayer() + else: + self.downsample = nn.Identity() + + if downsample and fused_scale: + self.use_stride = True + self.stride = 2 + self.padding = 1 + else: + self.use_stride = False + self.stride = stride + self.padding = padding + + weight_shape = (out_channels, in_channels, kernel_size, kernel_size) + fan_in = kernel_size * kernel_size * in_channels + wscale = wscale_gain / np.sqrt(fan_in) + if use_wscale: + self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul) + self.wscale = wscale * lr_mul + else: + self.weight = nn.Parameter( + torch.randn(*weight_shape) * wscale / lr_mul) + self.wscale = lr_mul + + if add_bias: + self.bias = nn.Parameter(torch.zeros(out_channels)) + self.bscale = lr_mul + else: + self.bias = None + + if activation_type == 'linear': + self.activate = nn.Identity() + elif activation_type == 'lrelu': + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + else: + raise NotImplementedError(f'Not implemented activation function: ' + f'`{activation_type}`!') + + def forward(self, x): + x = self.mbstd(x) + x = self.blur(x) + weight = self.weight * self.wscale + bias = self.bias * self.bscale if self.bias is not None else None + if self.use_stride: + weight = F.pad(weight, (1, 1, 1, 1, 0, 0, 0, 0), 'constant', 0.0) + weight = (weight[:, :, 1:, 1:] + weight[:, :, :-1, 1:] + + weight[:, :, 1:, :-1] + weight[:, :, :-1, :-1]) * 0.25 + x = F.conv2d(x, + weight=weight, + bias=bias, + stride=self.stride, + padding=self.padding) + x = self.downsample(x) + x = self.activate(x) + return x + + +class DenseBlock(nn.Module): + """Implements the dense block. + + Basically, this block executes fully-connected layer and activation layer. + """ + + def __init__(self, + in_channels, + out_channels, + add_bias=True, + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + lr_mul=1.0, + activation_type='lrelu'): + """Initializes with block settings. + + Args: + in_channels: Number of channels of the input tensor. + out_channels: Number of channels of the output tensor. + add_bias: Whether to add bias onto the fully-connected result. + (default: True) + use_wscale: Whether to use weight scaling. (default: True) + wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN) + lr_mul: Learning multiplier for both weight and bias. (default: 1.0) + activation_type: Type of activation. Support `linear` and `lrelu`. + (default: `lrelu`) + + Raises: + NotImplementedError: If the `activation_type` is not supported. 
+ """ + super().__init__() + weight_shape = (out_channels, in_channels) + wscale = wscale_gain / np.sqrt(in_channels) + if use_wscale: + self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul) + self.wscale = wscale * lr_mul + else: + self.weight = nn.Parameter( + torch.randn(*weight_shape) * wscale / lr_mul) + self.wscale = lr_mul + + if add_bias: + self.bias = nn.Parameter(torch.zeros(out_channels)) + self.bscale = lr_mul + else: + self.bias = None + + if activation_type == 'linear': + self.activate = nn.Identity() + elif activation_type == 'lrelu': + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + else: + raise NotImplementedError(f'Not implemented activation function: ' + f'`{activation_type}`!') + + def forward(self, x): + if x.ndim != 2: + x = x.view(x.shape[0], -1) + bias = self.bias * self.bscale if self.bias is not None else None + x = F.linear(x, weight=self.weight * self.wscale, bias=bias) + x = self.activate(x) + return x diff --git a/ContraCLIP/models/genforce/models/stylegan_generator.py b/ContraCLIP/models/genforce/models/stylegan_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..e41677a19081161d79c7a9c94fe86a5ba9fa5372 --- /dev/null +++ b/ContraCLIP/models/genforce/models/stylegan_generator.py @@ -0,0 +1,955 @@ +# python3.7 +"""Contains the implementation of generator described in StyleGAN. + +Paper: https://arxiv.org/pdf/1812.04948.pdf + +Official TensorFlow implementation: https://github.com/NVlabs/stylegan +""" + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .sync_op import all_gather + +__all__ = ['StyleGANGenerator'] + +# Resolutions allowed. +_RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024] + +# Initial resolution. +_INIT_RES = 4 + +# Fused-scale options allowed. +_FUSED_SCALE_ALLOWED = [True, False, 'auto'] + +# Minimal resolution for `auto` fused-scale strategy. +_AUTO_FUSED_SCALE_MIN_RES = 128 + +# Default gain factor for weight scaling. +_WSCALE_GAIN = np.sqrt(2.0) +_STYLEMOD_WSCALE_GAIN = 1.0 + + +class StyleGANGenerator(nn.Module): + """Defines the generator network in StyleGAN. + + NOTE: The synthesized images are with `RGB` channel order and pixel range + [-1, 1]. + + Settings for the mapping network: + + (1) z_space_dim: Dimension of the input latent space, Z. (default: 512) + (2) w_space_dim: Dimension of the outout latent space, W. (default: 512) + (3) label_size: Size of the additional label for conditional generation. + (default: 0) + (4)mapping_layers: Number of layers of the mapping network. (default: 8) + (5) mapping_fmaps: Number of hidden channels of the mapping network. + (default: 512) + (6) mapping_lr_mul: Learning rate multiplier for the mapping network. + (default: 0.01) + (7) repeat_w: Repeat w-code for different layers. + + Settings for the synthesis network: + + (1) resolution: The resolution of the output image. + (2) image_channels: Number of channels of the output image. (default: 3) + (3) final_tanh: Whether to use `tanh` to control the final pixel range. + (default: False) + (4) const_input: Whether to use a constant in the first convolutional layer. + (default: True) + (5) fused_scale: Whether to fused `upsample` and `conv2d` together, + resulting in `conv2d_transpose`. (default: `auto`) + (6) use_wscale: Whether to use weight scaling. (default: True) + (7) noise_type: Type of noise added to the convolutional results at each + layer. 
(default: `spatial`) + (8) fmaps_base: Factor to control number of feature maps for each layer. + (default: 16 << 10) + (9) fmaps_max: Maximum number of feature maps in each layer. (default: 512) + """ + + def __init__(self, + resolution, + z_space_dim=512, + w_space_dim=512, + label_size=0, + mapping_layers=8, + mapping_fmaps=512, + mapping_lr_mul=0.01, + repeat_w=True, + image_channels=3, + final_tanh=False, + const_input=True, + fused_scale='auto', + use_wscale=True, + noise_type='spatial', + fmaps_base=16 << 10, + fmaps_max=512, + latent_is_w=False): + """Initializes with basic settings. + + Raises: + ValueError: If the `resolution` is not supported, or `fused_scale` + is not supported. + """ + super().__init__() + + if resolution not in _RESOLUTIONS_ALLOWED: + raise ValueError(f'Invalid resolution: `{resolution}`!\n' + f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.') + if fused_scale not in _FUSED_SCALE_ALLOWED: + raise ValueError(f'Invalid fused-scale option: `{fused_scale}`!\n' + f'Options allowed: {_FUSED_SCALE_ALLOWED}.') + + self.init_res = _INIT_RES + self.resolution = resolution + self.z_space_dim = z_space_dim + self.w_space_dim = w_space_dim + self.label_size = label_size + self.mapping_layers = mapping_layers + self.mapping_fmaps = mapping_fmaps + self.mapping_lr_mul = mapping_lr_mul + self.repeat_w = repeat_w + self.image_channels = image_channels + self.final_tanh = final_tanh + self.const_input = const_input + self.fused_scale = fused_scale + self.use_wscale = use_wscale + self.noise_type = noise_type + self.fmaps_base = fmaps_base + self.fmaps_max = fmaps_max + self.latent_is_w = latent_is_w + + self.num_layers = int(np.log2(self.resolution // self.init_res * 2)) * 2 + + if self.repeat_w: + self.mapping_space_dim = self.w_space_dim + else: + self.mapping_space_dim = self.w_space_dim * self.num_layers + self.mapping = MappingModule(input_space_dim=self.z_space_dim, + hidden_space_dim=self.mapping_fmaps, + final_space_dim=self.mapping_space_dim, + label_size=self.label_size, + num_layers=self.mapping_layers, + use_wscale=self.use_wscale, + lr_mul=self.mapping_lr_mul) + + self.truncation = TruncationModule(w_space_dim=self.w_space_dim, + num_layers=self.num_layers, + repeat_w=self.repeat_w) + + self.synthesis = SynthesisModule(resolution=self.resolution, + init_resolution=self.init_res, + w_space_dim=self.w_space_dim, + image_channels=self.image_channels, + final_tanh=self.final_tanh, + const_input=self.const_input, + fused_scale=self.fused_scale, + use_wscale=self.use_wscale, + noise_type=self.noise_type, + fmaps_base=self.fmaps_base, + fmaps_max=self.fmaps_max) + + self.pth_to_tf_var_mapping = {} + for key, val in self.mapping.pth_to_tf_var_mapping.items(): + self.pth_to_tf_var_mapping[f'mapping.{key}'] = val + for key, val in self.truncation.pth_to_tf_var_mapping.items(): + self.pth_to_tf_var_mapping[f'truncation.{key}'] = val + for key, val in self.synthesis.pth_to_tf_var_mapping.items(): + self.pth_to_tf_var_mapping[f'synthesis.{key}'] = val + + def set_space_of_latent(self, space_of_latent='w'): + """Sets the space to which the latent code belong. + + This function is particually used for choosing how to inject the latent + code into the convolutional layers. The original generator will take a + W-Space code and apply it for style modulation after an affine + transformation. But, sometimes, it may need to directly feed an already + affine-transformed code into the convolutional layer, e.g., when + training an encoder for GAN inversion. 
We term the transformed space as + Style Space (or Y-Space). This function is designed to tell the + convolutional layers how to use the input code. + + Args: + space_of_latent: The space to which the latent code belong. Case + insensitive. (default: 'w') + """ + for module in self.modules(): + if isinstance(module, StyleModLayer): + setattr(module, 'space_of_latent', space_of_latent) + + def forward(self, + z, + label=None, + lod=None, + w_moving_decay=0.995, + style_mixing_prob=0.9, + trunc_psi=None, + trunc_layers=None, + randomize_noise=False, + **_unused_kwargs): + # mapping_results = self.mapping(z, label) + # w = mapping_results['w'] + # TODO: add comment + if self.latent_is_w: + w = z + else: + mapping_results = self.mapping(z, label) + w = mapping_results['w'] + + if self.training and w_moving_decay < 1: + batch_w_avg = all_gather(w).mean(dim=0) + self.truncation.w_avg.copy_( + self.truncation.w_avg * w_moving_decay + + batch_w_avg * (1 - w_moving_decay)) + + if self.training and style_mixing_prob > 0: + new_z = torch.randn_like(z) + new_w = self.mapping(new_z, label)['w'] + lod = self.synthesis.lod.cpu().tolist() if lod is None else lod + current_layers = self.num_layers - int(lod) * 2 + if np.random.uniform() < style_mixing_prob: + mixing_cutoff = np.random.randint(1, current_layers) + w = self.truncation(w) + new_w = self.truncation(new_w) + w[:, mixing_cutoff:] = new_w[:, mixing_cutoff:] + + wp = self.truncation(w, trunc_psi, trunc_layers) + synthesis_results = self.synthesis(wp, lod, randomize_noise) + + return synthesis_results['image'] + + def get_w(self, z, truncation, trunc_layers=18, label=None): + mapping_results = self.mapping(z, label) + w = mapping_results['w'] + wp = self.truncation(w, truncation, trunc_layers) + return wp + + +class MappingModule(nn.Module): + """Implements the latent space mapping module. + + Basically, this module executes several dense layers in sequence. 
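+
+    With the default settings, a 512-dimensional Z code is first normalized
+    (pixel norm) and then passed through 8 fully-connected layers with a
+    0.01 learning-rate multiplier to produce the 512-dimensional W code.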
+ """ + + def __init__(self, + input_space_dim=512, + hidden_space_dim=512, + final_space_dim=512, + label_size=0, + num_layers=8, + normalize_input=True, + use_wscale=True, + lr_mul=0.01): + super().__init__() + + self.input_space_dim = input_space_dim + self.hidden_space_dim = hidden_space_dim + self.final_space_dim = final_space_dim + self.label_size = label_size + self.num_layers = num_layers + self.normalize_input = normalize_input + self.use_wscale = use_wscale + self.lr_mul = lr_mul + + self.norm = PixelNormLayer() if self.normalize_input else nn.Identity() + + self.pth_to_tf_var_mapping = {} + for i in range(num_layers): + dim_mul = 2 if label_size else 1 + in_channels = (input_space_dim * dim_mul if i == 0 else + hidden_space_dim) + out_channels = (final_space_dim if i == (num_layers - 1) else + hidden_space_dim) + self.add_module(f'dense{i}', + DenseBlock(in_channels=in_channels, + out_channels=out_channels, + use_wscale=self.use_wscale, + lr_mul=self.lr_mul)) + self.pth_to_tf_var_mapping[f'dense{i}.weight'] = f'Dense{i}/weight' + self.pth_to_tf_var_mapping[f'dense{i}.bias'] = f'Dense{i}/bias' + if label_size: + self.label_weight = nn.Parameter( + torch.randn(label_size, input_space_dim)) + self.pth_to_tf_var_mapping[f'label_weight'] = f'LabelConcat/weight' + + def forward(self, z, label=None): + if z.ndim != 2 or z.shape[1] != self.input_space_dim: + raise ValueError(f'Input latent code should be with shape ' + f'[batch_size, input_dim], where ' + f'`input_dim` equals to {self.input_space_dim}!\n' + f'But `{z.shape}` is received!') + if self.label_size: + if label is None: + raise ValueError(f'Model requires an additional label ' + f'(with size {self.label_size}) as input, ' + f'but no label is received!') + if label.ndim != 2 or label.shape != (z.shape[0], self.label_size): + raise ValueError(f'Input label should be with shape ' + f'[batch_size, label_size], where ' + f'`batch_size` equals to that of ' + f'latent codes ({z.shape[0]}) and ' + f'`label_size` equals to {self.label_size}!\n' + f'But `{label.shape}` is received!') + embedding = torch.matmul(label, self.label_weight) + z = torch.cat((z, embedding), dim=1) + + z = self.norm(z) + w = z + for i in range(self.num_layers): + w = self.__getattr__(f'dense{i}')(w) + results = { + 'z': z, + 'label': label, + 'w': w, + } + if self.label_size: + results['embedding'] = embedding + return results + + +class TruncationModule(nn.Module): + """Implements the truncation module. + + Truncation is executed as follows: + + For layers in range [0, truncation_layers), the truncated w-code is computed + as + + w_new = w_avg + (w - w_avg) * truncation_psi + + To disable truncation, please set + (1) truncation_psi = 1.0 (None) OR + (2) truncation_layers = 0 (None) + + NOTE: The returned tensor is layer-wise style codes. 
+ """ + + def __init__(self, w_space_dim, num_layers, repeat_w=True): + super().__init__() + + self.num_layers = num_layers + self.w_space_dim = w_space_dim + self.repeat_w = repeat_w + + if self.repeat_w: + self.register_buffer('w_avg', torch.zeros(w_space_dim)) + else: + self.register_buffer('w_avg', torch.zeros(num_layers * w_space_dim)) + self.pth_to_tf_var_mapping = {'w_avg': 'dlatent_avg'} + + def forward(self, w, trunc_psi=None, trunc_layers=None): + if w.ndim == 2: + if self.repeat_w and w.shape[1] == self.w_space_dim: + w = w.view(-1, 1, self.w_space_dim) + wp = w.repeat(1, self.num_layers, 1) + else: + assert w.shape[1] == self.w_space_dim * self.num_layers + wp = w.view(-1, self.num_layers, self.w_space_dim) + else: + wp = w + assert wp.ndim == 3 + assert wp.shape[1:] == (self.num_layers, self.w_space_dim) + + trunc_psi = 1.0 if trunc_psi is None else trunc_psi + trunc_layers = 0 if trunc_layers is None else trunc_layers + if trunc_psi < 1.0 and trunc_layers > 0: + layer_idx = np.arange(self.num_layers).reshape(1, -1, 1) + coefs = np.ones_like(layer_idx, dtype=np.float32) + coefs[layer_idx < trunc_layers] *= trunc_psi + coefs = torch.from_numpy(coefs).to(wp) + w_avg = self.w_avg.view(1, -1, self.w_space_dim) + wp = w_avg + (wp - w_avg) * coefs + return wp + + +class SynthesisModule(nn.Module): + """Implements the image synthesis module. + + Basically, this module executes several convolutional layers in sequence. + """ + + def __init__(self, + resolution=1024, + init_resolution=4, + w_space_dim=512, + image_channels=3, + final_tanh=False, + const_input=True, + fused_scale='auto', + use_wscale=True, + noise_type='spatial', + fmaps_base=16 << 10, + fmaps_max=512): + super().__init__() + + self.init_res = init_resolution + self.init_res_log2 = int(np.log2(self.init_res)) + self.resolution = resolution + self.final_res_log2 = int(np.log2(self.resolution)) + self.w_space_dim = w_space_dim + self.image_channels = image_channels + self.final_tanh = final_tanh + self.const_input = const_input + self.fused_scale = fused_scale + self.use_wscale = use_wscale + self.noise_type = noise_type + self.fmaps_base = fmaps_base + self.fmaps_max = fmaps_max + + self.num_layers = (self.final_res_log2 - self.init_res_log2 + 1) * 2 + + # Level of detail (used for progressive training). + self.register_buffer('lod', torch.zeros(())) + self.pth_to_tf_var_mapping = {'lod': 'lod'} + + for res_log2 in range(self.init_res_log2, self.final_res_log2 + 1): + res = 2 ** res_log2 + block_idx = res_log2 - self.init_res_log2 + + # First convolution layer for each resolution. 
+ layer_name = f'layer{2 * block_idx}' + if res == self.init_res: + if self.const_input: + self.add_module(layer_name, + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.get_nf(res), + resolution=self.init_res, + w_space_dim=self.w_space_dim, + position='const_init', + use_wscale=self.use_wscale, + noise_type=self.noise_type)) + tf_layer_name = 'Const' + self.pth_to_tf_var_mapping[f'{layer_name}.const'] = ( + f'{res}x{res}/{tf_layer_name}/const') + else: + self.add_module(layer_name, + ConvBlock(in_channels=self.w_space_dim, + out_channels=self.get_nf(res), + resolution=self.init_res, + w_space_dim=self.w_space_dim, + kernel_size=self.init_res, + padding=self.init_res - 1, + use_wscale=self.use_wscale, + noise_type=self.noise_type)) + tf_layer_name = 'Dense' + self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = ( + f'{res}x{res}/{tf_layer_name}/weight') + else: + if self.fused_scale == 'auto': + fused_scale = (res >= _AUTO_FUSED_SCALE_MIN_RES) + else: + fused_scale = self.fused_scale + self.add_module(layer_name, + ConvBlock(in_channels=self.get_nf(res // 2), + out_channels=self.get_nf(res), + resolution=res, + w_space_dim=self.w_space_dim, + upsample=True, + fused_scale=fused_scale, + use_wscale=self.use_wscale, + noise_type=self.noise_type)) + tf_layer_name = 'Conv0_up' + self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = ( + f'{res}x{res}/{tf_layer_name}/weight') + self.pth_to_tf_var_mapping[f'{layer_name}.bias'] = ( + f'{res}x{res}/{tf_layer_name}/bias') + self.pth_to_tf_var_mapping[f'{layer_name}.style.weight'] = ( + f'{res}x{res}/{tf_layer_name}/StyleMod/weight') + self.pth_to_tf_var_mapping[f'{layer_name}.style.bias'] = ( + f'{res}x{res}/{tf_layer_name}/StyleMod/bias') + self.pth_to_tf_var_mapping[f'{layer_name}.apply_noise.weight'] = ( + f'{res}x{res}/{tf_layer_name}/Noise/weight') + self.pth_to_tf_var_mapping[f'{layer_name}.apply_noise.noise'] = ( + f'noise{2 * block_idx}') + + # Second convolution layer for each resolution. + layer_name = f'layer{2 * block_idx + 1}' + self.add_module(layer_name, + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.get_nf(res), + resolution=res, + w_space_dim=self.w_space_dim, + use_wscale=self.use_wscale, + noise_type=self.noise_type)) + tf_layer_name = 'Conv' if res == self.init_res else 'Conv1' + self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = ( + f'{res}x{res}/{tf_layer_name}/weight') + self.pth_to_tf_var_mapping[f'{layer_name}.bias'] = ( + f'{res}x{res}/{tf_layer_name}/bias') + self.pth_to_tf_var_mapping[f'{layer_name}.style.weight'] = ( + f'{res}x{res}/{tf_layer_name}/StyleMod/weight') + self.pth_to_tf_var_mapping[f'{layer_name}.style.bias'] = ( + f'{res}x{res}/{tf_layer_name}/StyleMod/bias') + self.pth_to_tf_var_mapping[f'{layer_name}.apply_noise.weight'] = ( + f'{res}x{res}/{tf_layer_name}/Noise/weight') + self.pth_to_tf_var_mapping[f'{layer_name}.apply_noise.noise'] = ( + f'noise{2 * block_idx + 1}') + + # Output convolution layer for each resolution. 
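+            # A 1x1 "ToRGB" convolution that turns the feature maps at this
+            # resolution into an image; `forward` uses these outputs to blend
+            # between adjacent levels of detail during progressive training.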
+ self.add_module(f'output{block_idx}', + ConvBlock(in_channels=self.get_nf(res), + out_channels=self.image_channels, + resolution=res, + w_space_dim=self.w_space_dim, + position='last', + kernel_size=1, + padding=0, + use_wscale=self.use_wscale, + wscale_gain=1.0, + activation_type='linear')) + self.pth_to_tf_var_mapping[f'output{block_idx}.weight'] = ( + f'ToRGB_lod{self.final_res_log2 - res_log2}/weight') + self.pth_to_tf_var_mapping[f'output{block_idx}.bias'] = ( + f'ToRGB_lod{self.final_res_log2 - res_log2}/bias') + + self.upsample = UpsamplingLayer() + self.final_activate = nn.Tanh() if final_tanh else nn.Identity() + + def get_nf(self, res): + """Gets number of feature maps according to current resolution.""" + return min(self.fmaps_base // res, self.fmaps_max) + + def forward(self, wp, lod=None, randomize_noise=False): + lod = self.lod.cpu().tolist() if lod is None else lod + if lod + self.init_res_log2 > self.final_res_log2: + raise ValueError(f'Maximum level-of-detail (lod) is ' + f'{self.final_res_log2 - self.init_res_log2}, ' + f'but `{lod}` is received!') + + results = {'wp': wp} + for res_log2 in range(self.init_res_log2, self.final_res_log2 + 1): + current_lod = self.final_res_log2 - res_log2 + if lod < current_lod + 1: + block_idx = res_log2 - self.init_res_log2 + if block_idx == 0: + if self.const_input: + x, style = self.layer0(None, wp[:, 0], randomize_noise) + else: + x = wp[:, 0].view(-1, self.w_space_dim, 1, 1) + x, style = self.layer0(x, wp[:, 0], randomize_noise) + else: + x, style = self.__getattr__(f'layer{2 * block_idx}')( + x, wp[:, 2 * block_idx], randomize_noise) + results[f'style{2 * block_idx:02d}'] = style + x, style = self.__getattr__(f'layer{2 * block_idx + 1}')( + x, wp[:, 2 * block_idx + 1], randomize_noise) + results[f'style{2 * block_idx + 1:02d}'] = style + if current_lod - 1 < lod <= current_lod: + image = self.__getattr__(f'output{block_idx}')(x, None) + elif current_lod < lod < current_lod + 1: + alpha = np.ceil(lod) - lod + image = (self.__getattr__(f'output{block_idx}')(x, None) * alpha + + self.upsample(image) * (1 - alpha)) + elif lod >= current_lod + 1: + image = self.upsample(image) + results['image'] = self.final_activate(image) + return results + + +class PixelNormLayer(nn.Module): + """Implements pixel-wise feature vector normalization layer.""" + + def __init__(self, epsilon=1e-8): + super().__init__() + self.eps = epsilon + + def forward(self, x): + norm = torch.sqrt(torch.mean(x ** 2, dim=1, keepdim=True) + self.eps) + return x / norm + + +class InstanceNormLayer(nn.Module): + """Implements instance normalization layer.""" + + def __init__(self, epsilon=1e-8): + super().__init__() + self.eps = epsilon + + def forward(self, x): + if x.ndim != 4: + raise ValueError(f'The input tensor should be with shape ' + f'[batch_size, channel, height, width], ' + f'but `{x.shape}` is received!') + x = x - torch.mean(x, dim=[2, 3], keepdim=True) + norm = torch.sqrt( + torch.mean(x ** 2, dim=[2, 3], keepdim=True) + self.eps) + return x / norm + + +class UpsamplingLayer(nn.Module): + """Implements the upsampling layer. + + Basically, this layer can be used to upsample feature maps with nearest + neighbor interpolation. 
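+
+    For example, with the default scale_factor=2, a tensor of shape
+    (N, C, H, W) becomes (N, C, 2H, 2W).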
+ """ + + def __init__(self, scale_factor=2): + super().__init__() + self.scale_factor = scale_factor + + def forward(self, x): + if self.scale_factor <= 1: + return x + return F.interpolate(x, scale_factor=self.scale_factor, mode='nearest') + + +class Blur(torch.autograd.Function): + """Defines blur operation with customized gradient computation.""" + + @staticmethod + def forward(ctx, x, kernel): + ctx.save_for_backward(kernel) + y = F.conv2d(input=x, + weight=kernel, + bias=None, + stride=1, + padding=1, + groups=x.shape[1]) + return y + + @staticmethod + def backward(ctx, dy): + kernel, = ctx.saved_tensors + dx = F.conv2d(input=dy, + weight=kernel.flip((2, 3)), + bias=None, + stride=1, + padding=1, + groups=dy.shape[1]) + return dx, None, None + + +class BlurLayer(nn.Module): + """Implements the blur layer.""" + + def __init__(self, + channels, + kernel=(1, 2, 1), + normalize=True): + super().__init__() + kernel = np.array(kernel, dtype=np.float32).reshape(1, -1) + kernel = kernel.T.dot(kernel) + if normalize: + kernel /= np.sum(kernel) + kernel = kernel[np.newaxis, np.newaxis] + kernel = np.tile(kernel, [channels, 1, 1, 1]) + self.register_buffer('kernel', torch.from_numpy(kernel)) + + def forward(self, x): + return Blur.apply(x, self.kernel) + + +class NoiseApplyingLayer(nn.Module): + """Implements the noise applying layer.""" + + def __init__(self, resolution, channels, noise_type='spatial'): + super().__init__() + self.noise_type = noise_type.lower() + self.res = resolution + self.channels = channels + if self.noise_type == 'spatial': + self.register_buffer('noise', torch.randn(1, 1, self.res, self.res)) + self.weight = nn.Parameter(torch.zeros(self.channels)) + elif self.noise_type == 'channel': + self.register_buffer('noise', torch.randn(1, self.channels, 1, 1)) + self.weight = nn.Parameter(torch.zeros(self.res, self.res)) + else: + raise NotImplementedError(f'Not implemented noise type: ' + f'`{self.noise_type}`!') + + def forward(self, x, randomize_noise=False): + if x.ndim != 4: + raise ValueError(f'The input tensor should be with shape ' + f'[batch_size, channel, height, width], ' + f'but `{x.shape}` is received!') + if randomize_noise: + if self.noise_type == 'spatial': + noise = torch.randn(x.shape[0], 1, self.res, self.res).to(x) + elif self.noise_type == 'channel': + noise = torch.randn(x.shape[0], self.channels, 1, 1).to(x) + else: + noise = self.noise + + if self.noise_type == 'spatial': + x = x + noise * self.weight.view(1, self.channels, 1, 1) + elif self.noise_type == 'channel': + x = x + noise * self.weight.view(1, 1, self.res, self.res) + return x + + +class StyleModLayer(nn.Module): + """Implements the style modulation layer.""" + + def __init__(self, + w_space_dim, + out_channels, + use_wscale=True): + super().__init__() + self.w_space_dim = w_space_dim + self.out_channels = out_channels + + weight_shape = (self.out_channels * 2, self.w_space_dim) + wscale = _STYLEMOD_WSCALE_GAIN / np.sqrt(self.w_space_dim) + if use_wscale: + self.weight = nn.Parameter(torch.randn(*weight_shape)) + self.wscale = wscale + else: + self.weight = nn.Parameter(torch.randn(*weight_shape) * wscale) + self.wscale = 1.0 + + self.bias = nn.Parameter(torch.zeros(self.out_channels * 2)) + self.space_of_latent = 'w' + + def forward_style(self, w): + """Gets style code from the given input. + + More specifically, if the input is from W-Space, it will be projected by + an affine transformation. If it is from the Style Space (Y-Space), no + operation is required. 
+ + NOTE: For codes from Y-Space, we use slicing to make sure the dimension + is correct, in case that the code is padded before fed into this layer. + """ + if self.space_of_latent == 'w': + if w.ndim != 2 or w.shape[1] != self.w_space_dim: + raise ValueError(f'The input tensor should be with shape ' + f'[batch_size, w_space_dim], where ' + f'`w_space_dim` equals to ' + f'{self.w_space_dim}!\n' + f'But `{w.shape}` is received!') + style = F.linear(w, + weight=self.weight * self.wscale, + bias=self.bias) + elif self.space_of_latent == 'y': + if w.ndim != 2 or w.shape[1] < 2 * self.out_channels: + raise ValueError(f'The input tensor should be with shape ' + f'[batch_size, y_space_dim], where ' + f'`y_space_dim` equals to ' + f'{2 * self.out_channels}!\n' + f'But `{w.shape}` is received!') + style = w[:, :2 * self.out_channels] + return style + + def forward(self, x, w): + style = self.forward_style(w) + style_split = style.view(-1, 2, self.out_channels, 1, 1) + x = x * (style_split[:, 0] + 1) + style_split[:, 1] + return x, style + + +class ConvBlock(nn.Module): + """Implements the normal convolutional block. + + Basically, this block executes upsampling layer (if needed), convolutional + layer, blurring layer, noise applying layer, activation layer, instance + normalization layer, and style modulation layer in sequence. + """ + + def __init__(self, + in_channels, + out_channels, + resolution, + w_space_dim, + position=None, + kernel_size=3, + stride=1, + padding=1, + add_bias=True, + upsample=False, + fused_scale=False, + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + lr_mul=1.0, + activation_type='lrelu', + noise_type='spatial'): + """Initializes with block settings. + + Args: + in_channels: Number of channels of the input tensor. + out_channels: Number of channels of the output tensor. + resolution: Resolution of the output tensor. + w_space_dim: Dimension of W space for style modulation. + position: Position of the layer. `const_init`, `last` would lead to + different behavior. (default: None) + kernel_size: Size of the convolutional kernels. (default: 3) + stride: Stride parameter for convolution operation. (default: 1) + padding: Padding parameter for convolution operation. (default: 1) + add_bias: Whether to add bias onto the convolutional result. + (default: True) + upsample: Whether to upsample the input tensor before convolution. + (default: False) + fused_scale: Whether to fused `upsample` and `conv2d` together, + resulting in `conv2d_transpose`. (default: False) + use_wscale: Whether to use weight scaling. (default: True) + wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN) + lr_mul: Learning multiplier for both weight and bias. (default: 1.0) + activation_type: Type of activation. Support `linear` and `lrelu`. + (default: `lrelu`) + noise_type: Type of noise added to the feature map after the + convolution (if needed). Support `spatial` and `channel`. + (default: `spatial`) + + Raises: + NotImplementedError: If the `activation_type` is not supported. 
+ """ + super().__init__() + + self.position = position + + if add_bias: + self.bias = nn.Parameter(torch.zeros(out_channels)) + self.bscale = lr_mul + else: + self.bias = None + + if activation_type == 'linear': + self.activate = nn.Identity() + elif activation_type == 'lrelu': + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + else: + raise NotImplementedError(f'Not implemented activation function: ' + f'`{activation_type}`!') + + if self.position != 'last': + self.apply_noise = NoiseApplyingLayer( + resolution, out_channels, noise_type=noise_type) + self.normalize = InstanceNormLayer() + self.style = StyleModLayer(w_space_dim, out_channels, use_wscale) + + if self.position == 'const_init': + self.const = nn.Parameter( + torch.ones(1, in_channels, resolution, resolution)) + return + + self.blur = BlurLayer(out_channels) if upsample else nn.Identity() + + if upsample and not fused_scale: + self.upsample = UpsamplingLayer() + else: + self.upsample = nn.Identity() + + if upsample and fused_scale: + self.use_conv2d_transpose = True + self.stride = 2 + self.padding = 1 + else: + self.use_conv2d_transpose = False + self.stride = stride + self.padding = padding + + weight_shape = (out_channels, in_channels, kernel_size, kernel_size) + fan_in = kernel_size * kernel_size * in_channels + wscale = wscale_gain / np.sqrt(fan_in) + if use_wscale: + self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul) + self.wscale = wscale * lr_mul + else: + self.weight = nn.Parameter( + torch.randn(*weight_shape) * wscale / lr_mul) + self.wscale = lr_mul + + def forward(self, x, w, randomize_noise=False): + if self.position != 'const_init': + x = self.upsample(x) + weight = self.weight * self.wscale + if self.use_conv2d_transpose: + weight = F.pad(weight, (1, 1, 1, 1, 0, 0, 0, 0), 'constant', 0) + weight = (weight[:, :, 1:, 1:] + weight[:, :, :-1, 1:] + + weight[:, :, 1:, :-1] + weight[:, :, :-1, :-1]) + weight = weight.permute(1, 0, 2, 3) + x = F.conv_transpose2d(x, + weight=weight, + bias=None, + stride=self.stride, + padding=self.padding) + else: + x = F.conv2d(x, + weight=weight, + bias=None, + stride=self.stride, + padding=self.padding) + x = self.blur(x) + else: + x = self.const.repeat(w.shape[0], 1, 1, 1) + + bias = self.bias * self.bscale if self.bias is not None else None + + if self.position == 'last': + if bias is not None: + x = x + bias.view(1, -1, 1, 1) + return x + + x = self.apply_noise(x, randomize_noise) + if bias is not None: + x = x + bias.view(1, -1, 1, 1) + x = self.activate(x) + x = self.normalize(x) + x, style = self.style(x, w) + return x, style + + +class DenseBlock(nn.Module): + """Implements the dense block. + + Basically, this block executes fully-connected layer and activation layer. + """ + + def __init__(self, + in_channels, + out_channels, + add_bias=True, + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + lr_mul=1.0, + activation_type='lrelu'): + """Initializes with block settings. + + Args: + in_channels: Number of channels of the input tensor. + out_channels: Number of channels of the output tensor. + add_bias: Whether to add bias onto the fully-connected result. + (default: True) + use_wscale: Whether to use weight scaling. (default: True) + wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN) + lr_mul: Learning multiplier for both weight and bias. (default: 1.0) + activation_type: Type of activation. Support `linear` and `lrelu`. + (default: `lrelu`) + + Raises: + NotImplementedError: If the `activation_type` is not supported. 
+ """ + super().__init__() + weight_shape = (out_channels, in_channels) + wscale = wscale_gain / np.sqrt(in_channels) + if use_wscale: + self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul) + self.wscale = wscale * lr_mul + else: + self.weight = nn.Parameter( + torch.randn(*weight_shape) * wscale / lr_mul) + self.wscale = lr_mul + + if add_bias: + self.bias = nn.Parameter(torch.zeros(out_channels)) + self.bscale = lr_mul + else: + self.bias = None + + if activation_type == 'linear': + self.activate = nn.Identity() + elif activation_type == 'lrelu': + self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) + else: + raise NotImplementedError(f'Not implemented activation function: ' + f'`{activation_type}`!') + + def forward(self, x): + if x.ndim != 2: + x = x.view(x.shape[0], -1) + bias = self.bias * self.bscale if self.bias is not None else None + x = F.linear(x, weight=self.weight * self.wscale, bias=bias) + x = self.activate(x) + return x diff --git a/ContraCLIP/models/genforce/models/sync_op.py b/ContraCLIP/models/genforce/models/sync_op.py new file mode 100644 index 0000000000000000000000000000000000000000..b3d3bf9d1db037523208c0e96c78dedb4f90014b --- /dev/null +++ b/ContraCLIP/models/genforce/models/sync_op.py @@ -0,0 +1,18 @@ +# python3.7 +"""Contains the synchronizing operator.""" + +import torch +import torch.distributed as dist + +__all__ = ['all_gather'] + + +def all_gather(tensor): + """Gathers tensor from all devices and does averaging.""" + if not dist.is_initialized(): + return tensor + + world_size = dist.get_world_size() + tensor_list = [torch.ones_like(tensor) for _ in range(world_size)] + dist.all_gather(tensor_list, tensor, async_op=False) + return torch.mean(torch.stack(tensor_list, dim=0), dim=0) diff --git a/ContraCLIP/models/genforce/runners/__init__.py b/ContraCLIP/models/genforce/runners/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ac4a859d39688560c56a7c9768e6ca273a7dbd57 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/__init__.py @@ -0,0 +1,6 @@ +# python3.7 +"""Collects all runners.""" +from .stylegan_runner import StyleGANRunner +from .encoder_runner import EncoderRunner + +__all__ = ['StyleGANRunner', 'EncoderRunner'] diff --git a/ContraCLIP/models/genforce/runners/base_encoder_runner.py b/ContraCLIP/models/genforce/runners/base_encoder_runner.py new file mode 100644 index 0000000000000000000000000000000000000000..0b2433e641fa44ce81ea2aefdd9d950abd0af051 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/base_encoder_runner.py @@ -0,0 +1,152 @@ +# python3.7 +"""Contains the base class for Encoder (GAN Inversion) runner.""" + +import os +import shutil + +import torch +import torch.distributed as dist + +from utils.visualizer import HtmlPageVisualizer +from utils.visualizer import get_grid_shape +from utils.visualizer import postprocess_image +from utils.visualizer import save_image +from utils.visualizer import load_image +from .base_runner import BaseRunner + +__all__ = ['BaseEncoderRunner'] + + +class BaseEncoderRunner(BaseRunner): + """Defines the base class for Encoder runner.""" + + def __init__(self, config, logger): + super().__init__(config, logger) + self.inception_model = None + + def build_models(self): + super().build_models() + assert 'encoder' in self.models + assert 'generator' in self.models + assert 'discriminator' in self.models + + self.resolution = self.models['generator'].resolution + self.G_kwargs_train = self.config.modules['generator'].get( + 'kwargs_train', dict()) + 
self.G_kwargs_val = self.config.modules['generator'].get( + 'kwargs_val', dict()) + self.D_kwargs_train = self.config.modules['discriminator'].get( + 'kwargs_train', dict()) + self.D_kwargs_val = self.config.modules['discriminator'].get( + 'kwargs_val', dict()) + + def train_step(self, data, **train_kwargs): + raise NotImplementedError('Should be implemented in derived class.') + + def val(self, **val_kwargs): + self.synthesize(**val_kwargs) + + def synthesize(self, + num, + html_name=None, + save_raw_synthesis=False): + """Synthesizes images. + + Args: + num: Number of images to synthesize. + z: Latent codes used for generation. If not specified, this function + will sample latent codes randomly. (default: None) + html_name: Name of the output html page for visualization. If not + specified, no visualization page will be saved. (default: None) + save_raw_synthesis: Whether to save raw synthesis on the disk. + (default: False) + """ + if not html_name and not save_raw_synthesis: + return + + self.set_mode('val') + + if self.val_loader is None: + self.build_dataset('val') + + temp_dir = os.path.join(self.work_dir, 'synthesize_results') + os.makedirs(temp_dir, exist_ok=True) + + if not num: + return + if num % self.val_batch_size != 0: + num = (num //self.val_batch_size +1)*self.val_batch_size + # TODO: Use same z during the entire training process. + + self.logger.init_pbar() + task1 = self.logger.add_pbar_task('Synthesize', total=num) + + indices = list(range(self.rank, num, self.world_size)) + for batch_idx in range(0, len(indices), self.val_batch_size): + sub_indices = indices[batch_idx:batch_idx + self.val_batch_size] + batch_size = len(sub_indices) + data = next(self.val_loader) + for key in data: + data[key] = data[key][:batch_size].cuda( + torch.cuda.current_device(), non_blocking=True) + + with torch.no_grad(): + real_images = data['image'] + E = self.models['encoder'] + if 'generator_smooth' in self.models: + G = self.get_module(self.models['generator_smooth']) + else: + G = self.get_module(self.models['generator']) + latents = E(real_images) + if self.config.space_of_latent == 'z': + rec_images = G( + latents, **self.G_kwargs_val)['image'] + elif self.config.space_of_latent == 'wp': + rec_images = G.synthesis( + latents, **self.G_kwargs_val)['image'] + elif self.config.space_of_latent == 'y': + G.set_space_of_latent('y') + rec_images = G.synthesis( + latents, **self.G_kwargs_val)['image'] + else: + raise NotImplementedError( + f'Space of latent `{self.config.space_of_latent}` ' + f'is not supported!') + rec_images = postprocess_image( + rec_images.detach().cpu().numpy()) + real_images = postprocess_image( + real_images.detach().cpu().numpy()) + for sub_idx, rec_image, real_image in zip( + sub_indices, rec_images, real_images): + save_image(os.path.join(temp_dir, f'{sub_idx:06d}_rec.jpg'), + rec_image) + save_image(os.path.join(temp_dir, f'{sub_idx:06d}_ori.jpg'), + real_image) + self.logger.update_pbar(task1, batch_size * self.world_size) + + dist.barrier() + if self.rank != 0: + return + + if html_name: + task2 = self.logger.add_pbar_task('Visualize', total=num) + row, col = get_grid_shape(num * 2) + if row % 2 != 0: + row, col = col, row + html = HtmlPageVisualizer(num_rows=row, num_cols=col) + for image_idx in range(num): + rec_image = load_image( + os.path.join(temp_dir, f'{image_idx:06d}_rec.jpg')) + real_image = load_image( + os.path.join(temp_dir, f'{image_idx:06d}_ori.jpg')) + row_idx, col_idx = divmod(image_idx, html.num_cols) + html.set_cell(2*row_idx, col_idx, 
image=real_image, + text=f'Sample {image_idx:06d}_ori') + html.set_cell(2*row_idx+1, col_idx, image=rec_image, + text=f'Sample {image_idx:06d}_rec') + self.logger.update_pbar(task2, 1) + html.save(os.path.join(self.work_dir, html_name)) + if not save_raw_synthesis: + shutil.rmtree(temp_dir) + + self.logger.close_pbar() diff --git a/ContraCLIP/models/genforce/runners/base_gan_runner.py b/ContraCLIP/models/genforce/runners/base_gan_runner.py new file mode 100644 index 0000000000000000000000000000000000000000..214f4057e91a68e814a5a6deda15c370d6cb5858 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/base_gan_runner.py @@ -0,0 +1,266 @@ +# python3.7 +"""Contains the base class for GAN runner.""" + +import os +import shutil +import numpy as np + +import torch +import torch.distributed as dist + +from metrics.inception import build_inception_model +from metrics.fid import extract_feature +from metrics.fid import compute_fid +from utils.visualizer import HtmlPageVisualizer +from utils.visualizer import postprocess_image +from utils.visualizer import save_image +from utils.visualizer import load_image +from .base_runner import BaseRunner + +__all__ = ['BaseGANRunner'] + + +class BaseGANRunner(BaseRunner): + """Defines the base class for GAN runner.""" + + def __init__(self, config, logger): + super().__init__(config, logger) + self.inception_model = None + + def moving_average_model(self, model, avg_model, beta=0.999): + """Moving average model weights. + + This trick is commonly used in GAN training, where the weight of the + generator is life-long averaged + + Args: + model: The latest model used to update the averaged weights. + avg_model: The averaged model weights. + beta: Hyper-parameter used for moving average. + """ + model_params = dict(self.get_module(model).named_parameters()) + avg_params = dict(self.get_module(avg_model).named_parameters()) + + assert len(model_params) == len(avg_params) + for param_name in avg_params: + assert param_name in model_params + avg_params[param_name].data = ( + avg_params[param_name].data * beta + + model_params[param_name].data * (1 - beta)) + + def build_models(self): + super().build_models() + assert 'generator' in self.models + assert 'discriminator' in self.models + self.z_space_dim = self.models['generator'].z_space_dim + self.resolution = self.models['generator'].resolution + self.G_kwargs_train = self.config.modules['generator'].get( + 'kwargs_train', dict()) + self.G_kwargs_val = self.config.modules['generator'].get( + 'kwargs_val', dict()) + self.D_kwargs_train = self.config.modules['discriminator'].get( + 'kwargs_train', dict()) + self.D_kwargs_val = self.config.modules['discriminator'].get( + 'kwargs_val', dict()) + + def train_step(self, data, **train_kwargs): + raise NotImplementedError('Should be implemented in derived class.') + + def val(self, **val_kwargs): + self.synthesize(**val_kwargs) + + def synthesize(self, + num, + z=None, + html_name=None, + save_raw_synthesis=False): + """Synthesizes images. + + Args: + num: Number of images to synthesize. + z: Latent codes used for generation. If not specified, this function + will sample latent codes randomly. (default: None) + html_name: Name of the output html page for visualization. If not + specified, no visualization page will be saved. (default: None) + save_raw_synthesis: Whether to save raw synthesis on the disk. 
+ (default: False) + """ + if not html_name and not save_raw_synthesis: + return + + self.set_mode('val') + + temp_dir = os.path.join(self.work_dir, 'synthesize_results') + os.makedirs(temp_dir, exist_ok=True) + + if z is not None: + assert isinstance(z, np.ndarray) + assert z.ndim == 2 and z.shape[1] == self.z_space_dim + num = min(num, z.shape[0]) + z = torch.from_numpy(z).type(torch.FloatTensor) + if not num: + return + # TODO: Use same z during the entire training process. + + self.logger.init_pbar() + task1 = self.logger.add_pbar_task('Synthesize', total=num) + + indices = list(range(self.rank, num, self.world_size)) + for batch_idx in range(0, len(indices), self.val_batch_size): + sub_indices = indices[batch_idx:batch_idx + self.val_batch_size] + batch_size = len(sub_indices) + if z is None: + code = torch.randn(batch_size, self.z_space_dim).cuda() + else: + code = z[sub_indices].cuda() + with torch.no_grad(): + if 'generator_smooth' in self.models: + G = self.models['generator_smooth'] + else: + G = self.models['generator'] + images = G(code, **self.G_kwargs_val)['image'] + images = postprocess_image(images.detach().cpu().numpy()) + for sub_idx, image in zip(sub_indices, images): + save_image(os.path.join(temp_dir, f'{sub_idx:06d}.jpg'), image) + self.logger.update_pbar(task1, batch_size * self.world_size) + + dist.barrier() + if self.rank != 0: + return + + if html_name: + task2 = self.logger.add_pbar_task('Visualize', total=num) + html = HtmlPageVisualizer(grid_size=num) + for image_idx in range(num): + image = load_image( + os.path.join(temp_dir, f'{image_idx:06d}.jpg')) + row_idx, col_idx = divmod(image_idx, html.num_cols) + html.set_cell(row_idx, col_idx, image=image, + text=f'Sample {image_idx:06d}') + self.logger.update_pbar(task2, 1) + html.save(os.path.join(self.work_dir, html_name)) + if not save_raw_synthesis: + shutil.rmtree(temp_dir) + + self.logger.close_pbar() + + def fid(self, + fid_num, + z=None, + ignore_cache=False, + align_tf=True): + """Computes the FID metric.""" + self.set_mode('val') + + if self.val_loader is None: + self.build_dataset('val') + fid_num = min(fid_num, len(self.val_loader.dataset)) + + if self.inception_model is None: + if align_tf: + self.logger.info(f'Building inception model ' + f'(aligned with TensorFlow) ...') + else: + self.logger.info(f'Building inception model ' + f'(using torchvision) ...') + self.inception_model = build_inception_model(align_tf).cuda() + self.logger.info(f'Finish building inception model.') + + if z is not None: + assert isinstance(z, np.ndarray) + assert z.ndim == 2 and z.shape[1] == self.z_space_dim + fid_num = min(fid_num, z.shape[0]) + z = torch.from_numpy(z).type(torch.FloatTensor) + if not fid_num: + return -1 + + indices = list(range(self.rank, fid_num, self.world_size)) + + self.logger.init_pbar() + + # Extract features from fake images. 
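+        # Each rank processes only its strided slice of the `fid_num` samples
+        # (indices rank, rank + world_size, ...), extracts Inception features
+        # for its generated images, and dumps them to a per-rank
+        # `fake_fid_features_{rank}.npy` file; rank 0 merges these files below.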
+ fake_feature_list = [] + task1 = self.logger.add_pbar_task('Fake', total=fid_num) + for batch_idx in range(0, len(indices), self.val_batch_size): + sub_indices = indices[batch_idx:batch_idx + self.val_batch_size] + batch_size = len(sub_indices) + if z is None: + code = torch.randn(batch_size, self.z_space_dim).cuda() + else: + code = z[sub_indices].cuda() + with torch.no_grad(): + if 'generator_smooth' in self.models: + G = self.models['generator_smooth'] + else: + G = self.models['generator'] + fake_images = G(code)['image'] + fake_feature_list.append( + extract_feature(self.inception_model, fake_images)) + self.logger.update_pbar(task1, batch_size * self.world_size) + np.save(f'{self.work_dir}/fake_fid_features_{self.rank}.npy', + np.concatenate(fake_feature_list, axis=0)) + + # Extract features from real images if needed. + cached_fid_file = f'{self.work_dir}/real_fid{fid_num}.npy' + do_real_test = (not os.path.exists(cached_fid_file) or ignore_cache) + if do_real_test: + real_feature_list = [] + task2 = self.logger.add_pbar_task("Real", total=fid_num) + for batch_idx in range(0, len(indices), self.val_batch_size): + sub_indices = indices[batch_idx:batch_idx + self.val_batch_size] + batch_size = len(sub_indices) + data = next(self.val_loader) + for key in data: + data[key] = data[key][:batch_size].cuda( + torch.cuda.current_device(), non_blocking=True) + with torch.no_grad(): + real_images = data['image'] + real_feature_list.append( + extract_feature(self.inception_model, real_images)) + self.logger.update_pbar(task2, batch_size * self.world_size) + np.save(f'{self.work_dir}/real_fid_features_{self.rank}.npy', + np.concatenate(real_feature_list, axis=0)) + + dist.barrier() + if self.rank != 0: + return -1 + self.logger.close_pbar() + + # Collect fake features. + fake_feature_list.clear() + for rank in range(self.world_size): + fake_feature_list.append( + np.load(f'{self.work_dir}/fake_fid_features_{rank}.npy')) + os.remove(f'{self.work_dir}/fake_fid_features_{rank}.npy') + fake_features = np.concatenate(fake_feature_list, axis=0) + assert fake_features.ndim == 2 and fake_features.shape[0] == fid_num + feature_dim = fake_features.shape[1] + pad = fid_num % self.world_size + if pad: + pad = self.world_size - pad + fake_features = np.pad(fake_features, ((0, pad), (0, 0))) + fake_features = fake_features.reshape(self.world_size, -1, feature_dim) + fake_features = fake_features.transpose(1, 0, 2) + fake_features = fake_features.reshape(-1, feature_dim)[:fid_num] + + # Collect (or load) real features. 
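+        # Real features are brought back into dataset order with the same
+        # pad/reshape/transpose interleaving used for the fake features above,
+        # then cached to `real_fid{fid_num}.npy` so that later FID evaluations
+        # with the same `fid_num` can skip re-extraction (unless `ignore_cache`).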
+ if do_real_test: + real_feature_list.clear() + for rank in range(self.world_size): + real_feature_list.append( + np.load(f'{self.work_dir}/real_fid_features_{rank}.npy')) + os.remove(f'{self.work_dir}/real_fid_features_{rank}.npy') + real_features = np.concatenate(real_feature_list, axis=0) + assert real_features.shape == (fid_num, feature_dim) + real_features = np.pad(real_features, ((0, pad), (0, 0))) + real_features = real_features.reshape( + self.world_size, -1, feature_dim) + real_features = real_features.transpose(1, 0, 2) + real_features = real_features.reshape(-1, feature_dim)[:fid_num] + np.save(cached_fid_file, real_features) + else: + real_features = np.load(cached_fid_file) + assert real_features.shape == (fid_num, feature_dim) + + fid_value = compute_fid(fake_features, real_features) + return fid_value diff --git a/ContraCLIP/models/genforce/runners/base_runner.py b/ContraCLIP/models/genforce/runners/base_runner.py new file mode 100644 index 0000000000000000000000000000000000000000..c05146e55375a5a310192d5ddc8a9a9aaa3e0a5b --- /dev/null +++ b/ContraCLIP/models/genforce/runners/base_runner.py @@ -0,0 +1,451 @@ +# python3.7 +"""Contains the base class for runner. + +This runner can be used for both training and inference with multi-threads. +""" + +import os +import json +from copy import deepcopy + +import torch +import torch.distributed as dist + +from datasets import BaseDataset +from datasets import IterDataLoader +from models import build_model +from . import controllers +from . import losses +from . import misc +from .optimizer import build_optimizers +from .running_stats import RunningStats + + +def _strip_state_dict_prefix(state_dict, prefix='module.'): + """Removes the name prefix in checkpoint. + + Basically, when the model is deployed in parallel, the prefix `module.` will + be added to the saved checkpoint. This function is used to remove the + prefix, which is friendly to checkpoint loading. + + Args: + state_dict: The state dict where the variable names are processed. + prefix: The prefix to remove. 
(default: `module.`) + """ + if not all(key.startswith(prefix) for key in state_dict.keys()): + return state_dict + + stripped_state_dict = dict() + for key in state_dict: + stripped_state_dict[key.replace(prefix, '')] = state_dict[key] + return stripped_state_dict + + +class BaseRunner(object): + """Defines the base runner class.""" + + def __init__(self, config, logger): + self._name = self.__class__.__name__ + self._config = deepcopy(config) + self.logger = logger + self.work_dir = self.config.work_dir + os.makedirs(self.work_dir, exist_ok=True) + + self.logger.info('Running Configuration:') + config_str = json.dumps(self.config, indent=4).replace('"', '\'') + self.logger.print(config_str + '\n') + with open(os.path.join(self.work_dir, 'config.json'), 'w') as f: + json.dump(self.config, f, indent=4) + self._rank = dist.get_rank() + self._world_size = dist.get_world_size() + + self.batch_size = self.config.batch_size + self.val_batch_size = self.config.get('val_batch_size', self.batch_size) + self._iter = 0 + self._start_iter = 0 + self.seen_img = 0 + self.total_iters = self.config.get('total_iters', 0) + if self.total_iters == 0 and self.config.get('total_img', 0) > 0: + total_image = self.config.get('total_img') + total_batch = self.world_size * self.batch_size + self.total_iters = int(total_image / total_batch + 0.5) + + self.mode = None + self.train_loader = None + self.val_loader = None + + self.models = dict() + self.optimizers = dict() + self.lr_schedulers = dict() + self.controllers = [] + self.loss = None + + self.running_stats = RunningStats() + self.start_time = 0 + self.end_time = 0 + self.timer = controllers.Timer() + self.timer.start(self) + + self.build_models() + self.build_controllers() + + def finish(self): + """Finishes runner by ending controllers and timer.""" + for controller in self.controllers: + controller.end(self) + self.timer.end(self) + self.logger.info(f'Finish runner in ' + f'{misc.format_time(self.end_time - self.start_time)}') + + @property + def name(self): + """Returns the name of the runner.""" + return self._name + + @property + def config(self): + """Returns the configuration of the runner.""" + return self._config + + @property + def rank(self): + """Returns the rank of the current runner.""" + return self._rank + + @property + def world_size(self): + """Returns the world size.""" + return self._world_size + + @property + def iter(self): + """Returns the current iteration.""" + return self._iter + + @property + def start_iter(self): + """Returns the start iteration.""" + return self._start_iter + + def convert_epoch_to_iter(self, epoch): + """Converts number of epochs to number of iterations.""" + return int(epoch * len(self.train_loader) + 0.5) + + def build_dataset(self, mode): + """Builds train/val dataset.""" + if not hasattr(self.config, 'data'): + return + assert isinstance(mode, str) + mode = mode.lower() + self.logger.info(f'Building `{mode}` dataset ...') + if mode not in ['train', 'val']: + raise ValueError(f'Invalid dataset mode `{mode}`!') + dataset = BaseDataset(**self.config.data[mode]) + if mode == 'train': + self.train_loader = IterDataLoader( + dataset=dataset, + batch_size=self.batch_size, + shuffle=True, + num_workers=self.config.data.get('num_workers', 2), + current_iter=self.iter, + repeat=self.config.data.get('repeat', 1)) + elif mode == 'val': + self.val_loader = IterDataLoader( + dataset=dataset, + batch_size=self.val_batch_size, + shuffle=False, + num_workers=self.config.data.get('num_workers', 2), + current_iter=0, + 
repeat=1) + else: + raise NotImplementedError(f'Not implemented dataset mode `{mode}`!') + self.logger.info(f'Finish building `{mode}` dataset.') + + def build_models(self): + """Builds models, optimizers, and learning rate schedulers.""" + self.logger.info(f'Building models ...') + lr_config = dict() + opt_config = dict() + for module, module_config in self.config.modules.items(): + model_config = module_config['model'] + self.models[module] = build_model(module=module, **model_config) + self.models[module].cuda() + opt_config[module] = module_config.get('opt', None) + lr_config[module] = module_config.get('lr', None) + build_optimizers(opt_config, self) + self.controllers.append(controllers.LRScheduler(lr_config)) + self.logger.info(f'Finish building models.') + + model_info = 'Model structures:\n' + model_info += '==============================================\n' + for module in self.models: + model_info += f'{module}\n' + model_info += '----------------------------------------------\n' + model_info += str(self.models[module]) + model_info += '\n' + model_info += "==============================================\n" + self.logger.info(model_info) + + def distribute(self): + """Sets `self.model` as `torch.nn.parallel.DistributedDataParallel`.""" + for name in self.models: + self.models[name] = torch.nn.parallel.DistributedDataParallel( + module=self.models[name], + device_ids=[torch.cuda.current_device()], + broadcast_buffers=False, + find_unused_parameters=True) + + @staticmethod + def get_module(model): + """Handles distributed model.""" + if hasattr(model, 'module'): + return model.module + return model + + def build_controllers(self): + """Builds additional controllers besides LRScheduler.""" + if not hasattr(self.config, 'controllers'): + return + self.logger.info(f'Building controllers ...') + for key, ctrl_config in self.config.controllers.items(): + self.controllers.append(getattr(controllers, key)(ctrl_config)) + self.controllers.sort(key=lambda x: x.priority) + for controller in self.controllers: + controller.start(self) + self.logger.info(f'Finish building controllers.') + + def build_loss(self): + """Builds loss functions.""" + if not hasattr(self.config, 'loss'): + return + self.logger.info(f'Building loss function ...') + loss_config = deepcopy(self.config.loss) + loss_type = loss_config.pop('type') + self.loss = getattr(losses, loss_type)(self, **loss_config) + self.logger.info(f'Finish building loss function.') + + def pre_execute_controllers(self): + """Pre-executes all controllers in order of priority.""" + for controller in self.controllers: + controller.pre_execute(self) + + def post_execute_controllers(self): + """Post-executes all controllers in order of priority.""" + for controller in self.controllers: + controller.post_execute(self) + + def cpu(self): + """Puts models to CPU.""" + for name in self.models: + self.models[name].cpu() + + def cuda(self): + """Puts models to CUDA.""" + for name in self.models: + self.models[name].cuda() + + def set_model_requires_grad(self, name, requires_grad): + """Sets the `requires_grad` configuration for a particular model.""" + for param in self.models[name].parameters(): + param.requires_grad = requires_grad + + def set_models_requires_grad(self, requires_grad): + """Sets the `requires_grad` configuration for all models.""" + for name in self.models: + self.set_model_requires_grad(name, requires_grad) + + def set_model_mode(self, name, mode): + """Sets the `train/val` mode for a particular model.""" + if isinstance(mode, str): + 
mode = mode.lower() + if mode == 'train' or mode is True: + self.models[name].train() + elif mode in ['val', 'test', 'eval'] or mode is False: + self.models[name].eval() + else: + raise ValueError(f'Invalid model mode `{mode}`!') + + def set_mode(self, mode): + """Sets the `train/val` mode for all models.""" + self.mode = mode + for name in self.models: + self.set_model_mode(name, mode) + + def train_step(self, data, **train_kwargs): + """Executes one training step.""" + raise NotImplementedError('Should be implemented in derived class.') + + def train(self, **train_kwargs): + """Training function.""" + self.set_mode('train') + self.distribute() + self.build_dataset('train') + self.build_loss() + + self.logger.print() + self.logger.info(f'Start training.') + if self.total_iters == 0: + total_epochs = self.config.get('total_epochs', 0) + self.total_iters = self.convert_epoch_to_iter(total_epochs) + assert self.total_iters > 0 + while self.iter < self.total_iters: + self._iter += 1 + self.pre_execute_controllers() + data_batch = next(self.train_loader) + self.timer.pre_execute(self) + for key in data_batch: + assert data_batch[key].shape[0] == self.batch_size + data_batch[key] = data_batch[key].cuda( + torch.cuda.current_device(), non_blocking=True) + self.train_step(data_batch, **train_kwargs) + self.seen_img += self.batch_size * self.world_size + self.timer.post_execute(self) + self.post_execute_controllers() + self.finish() + + def val(self, **val_kwargs): + """Validation function.""" + raise NotImplementedError('Should be implemented in derived class.') + + def save(self, + filepath, + running_metadata=True, + learning_rate=True, + optimizer=True, + running_stats=False): + """Saves the current running status. + Args: + filepath: File path to save the checkpoint. + running_metadata: Whether to save the running metadata, such as + batch size, current iteration, etc. (default: True) + learning_rate: Whether to save the learning rate. (default: True) + optimizer: Whether to save the optimizer. (default: True) + running_stats: Whether to save the running stats. (default: False) + """ + checkpoint = dict() + # Models. + checkpoint['models'] = dict() + for name, model in self.models.items(): + checkpoint['models'][name] = self.get_module(model).state_dict() + # Running metadata. + if running_metadata: + checkpoint['running_metadata'] = { + 'iter': self.iter, + 'seen_img': self.seen_img, + } + # Optimizers. + if optimizer: + checkpoint['optimizers'] = dict() + for opt_name, opt in self.optimizers.items(): + checkpoint['optimizers'][opt_name] = opt.state_dict() + # Learning rates. + if learning_rate: + checkpoint['learning_rates'] = dict() + for lr_name, lr in self.lr_schedulers.items(): + checkpoint['learning_rates'][lr_name] = lr.state_dict() + # Running stats. + # TODO: Test saving and loading running stats. + if running_stats: + checkpoint['running_stats'] = self.running_stats + # Save checkpoint. + os.makedirs(os.path.dirname(filepath), exist_ok=True) + torch.save(checkpoint, filepath) + self.logger.info(f'Successfully saved checkpoint to `{filepath}`.') + + def load(self, + filepath, + running_metadata=True, + learning_rate=True, + optimizer=True, + running_stats=False, + map_location='cpu'): + """Loads previous running status. + + Args: + filepath: File path to load the checkpoint. + running_metadata: Whether to load the running metadata, such as + batch size, current iteration, etc. (default: True) + learning_rate: Whether to load the learning rate. 
(default: True) + optimizer: Whether to load the optimizer. (default: True) + running_stats: Whether to load the running stats. (default: False) + map_location: Map location used for model loading. (default: `cpu`) + """ + self.logger.info(f'Resuming from checkpoint `{filepath}` ...') + if not os.path.isfile(filepath): + raise IOError(f'Checkpoint `{filepath}` does not exist!') + map_location = map_location.lower() + assert map_location in ['cpu', 'gpu'] + if map_location == 'gpu': + device = torch.cuda.current_device() + map_location = lambda storage, location: storage.cuda(device) + checkpoint = torch.load(filepath, map_location=map_location) + # Load models. + if 'models' not in checkpoint: + checkpoint = {'models': checkpoint} + for model_name, model in self.models.items(): + if model_name not in checkpoint['models']: + self.logger.warning(f'Model `{model_name}` is not included in ' + f'the checkpoint, and hence will NOT be ' + f'loaded!') + continue + state_dict = _strip_state_dict_prefix( + checkpoint['models'][model_name]) + model.load_state_dict(state_dict) + self.logger.info(f' Successfully loaded model `{model_name}`.') + # Load running metedata. + if running_metadata: + if 'running_metadata' not in checkpoint: + self.logger.warning(f'Running metadata is not included in the ' + f'checkpoint, and hence will NOT be ' + f'loaded!') + else: + self._iter = checkpoint['running_metadata']['iter'] + self._start_iter = self._iter + self.seen_img = checkpoint['running_metadata']['seen_img'] + # Load optimizers. + if optimizer: + if 'optimizers' not in checkpoint: + self.logger.warning(f'Optimizers are not included in the ' + f'checkpoint, and hence will NOT be ' + f'loaded!') + else: + for opt_name, opt in self.optimizers.items(): + if opt_name not in checkpoint['optimizers']: + self.logger.warning(f'Optimizer `{opt_name}` is not ' + f'included in the checkpoint, and ' + f'hence will NOT be loaded!') + continue + opt.load_state_dict(checkpoint['optimizers'][opt_name]) + self.logger.info(f' Successfully loaded optimizer ' + f'`{opt_name}`.') + # Load learning rates. + if learning_rate: + if 'learning_rates' not in checkpoint: + self.logger.warning(f'Learning rates are not included in the ' + f'checkpoint, and hence will NOT be ' + f'loaded!') + else: + for lr_name, lr in self.lr_schedulers.items(): + if lr_name not in checkpoint['learning_rates']: + self.logger.warning(f'Learning rate `{lr_name}` is not ' + f'included in the checkpoint, and ' + f'hence will NOT be loaded!') + continue + lr.load_state_dict(checkpoint['learning_rates'][lr_name]) + self.logger.info(f' Successfully loaded learning rate ' + f'`{lr_name}`.') + # Load running stats. + if running_stats: + if 'running_stats' not in checkpoint: + self.logger.warning(f'Running stats is not included in the ' + f'checkpoint, and hence will NOT be ' + f'loaded!') + else: + self.running_stats = deepcopy(checkpoint['running_stats']) + self.logger.info(f' Successfully loaded running stats.') + # Log message. + tailing_message = '' + if running_metadata and 'running_metadata' in checkpoint: + tailing_message = f' (iteration {self.iter})' + self.logger.info(f'Successfully resumed from checkpoint `{filepath}`.' 
+ f'{tailing_message}') diff --git a/ContraCLIP/models/genforce/runners/controllers/__init__.py b/ContraCLIP/models/genforce/runners/controllers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..124438ea69c709e48c874ba623542e5d81e6f4e4 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/controllers/__init__.py @@ -0,0 +1,16 @@ +# python3.7 +"""Collects all controllers.""" + +from .cache_cleaner import CacheCleaner +from .checkpointer import Checkpointer +from .fid_evaluator import FIDEvaluator +from .lr_scheduler import LRScheduler +from .progress_scheduler import ProgressScheduler +from .running_logger import RunningLogger +from .snapshoter import Snapshoter +from .timer import Timer + +__all__ = [ + 'CacheCleaner', 'Checkpointer', 'FIDEvaluator', 'LRScheduler', + 'ProgressScheduler', 'RunningLogger', 'Snapshoter', 'Timer' +] diff --git a/ContraCLIP/models/genforce/runners/controllers/base_controller.py b/ContraCLIP/models/genforce/runners/controllers/base_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..6a6e96de9679652b688b5fe2875649d3fe89d185 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/controllers/base_controller.py @@ -0,0 +1,249 @@ +# python3.7 +"""Contains the base class for model running controllers.""" + +__all__ = ['BaseController'] + +_CONTROLLER_PRIORITY_ALIASES = { + 'FIRST': 0, + 'HIGH': 25, + 'MEDIUM': 50, + 'LOW': 75, + 'LAST': 100, +} + + +def _parse_controller_priority(priority): + """Parses the controller priority. + + Smaller number means higher priority. Controllers with higher priority will + be executed first after the running iteration and executed last before the + running iteration. Priority of controllers can be set in the configeration + file. All priorities should be with integer type and lie in range [0, 100]. + + Followings are some aliases for the default priorities: + + (1) FIRST: 0 + (2) HIGH: 25 + (3) MEDIUM: 50 + (4) LOW: 75 + (5) LAST: 100 + + Args: + priority: An integer or a string (alias) indicating the priority. + + Returns: + An integer representing the parsed priority. + + Raises: + TypeError: If the input `priority` is not with `int` or `str` type. + ValueError: If the input `priority` is out of range [0, 100] or the + `priority` is an invalid alias. + """ + if isinstance(priority, int): + if not 0 <= priority <= 100: + raise ValueError(f'Controller priority should lie in range ' + f'[0, 100], but `{priority}` is received!') + return priority + if isinstance(priority, str): + try: + return _CONTROLLER_PRIORITY_ALIASES[priority.upper()] + except KeyError: + raise ValueError(f'Unknown alias `{priority}` for controller ' + f'priority!\n' + f'Please choose from: ' + f'{list(_CONTROLLER_PRIORITY_ALIASES)}.') + raise TypeError(f'Input `priority` should be with type `int` or `str`, ' + f'but `{type(priority)}` is received!') + + +class BaseController(object): + """The base class for model running controllers. + + Controllers are commonly used to control/monitor the running process, such + as adjusting learning rate, saving log messages, etc. Within each iteration + of model running, all controllers will be checked TWICE (i.e., before and + after the iteration) on whether to execute the control. + + This class contains following members for a better control: + + (0) priority: Execution priority, which determines the execution order among + all controllers. See function `_parse_controller_priority()` for more + details. 
(default: `MEDIUM`) + (1) every_n_iters: Executable for every n iterations. `-1` means ignored. + (default: -1) + (2) every_n_epochs: Executable for every n epochs. `-1` means ignored. + (default: -1) + (3) first_iter: Enforce to execute on the first iteration. (default: True) + (4) marster_only: Executable only on the master worker. (default: False) + """ + + def __init__(self, config=None): + """Initializes the controller with basic settings. + + Args: + config: The configuration for the controller, which is loaded from + the configuration file. This field should be a dictionary. + (default: None) + """ + config = config or dict() + assert isinstance(config, dict) + + self._name = self.__class__.__name__ + self._config = config.copy() + priority = config.get('priority', 'MEDIUM') + self._priority = _parse_controller_priority(priority) + self._every_n_iters = config.get('every_n_iters', -1) + self._every_n_epochs = config.get('every_n_epochs', -1) + self._first_iter = config.get('first_iter', True) + self._master_only = config.get('master_only', False) + + @property + def name(self): + """Returns the name of the controller.""" + return self._name + + @property + def config(self): + """Returns the configuration for the controller.""" + return self._config + + @property + def priority(self): + """Returns the execution priority of the controller.""" + return self._priority + + @property + def every_n_iters(self): + """Returns how often (in iterations) the controller is executed.""" + return self._every_n_iters + + @property + def every_n_epochs(self): + """Returns how often (in epochs) the controller is executed.""" + return self._every_n_epochs + + @property + def first_iter(self): + """Returns whether the controller is forcibly executed initially.""" + return self._first_iter + + @property + def master_only(self): + """Returns whether the controller is executed on master worker only.""" + return self._master_only + + def setup(self, runner): + """Sets up the controller before running. + + Default behavior is to do nothing. Can be overridden in derived classes. + + Args: + runner: The runner to control. + """ + + def close(self, runner): + """Closes the controller after running. + + Default behavior is to do nothing. Can be overridden in derived classes. + + Args: + runner: The runner to control. + """ + + def execute_before_iteration(self, runner): + """Executes the controller before the iteration. + + Default behavior is to do nothing. Can be overridden in derived classes. + + Args: + runner: The runner to control. + """ + + def execute_after_iteration(self, runner): + """Executes the controller after the iteration. + + Default behavior is to do nothing. Can be overridden in derived classes. + + Args: + runner: The runner to control. + """ + + def is_executable(self, runner): + """Determines whether the controller is executable at current state. + + Basically, the decision is made based on the current running iteration + (epoch) and the execution frequency (i.e., `self.every_n_iters` and + `self.every_n_epochs`). + + If `self.master_only` is set as `True`, this function will also check + whether the current work is the master. + + Args: + runner: The runner to control. + + Returns: + A boolean suggesting whether the controller should be executed. 
+ """ + if self.master_only and runner.rank != 0: + return False + + if self.first_iter and runner.iter - runner.start_iter == 1: + return True + if runner.iter == runner.total_iters: + return True + if self.every_n_iters > 0 and runner.iter % self.every_n_iters == 0: + return True + epoch_to_iter = runner.convert_epoch_to_iter(self.every_n_epochs) + if self.every_n_epochs > 0 and runner.iter % epoch_to_iter == 0: + return True + return False + + def start(self, runner): + """Starts the controller. + + Default behavior is to do nothing. Can be overridden in derived classes. + + Args: + runner: The runner to control. + """ + if self.master_only and runner.rank != 0: + return + self.setup(runner) + + def end(self, runner): + """Ends the controller. + + Default behavior is to do nothing. Can be overridden in derived classes. + + Args: + runner: The runner to control. + """ + if self.master_only and runner.rank != 0: + return + self.close(runner) + + def pre_execute(self, runner): + """Pre-executes the controller before the running of each iteration. + + This function wraps function `self.execute_before_iteration()` and + function `self.is_executable()`. More concretely, the controller will + only be executed at some particular iterations. + + Args: + runner: The runner to control. + """ + if self.is_executable(runner): + self.execute_before_iteration(runner) + + def post_execute(self, runner): + """Post-executes the controller after the running of each iteration. + + This function wraps function `self.execute_before_iteration()` and + function `self.is_executable()`. More concretely, the controller will + only be executed at some particular iterations. + + Args: + runner: The runner to control. + """ + if self.is_executable(runner): + self.execute_after_iteration(runner) diff --git a/ContraCLIP/models/genforce/runners/controllers/cache_cleaner.py b/ContraCLIP/models/genforce/runners/controllers/cache_cleaner.py new file mode 100644 index 0000000000000000000000000000000000000000..7cdf2b3865b8b48bc64fe74fb6e7ca4ad5f5e066 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/controllers/cache_cleaner.py @@ -0,0 +1,32 @@ +# python3.7 +"""Contains the running controller to clean cache.""" + +import torch + +from .base_controller import BaseController + +__all__ = ['CacheCleaner'] + + +class CacheCleaner(BaseController): + """Defines the running controller to clean cache. + + This controller is used to empty the GPU cache after each iteration. + + NOTE: The controller is set to `LAST` priority by default. + """ + + def __init__(self, config=None): + config = config or dict() + config.setdefault('priority', 'LAST') + config.setdefault('every_n_iters', 1) + super().__init__(config) + + def setup(self, runner): + torch.cuda.empty_cache() + + def close(self, runner): + torch.cuda.empty_cache() + + def execute_after_iteration(self, runner): + torch.cuda.empty_cache() diff --git a/ContraCLIP/models/genforce/runners/controllers/checkpointer.py b/ContraCLIP/models/genforce/runners/controllers/checkpointer.py new file mode 100644 index 0000000000000000000000000000000000000000..9613c6ba3668708ef94536badf110b38ff0834e5 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/controllers/checkpointer.py @@ -0,0 +1,38 @@ +# python3.7 +"""Contains the running controller to handle checkpoints.""" + +import os.path + +from .base_controller import BaseController + +__all__ = ['Checkpointer'] + +class Checkpointer(BaseController): + """Defines the running controller to handle checkpoints. 
+ + This controller is used to save and load checkpoints. + + NOTE: This controller is set to `LAST` priority by default and will only be + executed on the master worker. + """ + + def __init__(self, config): + assert isinstance(config, dict) + config.setdefault('priority', 'LAST') + config.setdefault('master_only', True) + super().__init__(config) + + self._save_dir = config.get('checkpoint_dir', None) + self._save_running_metadata = config.get('save_running_metadata', True) + self._save_learning_rate = config.get('save_learning_rate', True) + self._save_optimizer = config.get('save_optimizer', True) + self._save_running_stats = config.get('save_running_stats', False) + + def execute_after_iteration(self, runner): + save_dir = self._save_dir or runner.work_dir + save_filename = f'checkpoint_iter{runner.iter:06d}.pth' + runner.save(filepath=os.path.join(save_dir, save_filename), + running_metadata=self._save_running_metadata, + learning_rate=self._save_learning_rate, + optimizer=self._save_optimizer, + running_stats=self._save_running_stats) diff --git a/ContraCLIP/models/genforce/runners/controllers/fid_evaluator.py b/ContraCLIP/models/genforce/runners/controllers/fid_evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..3a4193918ccf89b55a786507f77638e227c5a891 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/controllers/fid_evaluator.py @@ -0,0 +1,55 @@ +# python3.7 +"""Contains the running controller for evaluation.""" + +import os.path +import time + +from .base_controller import BaseController +from ..misc import format_time + +__all__ = ['FIDEvaluator'] + + +class FIDEvaluator(BaseController): + """Defines the running controller for evaluation. + + This controller is used to evalute the GAN model using FID metric. + + NOTE: The controller is set to `LAST` priority by default. + """ + + def __init__(self, config): + assert isinstance(config, dict) + config.setdefault('priority', 'LAST') + super().__init__(config) + + self.num = config.get('num', 50000) + self.ignore_cache = config.get('ignore_cache', False) + self.align_tf = config.get('align_tf', True) + self.file = None + + def setup(self, runner): + assert hasattr(runner, 'fid') + file_path = os.path.join(runner.work_dir, f'metric_fid{self.num}.txt') + if runner.rank == 0: + self.file = open(file_path, 'w') + + def close(self, runner): + if runner.rank == 0: + self.file.close() + + def execute_after_iteration(self, runner): + mode = runner.mode # save runner mode. + start_time = time.time() + fid_value = runner.fid(self.num, + ignore_cache=self.ignore_cache, + align_tf=self.align_tf) + duration_str = format_time(time.time() - start_time) + log_str = (f'FID: {fid_value:.5f} at iter {runner.iter:06d} ' + f'({runner.seen_img / 1000:.1f} kimg). ({duration_str})') + runner.logger.info(log_str) + if runner.rank == 0: + date = time.strftime("%Y-%m-%d %H:%M:%S") + self.file.write(f'[{date}] {log_str}\n') + self.file.flush() + runner.set_mode(mode) # restore runner mode. 
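For reference, adding a new controller only requires subclassing `BaseController` and overriding the hooks of interest; `BaseRunner.build_controllers()` resolves each entry of `config.controllers` by class name via `getattr(controllers, key)`, so the class also has to be exported from `runners/controllers/__init__.py`. A minimal sketch follows (the `MemoryReporter` below is hypothetical and not part of this codebase):

```python
# Hypothetical controller, shown only to illustrate the BaseController pattern.
import torch

from .base_controller import BaseController


class MemoryReporter(BaseController):
    """Logs peak GPU memory usage on the master worker."""

    def __init__(self, config=None):
        config = config or dict()
        config.setdefault('priority', 'LOW')       # run after most controllers
        config.setdefault('every_n_iters', 1000)   # assumed reporting interval
        config.setdefault('master_only', True)
        super().__init__(config)

    def execute_after_iteration(self, runner):
        peak_gb = torch.cuda.max_memory_allocated() / (1024 ** 3)
        runner.logger.info(f'Peak GPU memory at iter {runner.iter:06d}: '
                           f'{peak_gb:.2f} GB')
```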
diff --git a/ContraCLIP/models/genforce/runners/controllers/lr_scheduler.py b/ContraCLIP/models/genforce/runners/controllers/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..51f9959d9dd37a7c3dd28aed00add407a45cc00c --- /dev/null +++ b/ContraCLIP/models/genforce/runners/controllers/lr_scheduler.py @@ -0,0 +1,286 @@ +# python3.7 +"""Contains the running controller to adjust the learing rate.""" + +from torch.optim import lr_scheduler + +from .base_controller import BaseController + +__all__ = ['build_lr_scheduler', 'LRScheduler'] + + +class BaseWarmUpLR(lr_scheduler._LRScheduler): # pylint: disable=protected-access + """Defines a base learning rate scheduler with warm-up. + + NOTE: Different from the official LRSchedulers, the base unit for learning + rate update is always set as `iteration` instead of `epoch`. Hence, the + number of epochs should be converted to number of iterations before using. + """ + + def __init__(self, + optimizer, + warmup_type='NO', + warmup_iters=0, + warmup_factor=0.1): + """Initializes the scheduler with warm-up settings. + + Following warm-up types are supported: + + (1) `NO`: Do not use warm-up. + (2) `CONST`: Use a constant value for warm-up. + (3) `LINEAR`: Increase the learning rate linearly. + (4) `EXP`: Increase the learning rate exponentionally. + + Whatever warm-type is used, the initial learning rate for warm-up (if + needed) is always set as `base_lr * warmup_factor`. + + Args: + optimizer: The optimizer for applying gradients. + warmup_type: The warm-up type. (default: `NO`) + warmup_iters: Iterations for warm-up. (default: 0) + warmup_factor: Factor to set the intital learning rate for warm-up. + (default: 0.1) + """ + self._warmup_type = warmup_type.upper() + assert self.warmup_type in ['NO', 'CONST', 'LINEAR', 'EXP'] + self._warmup_iters = warmup_iters + self._warmup_factor = float(warmup_factor) + super().__init__(optimizer, last_epoch=-1) + + @property + def warmup_type(self): + """Gets the warm-up type.""" + return self._warmup_type + + @property + def warmup_iters(self): + """Gets the iterations for warm-up.""" + return self._warmup_iters + + @property + def warmup_factor(self): + """Gets the warm-up factor.""" + return self._warmup_factor + + def get_warmup_lr(self): + """Gets learning rate at the warm-up stage.""" + progress = self.last_epoch / self.warmup_iters + if self.warmup_type == 'NO': + return self.base_lrs + if self.warmup_type == 'CONST': + return [lr * self.warmup_factor for lr in self.base_lrs] + if self.warmup_type == 'LINEAR': + scale = (1 - progress) * (1 - self.warmup_factor) + return [lr * (1 - scale) for lr in self.base_lrs] + if self.warmup_type == 'EXP': + scale = self.warmup_factor ** (1 - progress) + return [lr * scale for lr in self.base_lrs] + raise ValueError(f'Invalid warm-up type `{self.warmup_type}`!') + + def _get_lr(self): + """Gets the learning rate ignoring warm-up.""" + raise NotImplementedError(f'Should be implemented in derived classes!') + + def get_lr(self): + if self.last_epoch < self.warmup_iters: + return self.get_warmup_lr() + return self._get_lr() + + +class FixedWarmUpLR(BaseWarmUpLR): + """Defines a warm-up LRScheduler with fixed learning rate.""" + + def _get_lr(self): + return self.base_lrs + + +class StepWarmUpLR(BaseWarmUpLR): + """Defines a warm-up LRScheduler with periodically decayed learning rate. + + In particular, the learning rate will be decayed with factor `decay_factor` + every `decay_step` iterations. 
+ + If the `decay_step` is a list of integers, the learning rate will be + adjusted at those particular iterations. + """ + + def __init__(self, + optimizer, + decay_step, + decay_factor=0.1, + warmup_type='NO', + warmup_iters=0, + warmup_factor=0.1): + self._decay_step = decay_step + self._decay_factor = decay_factor + super().__init__(optimizer, warmup_type, warmup_iters, warmup_factor) + + @property + def decay_step(self): + """Gets the decay step.""" + return self._decay_step + + @property + def decay_factor(self): + """Gets the decay factor.""" + return self._decay_factor + + def _get_lr(self): + if isinstance(self.decay_step, int): + scale = self.decay_factor ** (self.last_epoch // self.decay_step) + return [lr * scale for lr in self.base_lrs] + if isinstance(self.decay_step, (list, tuple)): + bucket_id = 0 + for step in set(self.decay_step): + if self.last_epoch >= step: + bucket_id += 1 + scale = self.decay_factor ** bucket_id + return [lr * scale for lr in self.base_lrs] + raise TypeError(f'Type of LR decay step can only be integer, list, ' + f'or tuple, but `{type(self.decay_step)}` is received!') + + +class EXPStepWarmUpLR(BaseWarmUpLR): + """Defines a warm-up LRScheduler with exponentially decayed learning rate. + + In particular, the learning rate will be decayed with factor `decay_factor` + every `decay_step` iterations. + + If the `decay_step` is a list of integers, the learning rate will be + adjusted at those particular iterations. + """ + def __init__(self, + optimizer, + decay_step, + decay_factor=0.1, + warmup_type='NO', + warmup_iters=0, + warmup_factor=0.1): + self._decay_step = decay_step + self._decay_factor = decay_factor + super().__init__(optimizer, warmup_type, warmup_iters, warmup_factor) + + @property + def decay_step(self): + """Gets the decay step.""" + return self._decay_step + + @property + def decay_factor(self): + """Gets the decay factor.""" + return self._decay_factor + + def _get_lr(self): + if isinstance(self.decay_step, int): + scale = self.decay_factor ** (self.last_epoch / self.decay_step) + return [lr * scale for lr in self.base_lrs] + if isinstance(self.decay_step, (list, tuple)): + bucket_id = 0 + for step in set(self.decay_step): + if self.last_epoch >= step: + bucket_id += 1 + scale = self.decay_factor ** bucket_id + return [lr * scale for lr in self.base_lrs] + raise TypeError(f'Type of LR decay step can only be integer, list, ' + f'or tuple, but `{type(self.decay_step)}` is received!') + + +_ALLOWED_LR_TYPES = ['FIXED', 'STEP', 'EXPSTEP'] + + +def build_lr_scheduler(config, optimizer): + """Builds a learning rate scheduler for the given optimizer. + + Basically, the configuration is expected to contain following settings: + + (1) lr_type: The type of the learning rate scheduler. (required) + (2) warmup_type: The warm-up type. (default: `NO`) + (3) warmup_iters: Iterations for warm-up. (default: 0) + (4) warmup_factor: Factor to set the intital learning rate for warm-up. + (default: 0.1) + (5) **kwargs: Additional settings for the scheduler. + + Args: + config: The configuration used to build the learning rate scheduler. + optimizer: The optimizer which the scheduler serves. + + Returns: + A `BaseWarmUpLR` class. + + Raises: + ValueError: The `lr_type` is not supported. + NotImplementedError: If `lr_type` is not implemented. 
+ """ + assert isinstance(config, dict) + lr_type = config['lr_type'].upper() + warmup_type = config.get('warmup_type', 'NO') + warmup_iters = config.get('warmup_iters', 0) + warmup_factor = config.get('warmup_factor', 0.1) + + if lr_type not in _ALLOWED_LR_TYPES: + raise ValueError(f'Invalid learning rate scheduler type `{lr_type}`!' + f'Allowed types: {_ALLOWED_LR_TYPES}.') + + if lr_type == 'FIXED': + return FixedWarmUpLR(optimizer=optimizer, + warmup_type=warmup_type, + warmup_iters=warmup_iters, + warmup_factor=warmup_factor) + if lr_type == 'STEP': + return StepWarmUpLR(optimizer=optimizer, + decay_step=config['decay_step'], + decay_factor=config.get('decay_factor', 0.1), + warmup_type=warmup_type, + warmup_iters=warmup_iters, + warmup_factor=warmup_factor) + if lr_type == 'EXPSTEP': + return EXPStepWarmUpLR(optimizer=optimizer, + decay_step=config['decay_step'], + decay_factor=config.get('decay_factor', 0.1), + warmup_type=warmup_type, + warmup_iters=warmup_iters, + warmup_factor=warmup_factor) + raise NotImplementedError(f'Not implemented scheduler type `{lr_type}`!') + + +class LRScheduler(BaseController): + """Defines the running controller to adjust the learning rate. + + This controller will be executed after every iteration. + + NOTE: The controller is set to `FIRST` priority. + """ + + def __init__(self, lr_config): + assert isinstance(lr_config, dict) + config = { + 'priority': 'FIRST', + 'every_n_iters': 1, + } + super().__init__(config) + self._lr_config = lr_config.copy() + + @property + def lr_config(self): + """Gets the configuration for learning rate scheduler.""" + return self._lr_config + + def setup(self, runner): + for name, config in self.lr_config.items(): + if not name or not config: + continue + if name in runner.lr_schedulers: + raise AttributeError(f'LR Scheduler `{name}` already existed!') + if name not in runner.optimizers: + raise AttributeError(f'Optimizer `{name}` is missing!') + runner.lr_schedulers[name] = build_lr_scheduler( + config, runner.optimizers[name]) + runner.running_stats.add( + f'lr_{name}', log_format='.3e', log_name=f'lr ({name})', + log_strategy='CURRENT') + + def execute_after_iteration(self, runner): + for name, scheduler in runner.lr_schedulers.items(): + scheduler.step() + assert scheduler.last_epoch == runner.iter + current_lr = runner.optimizers[name].param_groups[0]['lr'] + runner.running_stats.update({f'lr_{name}': current_lr}) diff --git a/ContraCLIP/models/genforce/runners/controllers/progress_scheduler.py b/ContraCLIP/models/genforce/runners/controllers/progress_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..eef2b4d67bccbb30da6e7ee562844aa8c6ea3daa --- /dev/null +++ b/ContraCLIP/models/genforce/runners/controllers/progress_scheduler.py @@ -0,0 +1,162 @@ +# python3.7 +"""Contains the running controller to control progressive training. + +This controller is applicable to the models that need to progressively change +the batch size, learning rate, etc. +""" + +import numpy as np + +from .base_controller import BaseController + +__all__ = ['ProgressScheduler'] + +_BATCH_SIZE_SCHEDULE_DICT = { + 4: 16, 8: 8, 16: 4, 32: 2, 64: 1, 128: 1, 256: 1, 512: 1, 1024: 1, +} +_MAX_BATCH_SIZE = 64 + +_LEARNING_RATE_SCHEDULE_DICT = { + 4: 1, 8: 1, 16: 1, 32: 1, 64: 1, 128: 1.5, 256: 2, 512: 3, 1024: 3, +} + + +class ProgressScheduler(BaseController): + """Defines the running controller to control progressive training. + + NOTE: The controller is set to `HIGH` priority by default. 
+ """ + + def __init__(self, config): + assert isinstance(config, dict) + config.setdefault('priority', 'HIGH') + config.setdefault('every_n_iters', 1) + super().__init__(config) + + self.base_batch_size = 0 + self.base_lrs = dict() + + self.total_img = 0 + self.init_res = config.get('init_res', 4) + self.final_res = self.init_res + self.init_lod = 0 + self.batch_size_schedule = config.get('batch_size_schedule', dict()) + self.lr_schedule = config.get('lr_schedule', dict()) + self.minibatch_repeats = config.get('minibatch_repeats', 4) + + self.lod_training_img = config.get('lod_training_img', 600_000) + self.lod_transition_img = config.get('lod_transition_img', 600_000) + self.lod_duration = (self.lod_training_img + self.lod_transition_img) + + # Whether to reset the optimizer state at the beginning of each phase. + self.reset_optimizer = config.get('reset_optimizer', True) + + def get_batch_size(self, resolution): + """Gets batch size for a particular resolution.""" + if self.batch_size_schedule: + return self.batch_size_schedule.get( + f'res{resolution}', self.base_batch_size) + batch_size_scale = _BATCH_SIZE_SCHEDULE_DICT[resolution] + return min(_MAX_BATCH_SIZE, self.base_batch_size * batch_size_scale) + + def get_lr_scale(self, resolution): + """Gets learning rate scale for a particular resolution.""" + if self.lr_schedule: + return self.lr_schedule.get(f'res{resolution}', 1) + return _LEARNING_RATE_SCHEDULE_DICT[resolution] + + def setup(self, runner): + # Set level of detail (lod). + self.final_res = runner.resolution + self.init_lod = np.log2(self.final_res // self.init_res) + runner.lod = -1.0 + + # Save default batch size and learning rate. + self.base_batch_size = runner.batch_size + for lr_name, lr_scheduler in runner.lr_schedulers.items(): + self.base_lrs[lr_name] = lr_scheduler.base_lrs + + # Add running stats for logging. + runner.running_stats.add( + 'kimg', log_format='7.1f', log_name='kimg', log_strategy='CURRENT') + runner.running_stats.add( + 'lod', log_format='4.2f', log_name='lod', log_strategy='CURRENT') + runner.running_stats.add( + 'minibatch', log_format='4d', log_name='minibatch', + log_strategy='CURRENT') + + # Log progressive schedule. + runner.logger.info(f'Progressive Schedule:') + res = self.init_res + lod = int(self.init_lod) + while res <= self.final_res: + batch_size = self.get_batch_size(res) + lr_scale = self.get_lr_scale(res) + runner.logger.info(f' Resolution {res:4d} (lod {lod}): ' + f'batch size ' + f'{batch_size:3d} * {runner.world_size:2d}, ' + f'learning rate scale {lr_scale:.1f}') + res *= 2 + lod -= 1 + assert lod == -1 and res == self.final_res * 2 + + # Compute total running iterations. + assert hasattr(runner.config, 'total_img') + self.total_img = runner.config.total_img + current_img = 0 + num_iters = 0 + while current_img < self.total_img: + phase = (current_img + self.lod_transition_img) // self.lod_duration + phase = np.clip(phase, 0, self.init_lod) + if num_iters % self.minibatch_repeats == 0: + resolution = self.init_res * (2 ** int(phase)) + current_img += self.get_batch_size(resolution) * runner.world_size + num_iters += 1 + runner.total_iters = num_iters + + def execute_before_iteration(self, runner): + is_first_iter = (runner.iter - runner.start_iter == 1) + + # Adjust hyper-parameters only at some particular iteration. + if (not is_first_iter) and (runner.iter % self.minibatch_repeats != 1): + return + + # Compute level-of-details. 
+ phase, subphase = divmod(runner.seen_img, self.lod_duration) + lod = self.init_lod - phase + if self.lod_transition_img: + transition_img = max(subphase - self.lod_training_img, 0) + lod = lod - transition_img / self.lod_transition_img + lod = max(lod, 0.0) + resolution = self.init_res * (2 ** int(np.ceil(self.init_lod - lod))) + batch_size = self.get_batch_size(resolution) + lr_scale = self.get_lr_scale(resolution) + + pre_lod = runner.lod + pre_resolution = runner.train_loader.dataset.resolution + runner.lod = lod + + # Reset optimizer state if needed. + if self.reset_optimizer: + if int(lod) != int(pre_lod) or np.ceil(lod) != np.ceil(pre_lod): + runner.logger.info(f'Reset the optimizer state at ' + f'iter {runner.iter:06d} (lod {lod:.6f}).') + for name in runner.optimizers: + runner.optimizers[name].state.clear() + + # Rebuild the dataset and adjust the learing rate if needed. + if is_first_iter or resolution != pre_resolution: + runner.logger.info(f'Rebuild the dataset at ' + f'iter {runner.iter:06d} (lod {lod:.6f}).') + runner.train_loader.overwrite_param( + batch_size=batch_size, resolution=resolution) + runner.batch_size = batch_size + for lr_name, base_lrs in self.base_lrs.items(): + runner.lr_schedulers[lr_name].base_lrs = [ + lr * lr_scale for lr in base_lrs] + + def execute_after_iteration(self, runner): + minibatch = runner.batch_size * runner.world_size + runner.running_stats.update({'kimg': runner.seen_img / 1000}) + runner.running_stats.update({'lod': runner.lod}) + runner.running_stats.update({'minibatch': minibatch}) diff --git a/ContraCLIP/models/genforce/runners/controllers/running_logger.py b/ContraCLIP/models/genforce/runners/controllers/running_logger.py new file mode 100644 index 0000000000000000000000000000000000000000..e18c87efa5643ebff70b2261fdff47e1a3ce10d9 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/controllers/running_logger.py @@ -0,0 +1,99 @@ +# python3.7 +"""Contains the running controller to save the running log.""" + +import os +import json + +import warnings +warnings.filterwarnings('ignore', category=FutureWarning) # Ignore TF warning. + +# pylint: disable=wrong-import-position +import torch +from torch.utils.tensorboard import SummaryWriter + +from ..misc import format_time +from .base_controller import BaseController +# pylint: enable=wrong-import-position + +__all__ = ['RunningLogger'] + + +class RunningLogger(BaseController): + """Defines the running controller to save the running log. + + This controller is able to save the log message in different formats: + + (1) Text format, which will be printed on screen and saved to the log file. + (2) JSON format, which will be saved to `{runner.work_dir}/log.json`. + (3) Tensorboard format. + + NOTE: The controller is set to `90` priority by default and will only be + executed on the master worker. 
+ """ + + def __init__(self, config=None): + config = config or dict() + config.setdefault('priority', 90) + config.setdefault('every_n_iters', 1) + config.setdefault('master_only', True) + super().__init__(config) + + self._text_format = config.get('text_format', True) + self._log_order = config.get('log_order', None) + self._json_format = config.get('json_format', True) + self._json_logpath = self._json_filename = 'log.json' + self._tensorboard_format = config.get('tensorboard_format', True) + self.tensorboard_writer = None + + def setup(self, runner): + if self._text_format: + runner.running_stats.log_order = self._log_order + if self._json_format: + self._json_logpath = os.path.join( + runner.work_dir, self._json_filename) + if self._tensorboard_format: + event_dir = os.path.join(runner.work_dir, 'events') + os.makedirs(event_dir, exist_ok=True) + self.tensorboard_writer = SummaryWriter(log_dir=event_dir) + + def close(self, runner): + if self._tensorboard_format: + self.tensorboard_writer.close() + + def execute_after_iteration(self, runner): + # Prepare log data. + log_data = {name: stats.get_log_value() + for name, stats in runner.running_stats.stats_pool.items()} + + # Save in text format. + msg = f'Iter {runner.iter:6d}/{runner.total_iters:6d}' + msg += f', {runner.running_stats}' + memory = torch.cuda.max_memory_allocated() / (1024 ** 3) + msg += f' (memory: {memory:.1f}G)' + if 'iter_time' in log_data: + eta = log_data['iter_time'] * (runner.total_iters - runner.iter) + msg += f' (ETA: {format_time(eta)})' + runner.logger.info(msg) + + # Save in JSON format. + if self._json_format: + with open(self._json_logpath, 'a+') as f: + json.dump(log_data, f) + f.write('\n') + + # Save in Tensorboard format. + if self._tensorboard_format: + for name, value in log_data.items(): + if name in ['data_time', 'iter_time', 'run_time']: + continue + if name.startswith('loss_'): + self.tensorboard_writer.add_scalar( + name.replace('loss_', 'loss/'), value, runner.iter) + elif name.startswith('lr_'): + self.tensorboard_writer.add_scalar( + name.replace('lr_', 'learning_rate/'), value, runner.iter) + else: + self.tensorboard_writer.add_scalar(name, value, runner.iter) + + # Clear running stats. + runner.running_stats.clear() diff --git a/ContraCLIP/models/genforce/runners/controllers/snapshoter.py b/ContraCLIP/models/genforce/runners/controllers/snapshoter.py new file mode 100644 index 0000000000000000000000000000000000000000..c33f496cba7e6a3d54f8c19e7ebcc4d14c210036 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/controllers/snapshoter.py @@ -0,0 +1,31 @@ +# python3.7 +"""Contains the running controller for saving snapshot.""" + +from .base_controller import BaseController + +__all__ = ['Snapshoter'] + + +class Snapshoter(BaseController): + """Defines the running controller for evaluation. + + NOTE: The controller is set to `LAST` priority by default. + """ + + def __init__(self, config): + config.setdefault('priority', 'LAST') + super().__init__(config) + + self.num = config.get('num', 100) + + def setup(self, runner): + assert hasattr(runner, 'synthesize') + + def execute_after_iteration(self, runner): + mode = runner.mode # save runner mode. + runner.synthesize(self.num, + html_name=f'snapshot_{runner.iter:06d}.html', + save_raw_synthesis=False) + runner.logger.info(f'Saving snapshot at iter {runner.iter:06d} ' + f'({runner.seen_img / 1000:.1f} kimg).') + runner.set_mode(mode) # restore runner mode. 
diff --git a/ContraCLIP/models/genforce/runners/controllers/timer.py b/ContraCLIP/models/genforce/runners/controllers/timer.py new file mode 100644 index 0000000000000000000000000000000000000000..d0cba96e4bbdc8be53939305039e4ead4d688cf5 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/controllers/timer.py @@ -0,0 +1,54 @@ +# python3.7 +"""Contains the running controller to record time.""" + +import time + +from .base_controller import BaseController + +__all__ = ['Timer'] + + +class Timer(BaseController): + """Defines the running controller to record running time. + + This controller will be executed every iteration (both before and after) to + summarize the data preparation time as well as the model running time. + Besides, this controller will also mark the start and end time of the + running process. + + NOTE: This controller is set to `LOW` priority by default and will only be + executed on the master worker. + """ + + def __init__(self, config=None): + config = config or dict() + config.setdefault('priority', 'LOW') + config.setdefault('every_n_iters', 1) + config.setdefault('master_only', True) + super().__init__(config) + + self.time = time.time() + + def setup(self, runner): + runner.running_stats.add( + 'data_time', log_format='time', log_name='data time') + runner.running_stats.add( + 'iter_time', log_format='time', log_name='iter time') + runner.running_stats.add( + 'run_time', log_format='time', log_name='run time', + log_strategy='CURRENT') + self.time = time.time() + runner.start_time = self.time + + def close(self, runner): + runner.end_time = time.time() + + def execute_before_iteration(self, runner): + start_time = time.time() + runner.running_stats.update({'data_time': start_time - self.time}) + + def execute_after_iteration(self, runner): + end_time = time.time() + runner.running_stats.update({'iter_time': end_time - self.time}) + runner.running_stats.update({'run_time': end_time - runner.start_time}) + self.time = end_time diff --git a/ContraCLIP/models/genforce/runners/encoder_runner.py b/ContraCLIP/models/genforce/runners/encoder_runner.py new file mode 100644 index 0000000000000000000000000000000000000000..0ffd72a0682a1bbf65bf80133c1b1b0a0f5340a3 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/encoder_runner.py @@ -0,0 +1,44 @@ +# python3.7 +"""Contains the runner for Encoder.""" + +from copy import deepcopy + +from .base_encoder_runner import BaseEncoderRunner + +__all__ = ['EncoderRunner'] + + +class EncoderRunner(BaseEncoderRunner): + """Defines the runner for Encoder Training.""" + + def build_models(self): + super().build_models() + if 'generator_smooth' not in self.models: + self.models['generator_smooth'] = deepcopy(self.models['generator']) + super().load(self.config.get('gan_model_path'), + running_metadata=False, + learning_rate=False, + optimizer=False, + running_stats=False) + + def train_step(self, data, **train_kwargs): + self.set_model_requires_grad('generator', False) + + # E_loss + self.set_model_requires_grad('discriminator', False) + self.set_model_requires_grad('encoder', True) + E_loss = self.loss.e_loss(self, data) + self.optimizers['encoder'].zero_grad() + E_loss.backward() + self.optimizers['encoder'].step() + + # D_loss + self.set_model_requires_grad('discriminator', True) + self.set_model_requires_grad('encoder', False) + D_loss = self.loss.d_loss(self, data) + self.optimizers['discriminator'].zero_grad() + D_loss.backward() + self.optimizers['discriminator'].step() + + def load(self, **kwargs): + super().load(**kwargs) diff
--git a/ContraCLIP/models/genforce/runners/losses/__init__.py b/ContraCLIP/models/genforce/runners/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b673d9c3672d4443730ae6b68594736c6344a54 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/losses/__init__.py @@ -0,0 +1,7 @@ +# python3.7 +"""Collects all loss functions.""" + +from .logistic_gan_loss import LogisticGANLoss +from .encoder_loss import EncoderLoss + +__all__ = ['LogisticGANLoss', 'EncoderLoss'] diff --git a/ContraCLIP/models/genforce/runners/losses/encoder_loss.py b/ContraCLIP/models/genforce/runners/losses/encoder_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..4995d7d5e6292d569beb43643e7a48e71e8dce96 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/losses/encoder_loss.py @@ -0,0 +1,144 @@ +# python3.7 +"""Defines loss functions for encoder training.""" + +import torch +import torch.nn.functional as F + +from models import build_perceptual + +__all__ = ['EncoderLoss'] + + +class EncoderLoss(object): + """Contains the class to compute loss for encoder training.""" + + def __init__(self, + runner, + d_loss_kwargs=None, + e_loss_kwargs=None, + perceptual_kwargs=None): + """Initializes with models and arguments for computing losses.""" + self.d_loss_kwargs = d_loss_kwargs or dict() + self.e_loss_kwargs = e_loss_kwargs or dict() + self.r1_gamma = self.d_loss_kwargs.get('r1_gamma', 10.0) + self.r2_gamma = self.d_loss_kwargs.get('r2_gamma', 0.0) + + self.perceptual_lw = self.e_loss_kwargs.get('perceptual_lw', 5e-5) + self.adv_lw = self.e_loss_kwargs.get('adv_lw', 0.1) + + self.perceptual_model = build_perceptual(**perceptual_kwargs).cuda() + self.perceptual_model.eval() + for param in self.perceptual_model.parameters(): + param.requires_grad = False + + runner.space_of_latent = runner.config.space_of_latent + + runner.running_stats.add( + f'recon_loss', log_format='.3f', log_strategy='AVERAGE') + runner.running_stats.add( + f'adv_loss', log_format='.3f', log_strategy='AVERAGE') + runner.running_stats.add( + f'loss_fake', log_format='.3f', log_strategy='AVERAGE') + runner.running_stats.add( + f'loss_real', log_format='.3f', log_strategy='AVERAGE') + if self.r1_gamma != 0: + runner.running_stats.add( + f'real_grad_penalty', log_format='.3f', log_strategy='AVERAGE') + if self.r2_gamma != 0: + runner.running_stats.add( + f'fake_grad_penalty', log_format='.3f', log_strategy='AVERAGE') + + @staticmethod + def compute_grad_penalty(images, scores): + """Computes gradient penalty.""" + image_grad = torch.autograd.grad( + outputs=scores.sum(), + inputs=images, + create_graph=True, + retain_graph=True)[0].view(images.shape[0], -1) + penalty = image_grad.pow(2).sum(dim=1).mean() + return penalty + + def d_loss(self, runner, data): + """Computes loss for discriminator.""" + if 'generator_smooth' in runner.models: + G = runner.get_module(runner.models['generator_smooth']) + else: + G = runner.get_module(runner.models['generator']) + G.eval() + D = runner.models['discriminator'] + E = runner.models['encoder'] + + reals = data['image'] + reals.requires_grad = True + + with torch.no_grad(): + latents = E(reals) + if runner.space_of_latent == 'z': + reals_rec = G(latents, **runner.G_kwargs_val)['image'] + elif runner.space_of_latent == 'wp': + reals_rec = G.synthesis(latents, + **runner.G_kwargs_val)['image'] + elif runner.space_of_latent == 'y': + G.set_space_of_latent('y') + reals_rec = G.synthesis(latents, + **runner.G_kwargs_val)['image'] + real_scores = D(reals,
**runner.D_kwargs_train) + fake_scores = D(reals_rec, **runner.D_kwargs_train) + loss_fake = F.softplus(fake_scores).mean() + loss_real = F.softplus(-real_scores).mean() + d_loss = loss_fake + loss_real + + runner.running_stats.update({'loss_fake': loss_fake.item()}) + runner.running_stats.update({'loss_real': loss_real.item()}) + + real_grad_penalty = torch.zeros_like(d_loss) + fake_grad_penalty = torch.zeros_like(d_loss) + if self.r1_gamma: + real_grad_penalty = self.compute_grad_penalty(reals, real_scores) + runner.running_stats.update( + {'real_grad_penalty': real_grad_penalty.item()}) + if self.r2_gamma: + fake_grad_penalty = self.compute_grad_penalty( + reals_rec, fake_scores) + runner.running_stats.update( + {'fake_grad_penalty': fake_grad_penalty.item()}) + + return (d_loss + + real_grad_penalty * (self.r1_gamma * 0.5) + + fake_grad_penalty * (self.r2_gamma * 0.5)) + + def e_loss(self, runner, data): + """Computes loss for encoder.""" + if 'generator_smooth' in runner.models: + G = runner.get_module(runner.models['generator_smooth']) + else: + G = runner.get_module(runner.models['generator']) + G.eval() + D = runner.models['discriminator'] + E = runner.models['encoder'] + P = self.perceptual_model + + # Fetch data + reals = data['image'] + + latents = E(reals) + if runner.space_of_latent == 'z': + reals_rec = G(latents, **runner.G_kwargs_val)['image'] + elif runner.space_of_latent == 'wp': + reals_rec = G.synthesis(latents, **runner.G_kwargs_val)['image'] + elif runner.space_of_latent == 'y': + G.set_space_of_latent('y') + reals_rec = G.synthesis(latents, **runner.G_kwargs_val)['image'] + loss_pix = F.mse_loss(reals_rec, reals, reduction='mean') + loss_feat = self.perceptual_lw * F.mse_loss( + P(reals_rec), P(reals), reduction='mean') + loss_rec = loss_pix + loss_feat + fake_scores = D(reals_rec, **runner.D_kwargs_train) + adv_loss = self.adv_lw * F.softplus(-fake_scores).mean() + e_loss = loss_pix + loss_feat + adv_loss + + runner.running_stats.update({'recon_loss': loss_rec.item()}) + runner.running_stats.update({'adv_loss': adv_loss.item()}) + + return e_loss diff --git a/ContraCLIP/models/genforce/runners/losses/logistic_gan_loss.py b/ContraCLIP/models/genforce/runners/losses/logistic_gan_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..f241d73d93c5c67e829b0976a0f816ed0ecbd57d --- /dev/null +++ b/ContraCLIP/models/genforce/runners/losses/logistic_gan_loss.py @@ -0,0 +1,112 @@ +# python3.7 +"""Defines loss functions for GAN training.""" + +import numpy as np + +import torch +import torch.nn.functional as F + +__all__ = ['LogisticGANLoss'] + +apply_loss_scaling = lambda x: x * torch.exp(x * np.log(2.0)) +undo_loss_scaling = lambda x: x * torch.exp(-x * np.log(2.0)) + + +class LogisticGANLoss(object): + """Contains the class to compute logistic GAN loss.""" + + def __init__(self, runner, d_loss_kwargs=None, g_loss_kwargs=None): + """Initializes with models and arguments for computing losses.""" + self.d_loss_kwargs = d_loss_kwargs or dict() + self.g_loss_kwargs = g_loss_kwargs or dict() + self.r1_gamma = self.d_loss_kwargs.get('r1_gamma', 10.0) + self.r2_gamma = self.d_loss_kwargs.get('r2_gamma', 0.0) + + runner.running_stats.add( + f'g_loss', log_format='.3f', log_strategy='AVERAGE') + runner.running_stats.add( + f'd_loss', log_format='.3f', log_strategy='AVERAGE') + if self.r1_gamma != 0: + runner.running_stats.add( + f'real_grad_penalty', log_format='.3f', log_strategy='AVERAGE') + if self.r2_gamma != 0: + runner.running_stats.add( +
f'fake_grad_penalty', log_format='.3f', log_strategy='AVERAGE') + + @staticmethod + def preprocess_image(images, lod=0, **_unused_kwargs): + """Pre-process images.""" + if lod != int(lod): + downsampled_images = F.avg_pool2d( + images, kernel_size=2, stride=2, padding=0) + upsampled_images = F.interpolate( + downsampled_images, scale_factor=2, mode='nearest') + alpha = lod - int(lod) + images = images * (1 - alpha) + upsampled_images * alpha + if int(lod) == 0: + return images + return F.interpolate( + images, scale_factor=(2 ** int(lod)), mode='nearest') + + @staticmethod + def compute_grad_penalty(images, scores): + """Computes gradient penalty.""" + image_grad = torch.autograd.grad( + outputs=scores.sum(), + inputs=images, + create_graph=True, + retain_graph=True)[0].view(images.shape[0], -1) + penalty = image_grad.pow(2).sum(dim=1).mean() + return penalty + + def d_loss(self, runner, data): + """Computes loss for discriminator.""" + G = runner.models['generator'] + D = runner.models['discriminator'] + + reals = self.preprocess_image(data['image'], lod=runner.lod) + reals.requires_grad = True + labels = data.get('label', None) + + latents = torch.randn(reals.shape[0], runner.z_space_dim).cuda() + latents.requires_grad = True + # TODO: Use random labels. + fakes = G(latents, label=labels, **runner.G_kwargs_train)['image'] + real_scores = D(reals, label=labels, **runner.D_kwargs_train) + fake_scores = D(fakes, label=labels, **runner.D_kwargs_train) + + d_loss = F.softplus(fake_scores).mean() + d_loss += F.softplus(-real_scores).mean() + runner.running_stats.update({'d_loss': d_loss.item()}) + + real_grad_penalty = torch.zeros_like(d_loss) + fake_grad_penalty = torch.zeros_like(d_loss) + if self.r1_gamma: + real_grad_penalty = self.compute_grad_penalty(reals, real_scores) + runner.running_stats.update( + {'real_grad_penalty': real_grad_penalty.item()}) + if self.r2_gamma: + fake_grad_penalty = self.compute_grad_penalty(fakes, fake_scores) + runner.running_stats.update( + {'fake_grad_penalty': fake_grad_penalty.item()}) + + return (d_loss + + real_grad_penalty * (self.r1_gamma * 0.5) + + fake_grad_penalty * (self.r2_gamma * 0.5)) + + def g_loss(self, runner, data): # pylint: disable=no-self-use + """Computes loss for generator.""" + # TODO: Use random labels. + G = runner.models['generator'] + D = runner.models['discriminator'] + batch_size = data['image'].shape[0] + labels = data.get('label', None) + + latents = torch.randn(batch_size, runner.z_space_dim).cuda() + fakes = G(latents, label=labels, **runner.G_kwargs_train)['image'] + fake_scores = D(fakes, label=labels, **runner.D_kwargs_train) + + g_loss = F.softplus(-fake_scores).mean() + runner.running_stats.update({'g_loss': g_loss.item()}) + + return g_loss diff --git a/ContraCLIP/models/genforce/runners/misc.py b/ContraCLIP/models/genforce/runners/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..a29d9de7bf37258f175cda206de151f4eeafefc0 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/misc.py @@ -0,0 +1,37 @@ +# python3.7 +"""Misc utility functions used for model running.""" + +__all__ = ['format_time'] + + +def format_time(seconds): + """Formats seconds to readable time string. + + Args: + seconds: Number of seconds to format. + + Returns: + The formatted time string. + + Raises: + ValueError: If the input `seconds` is less than 0. 
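+ + Example: + `format_time(36.5)` returns `36.50s`, while `format_time(4000)` + returns ` 1h06m`.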
+ """ + if seconds < 0: + raise ValueError(f'Input `seconds` should be greater than or equal to ' + f'0, but `{seconds}` is received!') + + # Returns seconds as float if less than 1 minute. + if seconds < 10: + return f'{seconds:5.3f}s' + if seconds < 60: + return f'{seconds:5.2f}s' + + seconds = int(seconds + 0.5) + days, seconds = divmod(seconds, 86400) + hours, seconds = divmod(seconds, 3600) + minutes, seconds = divmod(seconds, 60) + if days: + return f'{days:2d}d{hours:02d}h' + if hours: + return f'{hours:2d}h{minutes:02d}m' + return f'{minutes:2d}m{seconds:02d}s' diff --git a/ContraCLIP/models/genforce/runners/optimizer.py b/ContraCLIP/models/genforce/runners/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..15ab801d9f046268328f6f06dd3e78720ebece62 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/optimizer.py @@ -0,0 +1,237 @@ +# python3.7 +"""Contains the function to build optimizer for runner.""" + +import math + +import torch + +__all__ = ['build_optimizer', 'build_optimizers'] + +_ALLOWED_OPT_TYPES = ['SGD', 'ADAM'] + + +def build_optimizer(config, model): + """Builds an optimizer for the given model. + + Basically, the configuration is expected to contain following settings: + + (1) opt_type: The type of the optimizer. (required) + (2) base_lr: The base learning rate for all parameters. (required) + (3) base_wd: The base weight decay for all parameters. (default: 0.0) + (4) bias_lr_multiplier: The learning rate multiplier for bias parameters. + (default: 1.0) + (5) bias_wd_multiplier: The weight decay multiplier for bias parameters. + (default: 1.0) + (6) **kwargs: Additional settings for the optimizer, such as `momentum`. + + Args: + config: The configuration used to build the optimizer. + model: The model which the optimizer serves. + + Returns: + A `torch.optim.Optimizer`. + + Raises: + ValueError: The `opt_type` is not supported. + NotImplementedError: If `opt_type` is not implemented. + """ + assert isinstance(config, dict) + opt_type = config['opt_type'].upper() + base_lr = config['base_lr'] + base_wd = config.get('base_wd', 0.0) + bias_lr_multiplier = config.get('bias_lr_multiplier', 1.0) + bias_wd_multiplier = config.get('bias_wd_multiplier', 1.0) + + if opt_type not in _ALLOWED_OPT_TYPES: + raise ValueError(f'Invalid optimizer type `{opt_type}`!' + f'Allowed types: {_ALLOWED_OPT_TYPES}.') + + model_params = [] + for param_name, param in model.named_parameters(): + param_group = {'params': [param]} + if param.requires_grad: + if 'bias' in param_name: + param_group['lr'] = base_lr * bias_lr_multiplier + param_group['weight_decay'] = base_wd * bias_wd_multiplier + else: + param_group['lr'] = base_lr + param_group['weight_decay'] = base_wd + model_params.append(param_group) + + if opt_type == 'SGD': + return torch.optim.SGD(params=model_params, + lr=base_lr, + momentum=config.get('momentum', 0.9), + dampening=config.get('dampening', 0), + weight_decay=base_wd, + nesterov=config.get('nesterov', False)) + if opt_type == 'ADAM': + return AdamOptimizer(params=model_params, + lr=base_lr, + betas=config.get('betas', (0.9, 0.999)), + eps=config.get('eps', 1e-8), + weight_decay=base_wd, + amsgrad=config.get('amsgrad', False)) + raise NotImplementedError(f'Not implemented optimizer type `{opt_type}`!') + + +def build_optimizers(opt_config, runner): + """Builds optimizers for the given runner. + + The `opt_config` should be a dictionary, where keys are model names and + each value is the optimizer configuration for a particumar model. 
All built + optimizers will be saved in `runner.optimizers`, which is also a dictionary. + + NOTE: The model names should match the keys of `runner.models`. + + Args: + opt_config: The configuration to build the optimizers. + runner: The runner to build the optimizer for. + """ + if not opt_config: + return + + assert isinstance(opt_config, dict) + for name, config in opt_config.items(): + if not name or not config: + continue + if name in runner.optimizers: + raise AttributeError(f'Optimizer `{name}` has already existed!') + if name not in runner.models: + raise AttributeError(f'Model `{name}` is missing!') + runner.optimizers[name] = build_optimizer(config, runner.models[name]) + + +# We slightly modify the Adam optimizer from `torch.optim`. since there exists +# some discrepancies between the `torch.optim` version and the TensorFlow +# version. The main difference is where to add the `epsilon`. +# TODO: The modified optimizer does not support `amsgrad` any more. + +# pylint: disable=line-too-long +# pylint: disable=unneeded-not +# pylint: disable=misplaced-comparison-constant +# pylint: disable=super-with-arguments + +class AdamOptimizer(torch.optim.Optimizer): + r"""Implements Adam algorithm. + + It has been proposed in `Adam: A Method for Stochastic Optimization`_. + The implementation of the L2 penalty follows changes proposed in + `Decoupled Weight Decay Regularization`_. + + Arguments: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional): learning rate (default: 1e-3) + betas (Tuple[float, float], optional): coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.999)) + eps (float, optional): term added to the denominator to improve + numerical stability (default: 1e-8) + weight_decay (float, optional): weight decay (L2 penalty) (default: 0) + amsgrad (boolean, optional): whether to use the AMSGrad variant of this + algorithm from the paper `On the Convergence of Adam and Beyond`_ + (default: False) + + .. _Adam\: A Method for Stochastic Optimization: + https://arxiv.org/abs/1412.6980 + .. _Decoupled Weight Decay Regularization: + https://arxiv.org/abs/1711.05101 + .. _On the Convergence of Adam and Beyond: + https://openreview.net/forum?id=ryQu7f-RZ + """ + + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, + weight_decay=0, amsgrad=False): + if not 0.0 <= lr: + raise ValueError("Invalid learning rate: {}".format(lr)) + if not 0.0 <= eps: + raise ValueError("Invalid epsilon value: {}".format(eps)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + if not 0.0 <= betas[1] < 1.0: + raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + if not 0.0 <= weight_decay: + raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) + defaults = dict(lr=lr, betas=betas, eps=eps, + weight_decay=weight_decay, amsgrad=amsgrad) + super(AdamOptimizer, self).__init__(params, defaults) + + def __setstate__(self, state): + super(AdamOptimizer, self).__setstate__(state) + for group in self.param_groups: + group.setdefault('amsgrad', False) + + @torch.no_grad() + def step(self, closure=None): + """Performs a single optimization step. + + Arguments: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. 
+ """ + loss = None + if closure is not None: + with torch.enable_grad(): + loss = closure() + + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + grad = p.grad + if grad.is_sparse: + raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') + amsgrad = group['amsgrad'] + assert not amsgrad + + state = self.state[p] + + # State initialization + if len(state) == 0: + state['step'] = 0 + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) + # Exponential moving average of squared gradient values + state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) + if amsgrad: + # Maintains max of all exp. moving avg. of sq. grad. values + state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) + + exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] + # if amsgrad: + # max_exp_avg_sq = state['max_exp_avg_sq'] + beta1, beta2 = group['betas'] + + state['step'] += 1 + bias_correction1 = 1 - beta1 ** state['step'] + bias_correction2 = 1 - beta2 ** state['step'] + + if group['weight_decay'] != 0: + grad = grad.add(p, alpha=group['weight_decay']) + + # Decay the first and second moment running average coefficient + exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1) + exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2) + + # if amsgrad: + # # Maintains the maximum of all 2nd moment running avg. till now + # torch.maximum(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) + # # Use the max. for normalizing running avg. of gradient + # denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) + # else: + # denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps']) + + # step_size = group['lr'] / bias_correction1 + + # p.addcdiv_(exp_avg, denom, value=-step_size) + + step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 + + p.addcdiv_(exp_avg, exp_avg_sq.sqrt().add_(group['eps']) , value=-step_size) + + return loss + +# pylint: enable=line-too-long +# pylint: enable=unneeded-not +# pylint: enable=misplaced-comparison-constant +# pylint: enable=super-with-arguments diff --git a/ContraCLIP/models/genforce/runners/running_stats.py b/ContraCLIP/models/genforce/runners/running_stats.py new file mode 100644 index 0000000000000000000000000000000000000000..7155d7ff4708511ae22164e4b2ba959ce04a8416 --- /dev/null +++ b/ContraCLIP/models/genforce/runners/running_stats.py @@ -0,0 +1,207 @@ +# python3.7 +"""Contains the class for recording the running stats. + +Here, running stats refers to the statictical information in the running +process, such as loss values, learning rates, running time, etc. +""" + +from .misc import format_time + +__all__ = ['SingleStats', 'RunningStats'] + + +class SingleStats(object): + """A class to record the stats corresponding to a particular variable. + + This class is log-friendly and supports customized log format, including: + + (1) Numerical log format, such as `.3f`, `.1e`, `05d`, and `>10s`. + (2) Customized log name (name of the stats to show in the log). + (3) Additional string (e.g., measure unit) as the tail of log message. + + Furthermore, this class also supports logging the stats with different + strategies, including: + + (1) CURRENT: The current value will be logged. + (2) AVERAGE: The averaged value (from the beginning) will be logged. + (3) SUM: The cumulative value (from the beginning) will be logged. 
+ """ + + def __init__(self, + name, + log_format='.3f', + log_name=None, + log_tail=None, + log_strategy='AVERAGE'): + """Initializes the stats with log format. + + Args: + name: Name of the stats. Should be a string without spaces. + log_format: The numerical log format. Use `time` to log time + duration. (default: `.3f`) + log_name: The name shown in the log. `None` means to directly use + the stats name. (default: None) + log_tail: The tailing log message. (default: None) + log_strategy: Strategy to log this stats. `CURRENT`, `AVERAGE`, and + `SUM` are supported. (default: `AVERAGE`) + + Raises: + ValueError: If the input `log_strategy` is not supported. + """ + log_strategy = log_strategy.upper() + if log_strategy not in ['CURRENT', 'AVERAGE', 'SUM']: + raise ValueError(f'Invalid log strategy `{self.log_strategy}`!') + + self._name = name + self._log_format = log_format + self._log_name = log_name or name + self._log_tail = log_tail or '' + self._log_strategy = log_strategy + + # Stats Data. + self.val = 0 # Current value. + self.sum = 0 # Cumulative value. + self.avg = 0 # Averaged value. + self.cnt = 0 # Count number. + + @property + def name(self): + """Gets the name of the stats.""" + return self._name + + @property + def log_format(self): + """Gets tne numerical log format of the stats.""" + return self._log_format + + @property + def log_name(self): + """Gets the log name of the stats.""" + return self._log_name + + @property + def log_tail(self): + """Gets the tailing log message of the stats.""" + return self._log_tail + + @property + def log_strategy(self): + """Gets the log strategy of the stats.""" + return self._log_strategy + + def clear(self): + """Clears the stats data.""" + self.val = 0 + self.sum = 0 + self.avg = 0 + self.cnt = 0 + + def update(self, value): + """Updates the stats data.""" + self.val = value + self.cnt = self.cnt + 1 + self.sum = self.sum + value + self.avg = self.sum / self.cnt + + def get_log_value(self): + """Gets value for logging according to the log strategy.""" + if self.log_strategy == 'CURRENT': + return self.val + if self.log_strategy == 'AVERAGE': + return self.avg + if self.log_strategy == 'SUM': + return self.sum + raise NotImplementedError(f'Log strategy `{self.log_strategy}` is not ' + f'implemented!') + + def __str__(self): + """Gets log message.""" + if self.log_format == 'time': + value_str = f'{format_time(self.get_log_value())}' + else: + value_str = f'{self.get_log_value():{self.log_format}}' + return f'{self.log_name}: {value_str}{self.log_tail}' + + +class RunningStats(object): + """A class to record all the running stats. + + Basically, this class contains a dictionary of SingleStats. + + Example: + + running_stats = RunningStats() + running_stats.add('loss', log_format='.3f', log_strategy='AVERAGE') + running_stats.add('time', log_format='time', log_name='Iter Time', + log_strategy='CURRENT') + running_stats.log_order = ['time', 'loss'] + running_stats.update({'loss': 0.46, 'time': 12}) + running_stats.update({'time': 14.5, 'loss': 0.33}) + print(running_stats) + """ + + def __init__(self, log_delimiter=', '): + """Initializes the running stats with the log delimiter. + + Args: + log_delimiter: This delimiter is used to connect the log messages + from different stats. (default: `, `) + """ + self._log_delimiter = log_delimiter + self.stats_pool = dict() # The stats pool. + self.log_order = None # Order of the stats to log. 
+ + @property + def log_delimiter(self): + """Gets the log delimiter between different stats.""" + return self._log_delimiter + + def add(self, name, **kwargs): + """Adds a new SingleStats to the dictionary. + + Additional arguments include: + + log_format: The numerical log format. Use `time` to log time duration. + (default: `.3f`) + log_name: The name shown in the log. `None` means to directly use the + stats name. (default: None) + log_tail: The tailing log message. (default: None) + log_strategy: Strategy to log this stats. `CURRENT`, `AVERAGE`, and + `SUM` are supported. (default: `AVERAGE`) + """ + if name in self.stats_pool: + return + self.stats_pool[name] = SingleStats(name, **kwargs) + + def clear(self, exclude_list=None): + """Clears the stats data (if needed). + + Args: + exclude_list: A list of stats names whose data will not be cleared. + """ + exclude_list = set(exclude_list or []) + for name, stats in self.stats_pool.items(): + if name not in exclude_list: + stats.clear() + + def update(self, kwargs): + """Updates the stats data by name.""" + for name, value in kwargs.items(): + if name not in self.stats_pool: + self.add(name) + self.stats_pool[name].update(value) + + def __getattr__(self, name): + """Gets a particular SingleStats by name.""" + if name in self.stats_pool: + return self.stats_pool[name] + if name in self.__dict__: + return self.__dict__[name] + raise AttributeError(f'`{self.__class__.__name__}` object has no ' + f'attribute `{name}`!') + + def __str__(self): + """Gets log message.""" + self.log_order = self.log_order or list(self.stats_pool) + log_strings = [str(self.stats_pool[name]) for name in self.log_order] + return self.log_delimiter.join(log_strings) diff --git a/ContraCLIP/models/genforce/runners/stylegan_runner.py b/ContraCLIP/models/genforce/runners/stylegan_runner.py new file mode 100644 index 0000000000000000000000000000000000000000..32189554dc4642e1102bf0a154909b354dab1aae --- /dev/null +++ b/ContraCLIP/models/genforce/runners/stylegan_runner.py @@ -0,0 +1,72 @@ +# python3.7 +"""Contains the runner for StyleGAN.""" + +from copy import deepcopy + +from .base_gan_runner import BaseGANRunner + +__all__ = ['StyleGANRunner'] + + +class StyleGANRunner(BaseGANRunner): + """Defines the runner for StyleGAN.""" + + def __init__(self, config, logger): + super().__init__(config, logger) + self.lod = getattr(self, 'lod', None) + + def build_models(self): + super().build_models() + self.g_smooth_img = self.config.modules['generator'].get( + 'g_smooth_img', 10000) + self.models['generator_smooth'] = deepcopy(self.models['generator']) + + def build_loss(self): + super().build_loss() + self.running_stats.add( + f'Gs_beta', log_format='.4f', log_strategy='CURRENT') + + def train_step(self, data, **train_kwargs): + # Set level-of-details. + G = self.get_module(self.models['generator']) + D = self.get_module(self.models['discriminator']) + Gs = self.get_module(self.models['generator_smooth']) + G.synthesis.lod.data.fill_(self.lod) + D.lod.data.fill_(self.lod) + Gs.synthesis.lod.data.fill_(self.lod) + + # Update discriminator. + self.set_model_requires_grad('discriminator', True) + self.set_model_requires_grad('generator', False) + + d_loss = self.loss.d_loss(self, data) + self.optimizers['discriminator'].zero_grad() + d_loss.backward() + self.optimizers['discriminator'].step() + + # Life-long update for generator. 
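+        # `generator_smooth` is an exponential moving average of the generator + # whose half-life is roughly `g_smooth_img` images, independently of the + # batch size.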
+        beta = 0.5 ** (self.batch_size * self.world_size / self.g_smooth_img) + self.running_stats.update({'Gs_beta': beta}) + self.moving_average_model(model=self.models['generator'], + avg_model=self.models['generator_smooth'], + beta=beta) + + # Update generator. + if self._iter % self.config.get('D_repeats', 1) == 0: + self.set_model_requires_grad('discriminator', False) + self.set_model_requires_grad('generator', True) + g_loss = self.loss.g_loss(self, data) + self.optimizers['generator'].zero_grad() + g_loss.backward() + self.optimizers['generator'].step() + + def load(self, **kwargs): + super().load(**kwargs) + G = self.get_module(self.models['generator']) + D = self.get_module(self.models['discriminator']) + Gs = self.get_module(self.models['generator_smooth']) + if kwargs['running_metadata']: + lod = G.synthesis.lod.cpu().tolist() + assert lod == D.lod.cpu().tolist() + assert lod == Gs.synthesis.lod.cpu().tolist() + self.lod = lod diff --git a/ContraCLIP/models/genforce/scripts/dist_test.sh b/ContraCLIP/models/genforce/scripts/dist_test.sh new file mode 100755 index 0000000000000000000000000000000000000000..e14bac7c9bf717facd1582f564fcf43ba5884130 --- /dev/null +++ b/ContraCLIP/models/genforce/scripts/dist_test.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +GPUS=$1 +CONFIG=$2 +WORK_DIR=$3 +CHECKPOINT=$4 +PORT=${PORT:-29500} + +python -m torch.distributed.launch \ + --nproc_per_node=${GPUS} \ + --master_port=${PORT} \ + ./test.py ${CONFIG} \ + --work_dir ${WORK_DIR} \ + --checkpoint ${CHECKPOINT} \ + --launcher="pytorch" \ + ${@:5} diff --git a/ContraCLIP/models/genforce/scripts/dist_train.sh b/ContraCLIP/models/genforce/scripts/dist_train.sh new file mode 100755 index 0000000000000000000000000000000000000000..08b6a214db59e7b60d944026f437dba5bbb0ce74 --- /dev/null +++ b/ContraCLIP/models/genforce/scripts/dist_train.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +GPUS=$1 +CONFIG=$2 +WORK_DIR=$3 +PORT=${PORT:-29500} + +python -m torch.distributed.launch \ + --nproc_per_node=${GPUS} \ + --master_port=${PORT} \ + ./train.py ${CONFIG} \ + --work_dir ${WORK_DIR} \ + --launcher="pytorch" \ + ${@:4} diff --git a/ContraCLIP/models/genforce/scripts/slurm_test.sh b/ContraCLIP/models/genforce/scripts/slurm_test.sh new file mode 100755 index 0000000000000000000000000000000000000000..bfdb7b1dba8fb786a1aa51ea0288272308b93191 --- /dev/null +++ b/ContraCLIP/models/genforce/scripts/slurm_test.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -x + +PARTITION=$1 +JOB_NAME=$2 +CONFIG=$3 +WORK_DIR=$4 +CHECKPOINT=$5 +GPUS=${GPUS:-8} +GPUS_PER_NODE=${GPUS_PER_NODE:-8} +CPUS_PER_NODE=${CPUS_PER_NODE:-8} + +SRUN_ARGS=${SRUN_ARGS:-""} +PY_ARGS=${@:6} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +srun -p ${PARTITION} \ + --job-name=${JOB_NAME} \ + --gres=gpu:${GPUS_PER_NODE} \ + --ntasks=${GPUS} \ + --ntasks-per-node=${GPUS_PER_NODE} \ + --cpus-per-task=${CPUS_PER_NODE} \ + --kill-on-bad-exit=1 \ + ${SRUN_ARGS} \ + python -u ./test.py ${CONFIG} \ + --work_dir=${WORK_DIR} \ + --checkpoint ${CHECKPOINT} \ + --launcher="slurm" \ + ${PY_ARGS} diff --git a/ContraCLIP/models/genforce/scripts/slurm_train.sh b/ContraCLIP/models/genforce/scripts/slurm_train.sh new file mode 100755 index 0000000000000000000000000000000000000000..2027c4c341c09fbe76abeea6fb8702e8e4e748db --- /dev/null +++ b/ContraCLIP/models/genforce/scripts/slurm_train.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +set -x + +PARTITION=$1 +JOB_NAME=$2 +CONFIG=$3 +WORK_DIR=$4 +GPUS=${GPUS:-8} +GPUS_PER_NODE=${GPUS_PER_NODE:-8} +CPUS_PER_NODE=${CPUS_PER_NODE:-8} + +SRUN_ARGS=${SRUN_ARGS:-""}
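+# Any arguments after the fourth positional one (WORK_DIR) are forwarded to train.py.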
+PY_ARGS=${@:5} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +srun -p ${PARTITION} \ + --job-name=${JOB_NAME} \ + --gres=gpu:${GPUS_PER_NODE} \ + --ntasks=${GPUS} \ + --ntasks-per-node=${GPUS_PER_NODE} \ + --cpus-per-task=${CPUS_PER_NODE} \ + --kill-on-bad-exit=1 \ + ${SRUN_ARGS} \ + python -u ./train.py ${CONFIG} \ + --work_dir=${WORK_DIR} \ + --launcher="slurm" \ + ${PY_ARGS} diff --git a/ContraCLIP/models/genforce/scripts/stylegan_training_demo.sh b/ContraCLIP/models/genforce/scripts/stylegan_training_demo.sh new file mode 100755 index 0000000000000000000000000000000000000000..dcb7efaff60dd496026a5afcc94fac459aadc9e2 --- /dev/null +++ b/ContraCLIP/models/genforce/scripts/stylegan_training_demo.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +echo "==================================================" +echo "Please ensure you have installed the requirements!" + +# Download data. +echo "Downloading data ..." +mkdir -p data/ +wget -nv https://www.dropbox.com/s/vvtcqcujdjeq3zs/mini_animeface.zip?dl=1 \ + -O data/demo.zip --quiet + +# Launch training. +echo "Launch training job with 1 GPU." +echo "==================================================" +PORT=6666 ./scripts/dist_train.sh 1 \ + configs/stylegan_demo.py \ + work_dirs/stylegan_demo diff --git a/ContraCLIP/models/genforce/synthesize.py b/ContraCLIP/models/genforce/synthesize.py new file mode 100644 index 0000000000000000000000000000000000000000..f3d700a7f6706d7796ecb9d55c0dd4aab5385a76 --- /dev/null +++ b/ContraCLIP/models/genforce/synthesize.py @@ -0,0 +1,139 @@ +# python3.7 +"""A simple tool to synthesize images with pre-trained models.""" + +import os +import argparse +import subprocess +from tqdm import tqdm +import numpy as np + +import torch + +from models import MODEL_ZOO +from models import build_generator +from utils.misc import bool_parser +from utils.visualizer import HtmlPageVisualizer +from utils.visualizer import postprocess_image +from utils.visualizer import save_image + + +def parse_args(): + """Parses arguments.""" + parser = argparse.ArgumentParser( + description='Synthesize images with pre-trained models.') + parser.add_argument('model_name', type=str, + help='Name to the pre-trained model.') + parser.add_argument('--save_dir', type=str, default=None, + help='Directory to save the results. If not specified, ' + 'the results will be saved to ' + '`work_dirs/synthesis/` by default. ' + '(default: %(default)s)') + parser.add_argument('--num', type=int, default=100, + help='Number of samples to synthesize. ' + '(default: %(default)s)') + parser.add_argument('--batch_size', type=int, default=1, + help='Batch size. (default: %(default)s)') + parser.add_argument('--generate_html', type=bool_parser, default=True, + help='Whether to use HTML page to visualize the ' + 'synthesized results. (default: %(default)s)') + parser.add_argument('--save_raw_synthesis', type=bool_parser, default=False, + help='Whether to save raw synthesis. ' + '(default: %(default)s)') + parser.add_argument('--seed', type=int, default=0, + help='Seed for sampling. (default: %(default)s)') + parser.add_argument('--trunc_psi', type=float, default=0.7, + help='Psi factor used for truncation. This is ' + 'particularly applicable to StyleGAN (v1/v2). ' + '(default: %(default)s)') + parser.add_argument('--trunc_layers', type=int, default=8, + help='Number of layers to perform truncation. This is ' + 'particularly applicable to StyleGAN (v1/v2). 
' + '(default: %(default)s)') + parser.add_argument('--randomize_noise', type=bool_parser, default=False, + help='Whether to randomize the layer-wise noise. This ' + 'is particularly applicable to StyleGAN (v1/v2). ' + '(default: %(default)s)') + return parser.parse_args() + + +def main(): + """Main function.""" + args = parse_args() + if args.num <= 0: + return + if not args.save_raw_synthesis and not args.generate_html: + return + + # Parse model configuration. + if args.model_name not in MODEL_ZOO: + raise SystemExit(f'Model `{args.model_name}` is not registered in ' + f'`models/model_zoo.py`!') + model_config = MODEL_ZOO[args.model_name].copy() + url = model_config.pop('url') # URL to download model if needed. + + # Get work directory and job name. + if args.save_dir: + work_dir = args.save_dir + else: + work_dir = os.path.join('work_dirs', 'synthesis') + os.makedirs(work_dir, exist_ok=True) + job_name = f'{args.model_name}_{args.num}' + if args.save_raw_synthesis: + os.makedirs(os.path.join(work_dir, job_name), exist_ok=True) + + # Build generation and get synthesis kwargs. + print(f'Building generator for model `{args.model_name}` ...') + generator = build_generator(**model_config) + synthesis_kwargs = dict(trunc_psi=args.trunc_psi, + trunc_layers=args.trunc_layers, + randomize_noise=args.randomize_noise) + print(f'Finish building generator.') + + # Load pre-trained weights. + os.makedirs('/import/nobackup_mmv_ioannisp/jo001/genforce_models', exist_ok=True) + checkpoint_path = os.path.join('/import/nobackup_mmv_ioannisp/jo001/genforce_models', args.model_name + '.pth') + print(f'Loading checkpoint from `{checkpoint_path}` ...') + if not os.path.exists(checkpoint_path): + print(f' Downloading checkpoint from `{url}` ...') + subprocess.call(['wget', '--quiet', '-O', checkpoint_path, url]) + print(f' Finish downloading checkpoint.') + checkpoint = torch.load(checkpoint_path, map_location='cpu') + if 'generator_smooth' in checkpoint: + generator.load_state_dict(checkpoint['generator_smooth']) + else: + generator.load_state_dict(checkpoint['generator']) + generator = generator.cuda() + generator.eval() + print(f'Finish loading checkpoint.') + + # Set random seed. + np.random.seed(args.seed) + torch.manual_seed(args.seed) + + # Sample and synthesize. 
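+    # Latent codes are drawn from a standard normal distribution and decoded + # in batches; each sample is optionally saved as a raw image and/or added + # to an HTML grid.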
+    print(f'Synthesizing {args.num} samples ...') + indices = list(range(args.num)) + if args.generate_html: + html = HtmlPageVisualizer(grid_size=args.num) + for batch_idx in tqdm(range(0, args.num, args.batch_size)): + sub_indices = indices[batch_idx:batch_idx + args.batch_size] + code = torch.randn(len(sub_indices), generator.z_space_dim).cuda() + with torch.no_grad(): + images = generator(code, **synthesis_kwargs)['image'] + images = postprocess_image(images.detach().cpu().numpy()) + for sub_idx, image in zip(sub_indices, images): + if args.save_raw_synthesis: + save_path = os.path.join( + work_dir, job_name, f'{sub_idx:06d}.jpg') + save_image(save_path, image) + if args.generate_html: + row_idx, col_idx = divmod(sub_idx, html.num_cols) + html.set_cell(row_idx, col_idx, image=image, + text=f'Sample {sub_idx:06d}') + if args.generate_html: + html.save(os.path.join(work_dir, f'{job_name}.html')) + print(f'Finish synthesizing {args.num} samples.') + + +if __name__ == '__main__': + main() diff --git a/ContraCLIP/models/genforce/test.py b/ContraCLIP/models/genforce/test.py new file mode 100644 index 0000000000000000000000000000000000000000..8027f8b8455ddd6087efdbcc4989031bde9da00f --- /dev/null +++ b/ContraCLIP/models/genforce/test.py @@ -0,0 +1,114 @@ +# python3.7 +"""Main function for model inference.""" + +import os.path +import shutil +import argparse + +import torch +import torch.distributed as dist + +import runners +from utils.logger import build_logger +from utils.misc import init_dist +from utils.misc import DictAction, parse_config, update_config + + +def parse_args(): + """Parses arguments.""" + parser = argparse.ArgumentParser(description='Run model inference.') + parser.add_argument('config', type=str, + help='Path to the inference configuration.') + parser.add_argument('--work_dir', type=str, required=True, + help='The work directory to save logs and checkpoints.') + parser.add_argument('--checkpoint', type=str, required=True, + help='Path to the checkpoint to load. (default: ' + '%(default)s)') + parser.add_argument('--synthesis_num', type=int, default=1000, + help='Number of samples to synthesize. Set as 0 to ' + 'disable synthesis. (default: %(default)s)') + parser.add_argument('--fid_num', type=int, default=50000, + help='Number of samples to compute FID. Set as 0 to ' + 'disable FID test. (default: %(default)s)') + parser.add_argument('--use_torchvision', action='store_true', + help='Whether to use the Inception model from ' + '`torchvision` to compute FID. (default: False)') + parser.add_argument('--launcher', type=str, default='pytorch', + choices=['pytorch', 'slurm'], + help='Launcher type. (default: %(default)s)') + parser.add_argument('--backend', type=str, default='nccl', + help='Backend for distributed launcher. (default: ' + '%(default)s)') + parser.add_argument('--rank', type=int, default=-1, + help='Node rank for distributed running. (default: ' + '%(default)s)') + parser.add_argument('--local_rank', type=int, default=0, + help='Rank of the current node. (default: %(default)s)') + parser.add_argument('--options', nargs='+', action=DictAction, + help='arguments in dict') + return parser.parse_args() + + +def main(): + """Main function.""" + # Parse arguments. + args = parse_args() + + # Parse configurations.
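+    # Values from the config file are merged with any `--options` overrides + # and with the paths passed on the command line.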
+ config = parse_config(args.config) + config = update_config(config, args.options) + config.work_dir = args.work_dir + config.checkpoint = args.checkpoint + config.launcher = args.launcher + config.backend = args.backend + if not os.path.isfile(config.checkpoint): + raise FileNotFoundError(f'Checkpoint file `{config.checkpoint}` is ' + f'missing!') + + # Set CUDNN. + config.cudnn_benchmark = config.get('cudnn_benchmark', True) + config.cudnn_deterministic = config.get('cudnn_deterministic', False) + torch.backends.cudnn.benchmark = config.cudnn_benchmark + torch.backends.cudnn.deterministic = config.cudnn_deterministic + + # Setting for launcher. + config.is_distributed = True + init_dist(config.launcher, backend=config.backend) + config.num_gpus = dist.get_world_size() + + # Setup logger. + if dist.get_rank() == 0: + logger_type = config.get('logger_type', 'normal') + logger = build_logger(logger_type, work_dir=config.work_dir) + shutil.copy(args.config, os.path.join(config.work_dir, 'config.py')) + commit_id = os.popen('git rev-parse HEAD').readline() + logger.info(f'Commit ID: {commit_id}') + else: + logger = build_logger('dumb', work_dir=config.work_dir) + + # Start inference. + runner = getattr(runners, config.runner_type)(config, logger) + runner.load(filepath=config.checkpoint, + running_metadata=False, + learning_rate=False, + optimizer=False, + running_stats=False) + + if args.synthesis_num > 0: + num = args.synthesis_num + logger.print() + logger.info(f'Synthesizing images ...') + runner.synthesize(num, html_name=f'synthesis_{num}.html') + logger.info(f'Finish synthesizing {num} images.') + + if args.fid_num > 0: + num = args.fid_num + logger.print() + logger.info(f'Testing FID ...') + fid_value = runner.fid(num, align_tf=not args.use_torchvision) + logger.info(f'Finish testing FID on {num} samples. ' + f'The result is {fid_value:.6f}.') + + +if __name__ == '__main__': + main() diff --git a/ContraCLIP/models/genforce/train.py b/ContraCLIP/models/genforce/train.py new file mode 100644 index 0000000000000000000000000000000000000000..b6b7cc8bf5e8ee47c06e22d0451f039790f25ada --- /dev/null +++ b/ContraCLIP/models/genforce/train.py @@ -0,0 +1,122 @@ +# python3.7 +"""Main function for model training.""" + +import os.path +import shutil +import warnings +import random +import argparse +import numpy as np + +import torch +import torch.distributed as dist + +import runners +from utils.logger import build_logger +from utils.misc import init_dist +from utils.misc import DictAction, parse_config, update_config + + +def parse_args(): + """Parses arguments.""" + parser = argparse.ArgumentParser(description='Run model training.') + parser.add_argument('config', type=str, + help='Path to the training configuration.') + parser.add_argument('--work_dir', type=str, required=True, + help='The work directory to save logs and checkpoints.') + parser.add_argument('--resume_path', type=str, default=None, + help='Path to the checkpoint to resume training.') + parser.add_argument('--weight_path', type=str, default=None, + help='Path to the checkpoint to load model weights, ' + 'but not resume other states.') + parser.add_argument('--seed', type=int, default=None, + help='Random seed. (default: %(default)s)') + parser.add_argument('--launcher', type=str, default='pytorch', + choices=['pytorch', 'slurm'], + help='Launcher type. (default: %(default)s)') + parser.add_argument('--backend', type=str, default='nccl', + help='Backend for distributed launcher. 
(default: ' + '%(default)s)') + parser.add_argument('--rank', type=int, default=-1, + help='Node rank for distributed running. (default: ' + '%(default)s)') + parser.add_argument('--local_rank', type=int, default=0, + help='Rank of the current node. (default: %(default)s)') + parser.add_argument('--options', nargs='+', action=DictAction, + help='arguments in dict') + return parser.parse_args() + + +def main(): + """Main function.""" + # Parse arguments. + args = parse_args() + + # Parse configurations. + config = parse_config(args.config) + config = update_config(config, args.options) + config.work_dir = args.work_dir + config.resume_path = args.resume_path + config.weight_path = args.weight_path + config.seed = args.seed + config.launcher = args.launcher + config.backend = args.backend + + # Set CUDNN. + config.cudnn_benchmark = config.get('cudnn_benchmark', True) + config.cudnn_deterministic = config.get('cudnn_deterministic', False) + torch.backends.cudnn.benchmark = config.cudnn_benchmark + torch.backends.cudnn.deterministic = config.cudnn_deterministic + + # Set deterministic if random seed is provided. + if config.seed is not None: + config.cudnn_deterministic = True + torch.backends.cudnn.deterministic = True + warnings.warn('Random seed is set for training! ' + 'This will turn on the CUDNN deterministic setting, ' + 'which may slow down the training considerably! ' + 'Unexpected behavior can be observed when resuming from ' + 'checkpoints.') + + # Set launcher. + config.is_distributed = True + init_dist(config.launcher, backend=config.backend) + rank = dist.get_rank() + world_size = dist.get_world_size() + config.num_gpus = world_size + + # Set random seed. + if config.seed is not None: + random.seed(config.seed * world_size + rank) + np.random.seed(config.seed * world_size + rank) + torch.manual_seed(config.seed * world_size + rank) + + # Setup logger. + if dist.get_rank() == 0: + logger_type = config.get('logger_type', 'normal') + logger = build_logger(logger_type, work_dir=config.work_dir) + shutil.copy(args.config, os.path.join(config.work_dir, 'config.py')) + commit_id = os.popen('git rev-parse HEAD').readline() + logger.info(f'Commit ID: {commit_id}') + else: + logger = build_logger('dumb', work_dir=config.work_dir) + + # Start training. 
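+    # `resume_path` restores training metadata, learning rates and optimizer + # states, whereas `weight_path` only loads the model weights.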
+    runner = getattr(runners, config.runner_type)(config, logger) + if config.resume_path: + runner.load(filepath=config.resume_path, + running_metadata=True, + learning_rate=True, + optimizer=True, + running_stats=False) + if config.weight_path: + runner.load(filepath=config.weight_path, + running_metadata=False, + learning_rate=False, + optimizer=False, + running_stats=False) + runner.train() + + +if __name__ == '__main__': + main() diff --git a/ContraCLIP/models/genforce/utils/__init__.py b/ContraCLIP/models/genforce/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ContraCLIP/models/genforce/utils/logger.py b/ContraCLIP/models/genforce/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..b3a0aac64fa4a81f1f27c81c2c94915051da3e24 --- /dev/null +++ b/ContraCLIP/models/genforce/utils/logger.py @@ -0,0 +1,356 @@ +# python3.7 +"""Utility functions for logging.""" + +import os +import sys +import logging +from tqdm import tqdm +from rich.console import Console +from rich.logging import RichHandler +from rich.progress import Progress +from rich.progress import ProgressColumn +from rich.progress import TextColumn +from rich.progress import BarColumn +from rich.text import Text + +__all__ = ['build_logger', 'Logger', 'RichLogger', 'DumbLogger'] + +DEFAULT_WORK_DIR = 'work_dirs' + +_LOGGER_TYPES = ['normal', 'rich', 'dumb'] + + +def build_logger(logger_type='normal', **kwargs): + """Builds a logger. + + Supported Logger types: + `normal`: The default logger. + `rich`: Record messages with decoration, using `rich` module. + `dumb`: Do NOT record any message. + + Args: + logger_type: Type of logger, which is case insensitive. + (default: `normal`) + **kwargs: Additional arguments. + """ + assert isinstance(logger_type, str) + logger_type = logger_type.lower() + if logger_type not in _LOGGER_TYPES: + raise ValueError(f'Invalid logger type `{logger_type}`!\n' + f'Types allowed: {_LOGGER_TYPES}.') + if logger_type == 'normal': + return Logger(**kwargs) + if logger_type == 'rich': + return RichLogger(**kwargs) + if logger_type == 'dumb': + return DumbLogger(**kwargs) + raise NotImplementedError(f'Not implemented logger type `{logger_type}`!') + + +class Logger(object): + """Defines a logger to record log message both on screen and to file. + + The class sets up a logger with `DEBUG` log level. Two handlers will be + added to the logger. One is the `sys.stdout` stream, with `INFO` log level, + which will print important messages on the screen. The other is used to save + all messages to file `$WORK_DIR/$LOGFILE_NAME`. Each message is prefixed + with a time stamp and log level before being logged. + + NOTE: If `logfile_name` is empty, the file stream will be skipped. + """ + + def __init__(self, + work_dir=DEFAULT_WORK_DIR, + logfile_name='log.txt', + logger_name='logger'): + """Initializes the logger. + + Args: + work_dir: The work directory. (default: DEFAULT_WORK_DIR) + logfile_name: Name of the log file. (default: `log.txt`) + logger_name: Unique name for the logger.
(default: `logger`) + """ + self.logger = logging.getLogger(logger_name) + self.logger.propagate = False + if self.logger.hasHandlers(): # Already existed + raise SystemExit(f'Logger `{logger_name}` has already existed!\n' + f'Please use another name, or otherwise the ' + f'messages from these two logger may be mixed up.') + + self.logger.setLevel(logging.DEBUG) + formatter = logging.Formatter( + '[%(asctime)s][%(levelname)s] %(message)s', + datefmt='%Y-%m-%d %H:%M:%S') + + # Print log message with `INFO` level or above onto the screen. + terminal_handler = logging.StreamHandler(stream=sys.stdout) + terminal_handler.setLevel(logging.INFO) + terminal_handler.setFormatter(formatter) + self.logger.addHandler(terminal_handler) + + # Save log message with all levels into log file if needed. + if logfile_name: + os.makedirs(work_dir, exist_ok=True) + self.file_stream = open(os.path.join(work_dir, logfile_name), 'a') + file_handler = logging.StreamHandler(stream=self.file_stream) + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(formatter) + self.logger.addHandler(file_handler) + + self.log = self.logger.log + self.debug = self.logger.debug + self.info = self.logger.info + self.warning = self.logger.warning + self.error = self.logger.error + self.exception = self.logger.exception + self.critical = self.logger.critical + + self.pbar = [] + self.pbar_kwargs = None + + def __del__(self): + if hasattr(self, 'file_stream'): + self.file_stream.close() + + def print(self, *messages, **_unused_kwargs): + """Prints messages without time stamp or log level.""" + for handler in self.logger.handlers: + print(*messages, file=handler.stream) + + def init_pbar(self, leave=False): + """Initializes a progress bar which will display on the screen only. + + Args: + leave: Whether to leave the trace. (default: False) + """ + columns = [ + '{desc}', + '{bar}', + ' {percentage:5.1f}%', + '[{elapsed}<{remaining}, {rate_fmt}{postfix}]', + ] + self.pbar_kwargs = dict( + leave=leave, + bar_format=' '.join(columns), + unit='', + ) + + def add_pbar_task(self, name, total): + """Adds a task to the progress bar. + + Args: + name: Name of the new task. + total: Total number of steps (samples) contained in the task. + + Returns: + The task ID. + """ + assert isinstance(self.pbar_kwargs, dict) + self.pbar.append(tqdm(desc=name, total=total, **self.pbar_kwargs)) + return len(self.pbar) - 1 + + def update_pbar(self, task_id, advance=1): + """Updates a certain task in the progress bar. + + Args: + task_id: ID of the task to update. + advance: Number of steps advanced onto the target task. (default: 1) + """ + assert len(self.pbar) > task_id and isinstance(self.pbar[task_id], tqdm) + if self.pbar[task_id].n < self.pbar[task_id].total: + self.pbar[task_id].update(advance) + if self.pbar[task_id].n >= self.pbar[task_id].total: + self.pbar[task_id].refresh() + + def close_pbar(self): + """Closes the progress bar""" + for pbar in self.pbar[::-1]: + pbar.close() + self.pbar.clear() + self.pbar_kwargs = None + + +def _format_time(seconds): + """Formats seconds to readable time string. + + This function is used to display time in progress bar. 
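+ + For example, 75 seconds renders as `01:15` and 3725 seconds as `1:02:05`.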
+ """ + if not seconds: + return '--:--' + + seconds = int(seconds) + hours, seconds = divmod(seconds, 3600) + minutes, seconds = divmod(seconds, 60) + if hours: + return f'{hours}:{minutes:02d}:{seconds:02d}' + return f'{minutes:02d}:{seconds:02d}' + + +class TimeColumn(ProgressColumn): + """Renders total time, ETA, and speed in progress bar.""" + + max_refresh = 0.5 # Only refresh twice a second to prevent jitter + + def render(self, task): + elapsed_time = _format_time(task.elapsed) + eta = _format_time(task.time_remaining) + speed = f'{task.speed:.2f}/s' if task.speed else '?/s' + return Text(f'[{elapsed_time}<{eta}, {speed}]', + style="progress.remaining") + + +class RichLogger(object): + """Defines a logger based on `rich.RichHandler`. + + Compared to the basic Logger, this logger will decorate the log message in + a pretty format automatically. + """ + + def __init__(self, + work_dir=DEFAULT_WORK_DIR, + logfile_name='log.txt', + logger_name='logger'): + """Initializes the logger. + + Args: + work_dir: The work directory. (default: DEFAULT_WORK_DIR) + logfile_name: Name of the log file. (default: `log.txt`) + logger_name: Unique name for the logger. (default: `logger`) + """ + self.logger = logging.getLogger(logger_name) + self.logger.propagate = False + if self.logger.hasHandlers(): # Already existed + raise SystemExit(f'Logger `{logger_name}` has already existed!\n' + f'Please use another name, or otherwise the ' + f'messages from these two logger may be mixed up.') + + self.logger.setLevel(logging.DEBUG) + + # Print log message with `INFO` level or above onto the screen. + terminal_console = Console( + file=sys.stderr, log_time=False, log_path=False) + terminal_handler = RichHandler( + level=logging.INFO, + console=terminal_console, + show_time=True, + show_level=True, + show_path=False) + terminal_handler.setFormatter(logging.Formatter('%(message)s')) + self.logger.addHandler(terminal_handler) + + # Save log message with all levels into log file if needed. + if logfile_name: + os.makedirs(work_dir, exist_ok=True) + self.file_stream = open(os.path.join(work_dir, logfile_name), 'a') + file_console = Console( + file=self.file_stream, log_time=False, log_path=False) + file_handler = RichHandler( + level=logging.DEBUG, + console=file_console, + show_time=True, + show_level=True, + show_path=False) + file_handler.setFormatter(logging.Formatter('%(message)s')) + self.logger.addHandler(file_handler) + + self.log = self.logger.log + self.debug = self.logger.debug + self.info = self.logger.info + self.warning = self.logger.warning + self.error = self.logger.error + self.exception = self.logger.exception + self.critical = self.logger.critical + + self.pbar = None + + def __del__(self): + if hasattr(self, 'file_stream'): + self.file_stream.close() + + def print(self, *messages, **kwargs): + """Prints messages without time stamp or log level.""" + for handler in self.logger.handlers: + handler.console.print(*messages, **kwargs) + + def init_pbar(self, leave=False): + """Initializes a progress bar which will display on the screen only. + + Args: + leave: Whether to leave the trace. (default: False) + """ + assert self.pbar is None + + # Columns shown in the progress bar. 
+ columns = ( + TextColumn("[progress.description]{task.description}"), + BarColumn(bar_width=None), + TextColumn("[progress.percentage]{task.percentage:>5.1f}%"), + TimeColumn(), + ) + + self.pbar = Progress(*columns, + console=self.logger.handlers[0].console, + transient=not leave, + auto_refresh=True, + refresh_per_second=10) + self.pbar.start() + + def add_pbar_task(self, name, total): + """Adds a task to the progress bar. + + Args: + name: Name of the new task. + total: Total number of steps (samples) contained in the task. + + Returns: + The task ID. + """ + assert isinstance(self.pbar, Progress) + task_id = self.pbar.add_task(name, total=total) + return task_id + + def update_pbar(self, task_id, advance=1): + """Updates a certain task in the progress bar. + + Args: + task_id: ID of the task to update. + advance: Number of steps advanced onto the target task. (default: 1) + """ + assert isinstance(self.pbar, Progress) + if self.pbar.tasks[int(task_id)].finished: + if self.pbar.tasks[int(task_id)].stop_time is None: + self.pbar.stop_task(task_id) + else: + self.pbar.update(task_id, advance=advance) + + def close_pbar(self): + """Closes the progress bar""" + assert isinstance(self.pbar, Progress) + self.pbar.stop() + self.pbar = None + + +class DumbLogger(object): + """Implements a dumb logger. + + This logger also has member functions like `info()`, `warning()`, etc. But + nothing will be logged. + """ + + def __init__(self, *_unused_args, **_unused_kwargs): + """Initializes with dumb functions.""" + self.logger = None + self.log = lambda *args, **kwargs: None + self.debug = lambda *args, **kwargs: None + self.info = lambda *args, **kwargs: None + self.warning = lambda *args, **kwargs: None + self.error = lambda *args, **kwargs: None + self.exception = lambda *args, **kwargs: None + self.critical = lambda *args, **kwargs: None + self.print = lambda *args, **kwargs: None + + self.pbar = None + self.init_pbar = lambda *args, **kwargs: None + self.add_pbar_task = lambda *args, **kwargs: -1 + self.update_pbar = lambda *args, **kwargs: None + self.close_pbar = lambda *args, **kwargs: None diff --git a/ContraCLIP/models/genforce/utils/logger_test.py b/ContraCLIP/models/genforce/utils/logger_test.py new file mode 100644 index 0000000000000000000000000000000000000000..338a7f4c283943ab04ac368adab1e2bc069d880e --- /dev/null +++ b/ContraCLIP/models/genforce/utils/logger_test.py @@ -0,0 +1,36 @@ +# python3.7 +"""Unit test for logger.""" + +import time + +from .logger import build_logger + + +def test_logger(): + """Test function.""" + + for logger_type in ['normal', 'rich', 'dumb']: + if logger_type == 'normal': + class_name = 'Logger' + elif logger_type == 'rich': + class_name = 'RichLogger' + elif logger_type == 'dumb': + class_name = 'DumbLogger' + + print(f'===== Test `utils.logger.{class_name}` =====') + logger = build_logger(logger_type, + logger_name=logger_type, + logfile_name=f'test_{logger_type}_logger.log') + logger.print('print log') + logger.debug('debug log') + logger.info('info log') + logger.warning('warning log') + logger.init_pbar() + task1 = logger.add_pbar_task('Task 1', 500) + task2 = logger.add_pbar_task('Task 2', 1000) + for _ in range(1000): + logger.update_pbar(task1, 1) + logger.update_pbar(task2, 1) + time.sleep(0.005) + logger.close_pbar() + print('Success!') diff --git a/ContraCLIP/models/genforce/utils/misc.py b/ContraCLIP/models/genforce/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..f4d6c627a6b61b2b1df4acff105c61d766243d94 --- 
/dev/null +++ b/ContraCLIP/models/genforce/utils/misc.py @@ -0,0 +1,130 @@ +# python3.7 +"""Misc utility functions.""" + +import os +import sys +import subprocess +from importlib import import_module +import argparse +from easydict import EasyDict + +import torch +import torch.distributed as dist +import torch.multiprocessing as mp + +__all__ = [ + 'init_dist', 'bool_parser', 'DictAction', 'parse_config', 'update_config' +] + + +def init_dist(launcher, backend='nccl', **kwargs): + """Initializes distributed environment.""" + if mp.get_start_method(allow_none=True) is None: + mp.set_start_method('spawn') + if launcher == 'pytorch': + rank = int(os.environ['RANK']) + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + elif launcher == 'slurm': + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(proc_id % num_gpus) + addr = subprocess.getoutput( + f'scontrol show hostname {node_list} | head -n1') + port = os.environ.get('PORT', 29500) + os.environ['MASTER_PORT'] = str(port) + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['RANK'] = str(proc_id) + dist.init_process_group(backend=backend) + else: + raise NotImplementedError(f'Not implemented launcher type: ' + f'`{launcher}`!') + +def bool_parser(arg): + """Parses an argument to boolean.""" + if isinstance(arg, bool): + return arg + if arg.lower() in ['1', 'true', 't', 'yes', 'y']: + return True + if arg.lower() in ['0', 'false', 'f', 'no', 'n']: + return False + raise argparse.ArgumentTypeError(f'`{arg}` cannot be converted to boolean!') + + +class DictAction(argparse.Action): + """Argparse action to split an argument into key-value. + + NOTE: This class is borrowed from + https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py + """ + + @staticmethod + def _parse_int_float_bool(val): + try: + return int(val) + except ValueError: + pass + try: + return float(val) + except ValueError: + pass + if val.lower() in ['true', 'false']: + return val.lower() == 'true' + return val + + def __call__(self, parser, namespace, values, option_string=None): + options = {} + for kv in values: + key, val = kv.split('=', maxsplit=1) + val = [self._parse_int_float_bool(v) for v in val.split(',')] + if len(val) == 1: + val = val[0] + options[key] = val + setattr(namespace, self.dest, options) + + +def parse_config(config_file): + """Parses configuration from python file.""" + assert os.path.isfile(config_file) + directory = os.path.dirname(config_file) + filename = os.path.basename(config_file) + module_name, extension = os.path.splitext(filename) + assert extension == '.py' + sys.path.insert(0, directory) + module = import_module(module_name) + sys.path.pop(0) + config = EasyDict() + for key, value in module.__dict__.items(): + if key.startswith('__'): + continue + config[key] = value + del sys.modules[module_name] + return config + + +def update_config(config, new_config): + """Updates configuration in a hierarchical level. 
+ + For key-value pair {'a.b.c.d': v} in `new_config`, the `config` will be + updated by + + config['a']['b']['c']['d'] = v + """ + if new_config is None: + return config + + assert isinstance(config, dict) + assert isinstance(new_config, dict) + + for key, val in new_config.items(): + hierarchical_keys = key.split('.') + temp = config + for sub_key in hierarchical_keys[:-1]: + temp = temp[sub_key] + temp[hierarchical_keys[-1]] = val + + return config diff --git a/ContraCLIP/models/genforce/utils/visualizer.py b/ContraCLIP/models/genforce/utils/visualizer.py new file mode 100644 index 0000000000000000000000000000000000000000..eb7cd5d925c96beb386d0652be6d4ecb6ace8a2f --- /dev/null +++ b/ContraCLIP/models/genforce/utils/visualizer.py @@ -0,0 +1,746 @@ +# python3.7 +"""Utility functions for visualizing results.""" + +import base64 +import os.path +import cv2 +import numpy as np +from bs4 import BeautifulSoup + +__all__ = [ + 'get_grid_shape', 'get_blank_image', 'load_image', 'save_image', + 'resize_image', 'postprocess_image', 'add_text_to_image', + 'parse_image_size', 'fuse_images', 'HtmlPageVisualizer', 'HtmlPageReader', + 'VideoReader', 'VideoWriter' +] + + +def get_grid_shape(size, row=0, col=0, is_portrait=False): + """Gets the shape of a grid based on the size. + + This function makes greatest effort on making the output grid square if + neither `row` nor `col` is set. If `is_portrait` is set as `False`, the + height will always be equal to or smaller than the width. For example, if + input `size = 16`, output shape will be `(4, 4)`; if input `size = 15`, + output shape will be (3, 5). Otherwise, the height will always be equal to + or larger than the width. + + Args: + size: Size (height * width) of the target grid. + is_portrait: Whether to return a portrait size of a landscape size. + (default: False) + + Returns: + A two-element tuple, representing height and width respectively. + """ + assert isinstance(size, int) + assert isinstance(row, int) + assert isinstance(col, int) + if size == 0: + return (0, 0) + + if row > 0 and col > 0 and row * col != size: + row = 0 + col = 0 + + if row > 0 and size % row == 0: + return (row, size // row) + if col > 0 and size % col == 0: + return (size // col, col) + + row = int(np.sqrt(size)) + while row > 0: + if size % row == 0: + col = size // row + break + row = row - 1 + + return (col, row) if is_portrait else (row, col) + + +def get_blank_image(height, width, channels=3, is_black=True): + """Gets a blank image, either white of black. + + NOTE: This function will always return an image with `RGB` channel order for + color image and pixel range [0, 255]. + + Args: + height: Height of the returned image. + width: Width of the returned image. + channels: Number of channels. (default: 3) + is_black: Whether to return a black image. (default: True) + """ + shape = (height, width, channels) + if is_black: + return np.zeros(shape, dtype=np.uint8) + return np.ones(shape, dtype=np.uint8) * 255 + + +def load_image(path, image_channels=3): + """Loads an image from disk. + + NOTE: This function will always return an image with `RGB` channel order for + color image and pixel range [0, 255]. + + Args: + path: Path to load the image from. + image_channels: Number of image channels of returned image. This field + is employed since `cv2.imread()` will always return a 3-channel + image, even for grayscale image. + + Returns: + An image with dtype `np.ndarray`, or `None` if `path` does not exist. 
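+
+    Example (illustrative only; `face.jpg` is a hypothetical file path):
+        image = load_image('face.jpg')                    # HxWx3, RGB, uint8
+        gray = load_image('face.jpg', image_channels=1)   # HxWx1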
+ """ + if not os.path.isfile(path): + return None + + assert image_channels in [1, 3] + + image = cv2.imread(path) + assert image.ndim == 3 and image.shape[2] == 3 + if image_channels == 1: + return image[:, :, 0:1] + return image[:, :, ::-1] + + +def save_image(path, image): + """Saves an image to disk. + + NOTE: The input image (if colorful) is assumed to be with `RGB` channel + order and pixel range [0, 255]. + + Args: + path: Path to save the image to. + image: Image to save. + """ + if image is None: + return + + assert image.ndim == 3 and image.shape[2] in [1, 3] + cv2.imwrite(path, image[:, :, ::-1]) + + +def resize_image(image, *args, **kwargs): + """Resizes image. + + This is a wrap of `cv2.resize()`. + + NOTE: THe channel order of the input image will not be changed. + + Args: + image: Image to resize. + """ + if image is None: + return None + + assert image.ndim == 3 and image.shape[2] in [1, 3] + image = cv2.resize(image, *args, **kwargs) + if image.ndim == 2: + return image[:, :, np.newaxis] + return image + + +def postprocess_image(image, min_val=-1.0, max_val=1.0, data_format='NCHW'): + """Post-processes image to pixel range [0, 255] with dtype `uint8`. + + NOTE: The returned image will always be with `HWC` format. + + Args: + min_val: Minimum value of the input image. + max_val: Maximum value of the input image. + data_format: Data format of the input image. Supporting `NCHW`, `NHWC`, + `CHW`, `HWC`. + + Returns: + The post-processed image. + + Raises: + NotImplementedError: If the input `data_format` is not support. + """ + assert isinstance(image, np.ndarray) + image = image.astype(np.float64) + image = (image - min_val) * 255 / (max_val - min_val) + image = np.clip(image + 0.5, 0, 255).astype(np.uint8) + data_format = data_format.upper() + if data_format == 'NCHW': + assert image.ndim == 4 and image.shape[1] in [1, 3] + return image.transpose(0, 2, 3, 1) + if data_format == 'NHWC': + assert image.ndim == 4 and image.shape[3] in [1, 3] + return image + if data_format == 'CHW': + assert image.ndim == 3 and image.shape[0] in [1, 3] + return image.transpose(1, 2, 0) + if data_format == 'HWC': + assert image.ndim == 3 and image.shape[2] in [1, 3] + return image + raise NotImplementedError(f'Data format `{data_format}` is not supported!') + + +def add_text_to_image(image, + text='', + position=None, + font=cv2.FONT_HERSHEY_TRIPLEX, + font_size=1.0, + line_type=cv2.LINE_8, + line_width=1, + color=(255, 255, 255)): + """Overlays text on given image. + + NOTE: The input image is assumed to be with `RGB` channel order. + + Args: + image: The image to overlay text on. + text: Text content to overlay on the image. (default: '') + position: Target position (bottom-left corner) to add text. If not set, + center of the image will be used by default. (default: None) + font: Font of the text added. (default: cv2.FONT_HERSHEY_TRIPLEX) + font_size: Font size of the text added. (default: 1.0) + line_type: Line type used to depict the text. (default: cv2.LINE_8) + line_width: Line width used to depict the text. (default: 1) + color: Color of the text added in `RGB` channel order. (default: + (255, 255, 255)) + + Returns: + An image with target text overlayed on. 
+ """ + if image is None or not text: + return image + + cv2.putText(img=image, + text=text, + org=position, + fontFace=font, + fontScale=font_size, + color=color, + thickness=line_width, + lineType=line_type, + bottomLeftOrigin=False) + + return image + + +def parse_image_size(obj): + """Parses object to a pair of image size, i.e., (width, height). + + Args: + obj: The input object to parse image size from. + + Returns: + A two-element tuple, indicating image width and height respectively. + + Raises: + If the input is invalid, i.e., neither a list or tuple, nor a string. + """ + if obj is None or obj == '': + width = height = 0 + elif isinstance(obj, int): + width = height = obj + elif isinstance(obj, (list, tuple, np.ndarray)): + numbers = tuple(obj) + if len(numbers) == 0: + width = height = 0 + elif len(numbers) == 1: + width = height = numbers[0] + elif len(numbers) == 2: + width = numbers[0] + height = numbers[1] + else: + raise ValueError(f'At most two elements for image size.') + elif isinstance(obj, str): + splits = obj.replace(' ', '').split(',') + numbers = tuple(map(int, splits)) + if len(numbers) == 0: + width = height = 0 + elif len(numbers) == 1: + width = height = numbers[0] + elif len(numbers) == 2: + width = numbers[0] + height = numbers[1] + else: + raise ValueError(f'At most two elements for image size.') + else: + raise ValueError(f'Invalid type of input: {type(obj)}!') + + return (max(0, width), max(0, height)) + + +def fuse_images(images, + image_size=None, + row=0, + col=0, + is_row_major=True, + is_portrait=False, + row_spacing=0, + col_spacing=0, + border_left=0, + border_right=0, + border_top=0, + border_bottom=0, + black_background=True): + """Fuses a collection of images into an entire image. + + Args: + images: A collection of images to fuse. Should be with shape [num, + height, width, channels]. + image_size: This field is used to resize the image before fusion. `0` + disables resizing. (default: None) + row: Number of rows used for image fusion. If not set, this field will + be automatically assigned based on `col` and total number of images. + (default: None) + col: Number of columns used for image fusion. If not set, this field + will be automatically assigned based on `row` and total number of + images. (default: None) + is_row_major: Whether the input images should be arranged row-major or + column-major. (default: True) + is_portrait: Only active when both `row` and `col` should be assigned + automatically. (default: False) + row_spacing: Space between rows. (default: 0) + col_spacing: Space between columns. (default: 0) + border_left: Width of left border. (default: 0) + border_right: Width of right border. (default: 0) + border_top: Width of top border. (default: 0) + border_bottom: Width of bottom border. (default: 0) + + Returns: + The fused image. + + Raises: + ValueError: If the input `images` is not with shape [num, height, width, + width]. 
+ """ + if images is None: + return images + + if images.ndim != 4: + raise ValueError(f'Input `images` should be with shape [num, height, ' + f'width, channels], but {images.shape} is received!') + + num, image_height, image_width, channels = images.shape + width, height = parse_image_size(image_size) + height = height or image_height + width = width or image_width + row, col = get_grid_shape(num, row=row, col=col, is_portrait=is_portrait) + fused_height = ( + height * row + row_spacing * (row - 1) + border_top + border_bottom) + fused_width = ( + width * col + col_spacing * (col - 1) + border_left + border_right) + fused_image = get_blank_image( + fused_height, fused_width, channels=channels, is_black=black_background) + images = images.reshape(row, col, image_height, image_width, channels) + if not is_row_major: + images = images.transpose(1, 0, 2, 3, 4) + + for i in range(row): + y = border_top + i * (height + row_spacing) + for j in range(col): + x = border_left + j * (width + col_spacing) + if height != image_height or width != image_width: + image = cv2.resize(images[i, j], (width, height)) + else: + image = images[i, j] + fused_image[y:y + height, x:x + width] = image + + return fused_image + + +def get_sortable_html_header(column_name_list, sort_by_ascending=False): + """Gets header for sortable html page. + + Basically, the html page contains a sortable table, where user can sort the + rows by a particular column by clicking the column head. + + Example: + + column_name_list = [name_1, name_2, name_3] + header = get_sortable_html_header(column_name_list) + footer = get_sortable_html_footer() + sortable_table = ... + html_page = header + sortable_table + footer + + Args: + column_name_list: List of column header names. + sort_by_ascending: Default sorting order. If set as `True`, the html + page will be sorted by ascending order when the header is clicked + for the first time. + + Returns: + A string, which represents for the header for a sortable html page. + """ + header = '\n'.join([ + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '']) + for idx, name in enumerate(column_name_list): + header += f' \n' + header += '\n' + header += '\n' + header += '\n' + + return header + + +def get_sortable_html_footer(): + """Gets footer for sortable html page. + + Check function `get_sortable_html_header()` for more details. + """ + return '\n
{name}
\n\n\n\n' + + +def encode_image_to_html_str(image, image_size=None): + """Encodes an image to html language. + + NOTE: Input image is always assumed to be with `RGB` channel order. + + Args: + image: The input image to encode. Should be with `RGB` channel order. + image_size: This field is used to resize the image before encoding. `0` + disables resizing. (default: None) + + Returns: + A string which represents the encoded image. + """ + if image is None: + return '' + + assert image.ndim == 3 and image.shape[2] in [1, 3] + + # Change channel order to `BGR`, which is opencv-friendly. + image = image[:, :, ::-1] + + # Resize the image if needed. + width, height = parse_image_size(image_size) + if height or width: + height = height or image.shape[0] + width = width or image.shape[1] + image = cv2.resize(image, (width, height)) + + # Encode the image to html-format string. + encoded_image = cv2.imencode('.jpg', image)[1].tostring() + encoded_image_base64 = base64.b64encode(encoded_image).decode('utf-8') + html_str = f'' + + return html_str + + +def decode_html_str_to_image(html_str, image_size=None): + """Decodes image from html. + + Args: + html_str: Image string parsed from html. + image_size: This field is used to resize the image after decoding. `0` + disables resizing. (default: None) + + Returns: + An image with `RGB` channel order. + """ + if not html_str: + return None + + assert isinstance(html_str, str) + image_str = html_str.split(',')[-1] + encoded_image = base64.b64decode(image_str) + encoded_image_numpy = np.frombuffer(encoded_image, dtype=np.uint8) + image = cv2.imdecode(encoded_image_numpy, flags=cv2.IMREAD_COLOR) + + # Resize the image if needed. + width, height = parse_image_size(image_size) + if height or width: + height = height or image.shape[0] + width = width or image.shape[1] + image = cv2.resize(image, (width, height)) + + return image[:, :, ::-1] + + +class HtmlPageVisualizer(object): + """Defines the html page visualizer. + + This class can be used to visualize image results as html page. Basically, + it is based on an html-format sorted table with helper functions + `get_sortable_html_header()`, `get_sortable_html_footer()`, and + `encode_image_to_html_str()`. To simplify the usage, specifying the + following fields are enough to create a visualization page: + + (1) num_rows: Number of rows of the table (header-row exclusive). + (2) num_cols: Number of columns of the table. + (3) header contents (optional): Title of each column. + + NOTE: `grid_size` can be used to assign `num_rows` and `num_cols` + automatically. 
+ + Example: + + html = HtmlPageVisualizer(num_rows, num_cols) + html.set_headers([...]) + for i in range(num_rows): + for j in range(num_cols): + html.set_cell(i, j, text=..., image=..., highlight=False) + html.save('visualize.html') + """ + + def __init__(self, + num_rows=0, + num_cols=0, + grid_size=0, + is_portrait=True, + viz_size=None): + if grid_size > 0: + num_rows, num_cols = get_grid_shape( + grid_size, row=num_rows, col=num_cols, is_portrait=is_portrait) + assert num_rows > 0 and num_cols > 0 + + self.num_rows = num_rows + self.num_cols = num_cols + self.viz_size = parse_image_size(viz_size) + self.headers = ['' for _ in range(self.num_cols)] + self.cells = [[{ + 'text': '', + 'image': '', + 'highlight': False, + } for _ in range(self.num_cols)] for _ in range(self.num_rows)] + + def set_header(self, col_idx, content): + """Sets the content of a particular header by column index.""" + self.headers[col_idx] = content + + def set_headers(self, contents): + """Sets the contents of all headers.""" + if isinstance(contents, str): + contents = [contents] + assert isinstance(contents, (list, tuple)) + assert len(contents) == self.num_cols + for col_idx, content in enumerate(contents): + self.set_header(col_idx, content) + + def set_cell(self, row_idx, col_idx, text='', image=None, highlight=False): + """Sets the content of a particular cell. + + Basically, a cell contains some text as well as an image. Both text and + image can be empty. + + Args: + row_idx: Row index of the cell to edit. + col_idx: Column index of the cell to edit. + text: Text to add into the target cell. (default: None) + image: Image to show in the target cell. Should be with `RGB` + channel order. (default: None) + highlight: Whether to highlight this cell. (default: False) + """ + self.cells[row_idx][col_idx]['text'] = text + self.cells[row_idx][col_idx]['image'] = encode_image_to_html_str( + image, self.viz_size) + self.cells[row_idx][col_idx]['highlight'] = bool(highlight) + + def save(self, save_path): + """Saves the html page.""" + html = '' + for i in range(self.num_rows): + html += f'\n' + for j in range(self.num_cols): + text = self.cells[i][j]['text'] + image = self.cells[i][j]['image'] + if self.cells[i][j]['highlight']: + color = ' bgcolor="#FF8888"' + else: + color = '' + if text: + html += f' {text}

{image}\n' + else: + html += f' {image}\n' + html += f'\n' + + header = get_sortable_html_header(self.headers) + footer = get_sortable_html_footer() + + with open(save_path, 'w') as f: + f.write(header + html + footer) + + +class HtmlPageReader(object): + """Defines the html page reader. + + This class can be used to parse results from the visualization page + generated by `HtmlPageVisualizer`. + + Example: + + html = HtmlPageReader(html_path) + for j in range(html.num_cols): + header = html.get_header(j) + for i in range(html.num_rows): + for j in range(html.num_cols): + text = html.get_text(i, j) + image = html.get_image(i, j, image_size=None) + """ + def __init__(self, html_path): + """Initializes by loading the content from file.""" + self.html_path = html_path + if not os.path.isfile(html_path): + raise ValueError(f'File `{html_path}` does not exist!') + + # Load content. + with open(html_path, 'r') as f: + self.html = BeautifulSoup(f, 'html.parser') + + # Parse headers. + thead = self.html.find('thead') + headers = thead.findAll('th') + self.headers = [] + for header in headers: + self.headers.append(header.text) + self.num_cols = len(self.headers) + + # Parse cells. + tbody = self.html.find('tbody') + rows = tbody.findAll('tr') + self.cells = [] + for row in rows: + cells = row.findAll('td') + self.cells.append([]) + for cell in cells: + self.cells[-1].append({ + 'text': cell.text, + 'image': cell.find('img')['src'], + }) + assert len(self.cells[-1]) == self.num_cols + self.num_rows = len(self.cells) + + def get_header(self, j): + """Gets header for a particular column.""" + return self.headers[j] + + def get_text(self, i, j): + """Gets text from a particular cell.""" + return self.cells[i][j]['text'] + + def get_image(self, i, j, image_size=None): + """Gets image from a particular cell.""" + return decode_html_str_to_image(self.cells[i][j]['image'], image_size) + + +class VideoReader(object): + """Defines the video reader. + + This class can be used to read frames from a given video. + """ + + def __init__(self, path): + """Initializes the video reader by loading the video from disk.""" + if not os.path.isfile(path): + raise ValueError(f'Video `{path}` does not exist!') + + self.path = path + self.video = cv2.VideoCapture(path) + assert self.video.isOpened() + self.position = 0 + + self.length = int(self.video.get(cv2.CAP_PROP_FRAME_COUNT)) + self.frame_height = int(self.video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + self.frame_width = int(self.video.get(cv2.CAP_PROP_FRAME_WIDTH)) + self.fps = self.video.get(cv2.CAP_PROP_FPS) + + def __del__(self): + """Releases the opened video.""" + self.video.release() + + def read(self, position=None): + """Reads a certain frame. + + NOTE: The returned frame is assumed to be with `RGB` channel order. + + Args: + position: Optional. If set, the reader will read frames from the + exact position. Otherwise, the reader will read next frames. + (default: None) + """ + if position is not None and position < self.length: + self.video.set(cv2.CAP_PROP_POS_FRAMES, position) + self.position = position + + success, frame = self.video.read() + self.position = self.position + 1 + + return frame[:, :, ::-1] if success else None + + +class VideoWriter(object): + """Defines the video writer. + + This class can be used to create a video. + + NOTE: `.avi` and `DIVX` is the most recommended codec format since it does + not rely on other dependencies. 
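+
+    Example (a minimal sketch; `np` is the module-level numpy import, and
+    `out.avi` is a hypothetical output path):
+        writer = VideoWriter('out.avi', frame_height=256, frame_width=256)
+        for _ in range(24):
+            frame = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)
+            writer.write(frame)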
+ """ + + def __init__(self, path, frame_height, frame_width, fps=24, codec='DIVX'): + """Creates the video writer.""" + self.path = path + self.frame_height = frame_height + self.frame_width = frame_width + self.fps = fps + self.codec = codec + + self.video = cv2.VideoWriter(filename=path, + fourcc=cv2.VideoWriter_fourcc(*codec), + fps=fps, + frameSize=(frame_width, frame_height)) + + def __del__(self): + """Releases the opened video.""" + self.video.release() + + def write(self, frame): + """Writes a target frame. + + NOTE: The input frame is assumed to be with `RGB` channel order. + """ + self.video.write(frame[:, :, ::-1]) diff --git a/ContraCLIP/models/jung_radii.json b/ContraCLIP/models/jung_radii.json new file mode 100644 index 0000000000000000000000000000000000000000..8fbcd415c9a679478dd4c2bd1565272732df0e40 --- /dev/null +++ b/ContraCLIP/models/jung_radii.json @@ -0,0 +1 @@ +{"pggan_celebahq1024": {"Z": [0.0, 26.58317756652832]}, "pggan_church256": {"Z": [0.0, 26.521297454833984]}, "pggan_car256": {"Z": [0.0, 26.50217628479004]}, "stylegan2_ffhq1024": {"Z": [0.0, 26.577896118164062], "W": {"0": [19.693096285055184, 5.8070265041010316e-08], "1": [27.863825914962064, 3.4815407268240506e-07], "2": [34.131626991391585, 2.481756347094688e-07], "3": [39.41501415696475, 6.35090437128838e-07], "4": [44.06947641939221, 1.2271238922778593e-06], "5": [48.277262162375386, 2.172426018631768e-06], "6": [52.14661665812577, 2.4128904421161224e-06], "7": [55.74804662226059, 2.4421301461075018e-06], "8": [59.130535104015074, 2.9349563135383505e-06], "9": [62.32973554976312, 2.4451202094155633e-06], "10": [65.37255577458802, 3.033502643745578e-06], "11": [68.27991356833932, 3.2974155104170677e-06], "12": [71.0684326470882, 2.7158315276665235e-06], "13": [73.75159446760375, 3.298517093242026e-06], "14": [76.34051008853974, 1.007432587130097e-06], "15": [78.84446075332441, -3.7039074385347703e-07], "16": [81.27130542289308, -7.584701080531886e-07], "17": [83.62775052955048, -9.79042688697973e-07]}}, "stylegan2_afhqcat512": {"Z": [0.0, 26.661848068237305], "W": {"0": [17.814635191105385, -1.8532913870217271e-07], "1": [25.205985746918735, -4.251561964707662e-07], "2": [30.875919756244116, 6.25726809033722e-07], "3": [35.655340829226056, 1.3485521321854321e-06], "4": [39.865831142841, 1.5636481620617815e-07], "5": [43.672251291243555, 1.2542547978000584e-07], "6": [47.17252091259847, -2.7527510937375155e-07], "7": [50.43042110531245, -1.0729623767247176e-07], "8": [53.4902658808743, -8.105917039813448e-07], "9": [56.38430537516529, -2.0133858953386152e-06], "10": [59.13688241310246, -2.896557553810908e-06], "11": [61.76691825759687, -3.55566295695553e-06], "12": [64.28944918503463, -4.832988530267812e-06], "13": [66.71667526547272, -5.965941028307498e-06], "14": [69.0586410343057, -6.651547884928277e-06], "15": [71.32374751559972, -7.4827238307761945e-06]}}, "stylegan2_afhqdog512": {"Z": [0.0, 26.78862190246582], "W": {"0": [16.610793462523535, -7.008719382639583e-07], "1": [23.502664856076635, -2.000512875355298e-07], "2": [28.78944457718248, 2.789182630635878e-06], "3": [33.2458942576997, 1.7696009173562288e-06], "4": [37.171855802504545, 7.357770392957264e-07], "5": [40.72105149719189, 1.29023006678608e-06], "6": [43.98478194730904, 4.388475574046424e-06], "7": [47.02252296315562, 7.203829163415776e-06], "8": [49.8755902932422, 1.0025949759295827e-05], "9": [52.57405775340872, 1.268333333115379e-05], "10": [55.140623983691626, 1.4153596982424688e-05], "11": [57.5929456613245, 3.6627073747297345e-06], 
"12": [59.945029836283275, -7.81830388518756e-06], "13": [62.20824571867588, -1.8019188367190964e-05], "14": [64.3919619440639, -2.7173139869773877e-05], "15": [66.50401228536475, -3.632772087769354e-05]}}, "stylegan2_car512": {"Z": [0.0, 26.54909324645996], "W": {"0": [40.513785471774575, -2.7958710191455793e-07], "1": [57.32308724689799, -5.609052848853935e-07], "2": [70.21757514154163, -2.5791067557179304e-06], "3": [81.0868652192673, -1.6129371047668428e-06], "4": [90.6622946641626, 1.040103015270688e-06], "5": [99.31879847246428, 2.722850332759208e-06], "6": [107.27905536689379, 5.5139371681889315e-06], "7": [114.6881240970624, 7.260073928705424e-06], "8": [121.6467767306287, 7.432553616126825e-06], "9": [128.2283551074491, 8.471649465491282e-06], "10": [134.48822302991408, 9.113790170545144e-06], "11": [140.46941021015937, 9.093961296002817e-06], "12": [146.20610853516231, 9.904049392162051e-06], "13": [151.72606452904128, 1.1957376287341503e-05], "14": [157.05212698706703, 1.0424447125956249e-05], "15": [162.20340341750548, 5.866954012390124e-06]}}, "stylegan2_church256": {"Z": [0.0, 26.62998390197754], "W": {"0": [65.0133429593379, 1.708210092488116e-06], "1": [91.98759347078432, 1.4913829531337797e-06], "2": [112.67965392311024, 5.572857020297306e-07], "3": [130.12184993001102, -4.2359506551292725e-06], "4": [145.4877587963646, -8.556567436812657e-06], "5": [159.37904536307056, -1.1719555757849776e-05], "6": [172.15304982024847, -1.4059614443340251e-05], "7": [184.0425492113573, -1.5149132281067068e-05], "8": [195.2092491691262, -1.4811726686048132e-05], "9": [205.77085044533507, -1.4459717021964025e-05], "10": [215.8161987254329, -1.5103431465490758e-05], "11": [225.41434084196692, -1.6178596951021973e-05], "12": [234.62013190996535, -6.952946762339707e-06], "13": [243.47812896955136, -4.452998126680541e-06]}}} \ No newline at end of file diff --git a/ContraCLIP/models/load_generator.py b/ContraCLIP/models/load_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..0361bfbc6846366d05274152a7ae9dc6f424d119 --- /dev/null +++ b/ContraCLIP/models/load_generator.py @@ -0,0 +1,43 @@ +import sys +from models.genforce.models import MODEL_ZOO +from models.genforce.models import build_generator +import os +import os.path as osp +import subprocess +import torch + + +def load_generator(model_name, latent_is_w=False, verbose=False, CHECKPOINT_DIR='models/pretrained/genforce/'): + + if verbose: + print(" \\__Building generator for model {}...".format(model_name), end="") + + model_config = MODEL_ZOO[model_name].copy() + url = model_config.pop('url') # URL to download model if needed. + model_config.update({'latent_is_w': latent_is_w}) + + # Build generator + generator = build_generator(**model_config) + if verbose: + print("Done!") + + # Load pre-trained weights. 
+ os.makedirs(CHECKPOINT_DIR, exist_ok=True) + checkpoint_path = osp.join(CHECKPOINT_DIR, model_name + '.pth') + + if verbose: + print(" \\__Loading checkpoint from {}...".format(checkpoint_path), end="") + + if not osp.exists(checkpoint_path): + subprocess.call(['wget', '--quiet', '-O', checkpoint_path, url]) + checkpoint = torch.load(checkpoint_path, map_location='cpu') + if 'generator_smooth' in checkpoint: + generator.load_state_dict(checkpoint['generator_smooth']) + else: + generator.load_state_dict(checkpoint['generator']) + if verbose: + print("Done!") + + generator.dim_z = generator.z_space_dim + + return generator diff --git a/ContraCLIP/requirements.txt b/ContraCLIP/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..8acc2f7a69e5751d06c586d7b8d308cd04e2fd19 --- /dev/null +++ b/ContraCLIP/requirements.txt @@ -0,0 +1,15 @@ +ipython +ipykernel +ipywidgets +numpy +sklearn +scikit-image +matplotlib +ftfy +regex +tqdm +opencv-python +requests +ninja +Tinkerer +tk diff --git a/ContraCLIP/sample_gan.py b/ContraCLIP/sample_gan.py new file mode 100644 index 0000000000000000000000000000000000000000..994dd90244be314db9301c4da6af79b41d70adeb --- /dev/null +++ b/ContraCLIP/sample_gan.py @@ -0,0 +1,145 @@ +import os +import os.path as osp +import argparse +import torch +import json +from hashlib import sha1 +from torchvision.transforms import ToPILImage +from lib import GENFORCE_MODELS, update_progress, update_stdout +from models.load_generator import load_generator + + +def tensor2image(tensor, img_size=None, adaptive=False): + # Squeeze tensor image + tensor = tensor.squeeze(dim=0) + if adaptive: + tensor = (tensor - tensor.min()) / (tensor.max() - tensor.min()) + if img_size: + return ToPILImage()((255 * tensor.cpu().detach()).to(torch.uint8)).resize((img_size, img_size)) + else: + return ToPILImage()((255 * tensor.cpu().detach()).to(torch.uint8)) + else: + tensor = (tensor + 1) / 2 + tensor.clamp(0, 1) + if img_size: + return ToPILImage()((255 * tensor.cpu().detach()).to(torch.uint8)).resize((img_size, img_size)) + else: + return ToPILImage()((255 * tensor.cpu().detach()).to(torch.uint8)) + + +def main(): + """A script for sampling from a pre-trained GAN's latent space and generating images. The generated images, along + with the corresponding latent codes, will be stored under `experiments/latent_codes//`. + + Options: + -v, --verbose : set verbose mode on + --gan : set GAN generator (see GENFORCE_MODELS in lib/config.py) + --truncation : set W-space truncation parameter. 
If set, W-space codes will be truncated + --num-samples : set the number of latent codes to sample for generating images + --cuda : use CUDA (default) + --no-cuda : do not use CUDA + """ + parser = argparse.ArgumentParser(description="Sample a pre-trained GAN latent space and generate images") + parser.add_argument('-v', '--verbose', action='store_true', help="verbose mode on") + parser.add_argument('--gan', type=str, required=True, choices=GENFORCE_MODELS.keys(), help='GAN generator') + parser.add_argument('--truncation', type=float, default=1.0, help="W-space truncation parameter") + parser.add_argument('--num-samples', type=int, default=4, help="set number of latent codes to sample") + parser.add_argument('--cuda', dest='cuda', action='store_true', help="use CUDA during training") + parser.add_argument('--no-cuda', dest='cuda', action='store_false', help="do NOT use CUDA during training") + parser.set_defaults(cuda=True) + # ================================================================================================================ # + + # Parse given arguments + args = parser.parse_args() + + # Create output dir for generated images + out_dir = osp.join('experiments', 'latent_codes', args.gan) + out_dir = osp.join(out_dir, '{}-{}'.format(args.gan, args.num_samples)) + os.makedirs(out_dir, exist_ok=True) + + # Save argument in json file + with open(osp.join(out_dir, 'args.json'), 'w') as args_json_file: + json.dump(args.__dict__, args_json_file) + + # CUDA + use_cuda = False + if torch.cuda.is_available(): + if args.cuda: + use_cuda = True + torch.set_default_tensor_type('torch.cuda.FloatTensor') + else: + print("*** WARNING ***: It looks like you have a CUDA device, but aren't using CUDA.\n" + " Run with --cuda for optimal training speed.") + torch.set_default_tensor_type('torch.FloatTensor') + else: + torch.set_default_tensor_type('torch.FloatTensor') + + # Build GAN generator model and load with pre-trained weights + if args.verbose: + print("#. Build GAN generator model G and load with pre-trained weights...") + print(" \\__GAN generator : {} (res: {})".format(args.gan, GENFORCE_MODELS[args.gan][1])) + print(" \\__Pre-trained weights: {}".format(GENFORCE_MODELS[args.gan][0])) + + G = load_generator(model_name=args.gan, + latent_is_w='stylegan' in args.gan, + verbose=args.verbose).eval() + + # Upload GAN generator model to GPU + if use_cuda: + G = G.cuda() + + # Latent codes sampling + if args.verbose: + print("#. Sample {} {}-dimensional latent codes...".format(args.num_samples, G.dim_z)) + zs = torch.randn(args.num_samples, G.dim_z) + + if use_cuda: + zs = zs.cuda() + + if args.verbose: + print("#. 
Generate images...") + print(" \\__{}".format(out_dir)) + + # Iterate over given latent codes + for i in range(args.num_samples): + # Un-squeeze current latent code in shape [1, dim] and create hash code for it + z = zs[i, :].unsqueeze(0) + latent_code_hash = sha1(z.cpu().numpy()).hexdigest() + + if args.verbose: + update_progress( + " \\__.Latent code hash: {} [{:03d}/{:03d}] ".format(latent_code_hash, i + 1, args.num_samples), + args.num_samples, i) + + # Create directory for current latent code + latent_code_dir = osp.join(out_dir, '{}'.format(latent_code_hash)) + os.makedirs(latent_code_dir, exist_ok=True) + + if 'stylegan' in args.gan: + # Get the w+ and w codes for the given z code, save them, and the generated image based on the w code + # Note that w+ has torch.Size([1, 512]) and w torch.Size([18, 512]) -- the latter is just a repetition of + # the w code for all 18 layers + w_plus = G.get_w(z, truncation=args.truncation)[0, :, :] + w = w_plus[0, :].unsqueeze(0) + torch.save(z.cpu(), osp.join(latent_code_dir, 'latent_code_z.pt')) + torch.save(w.cpu(), osp.join(latent_code_dir, 'latent_code_w.pt')) + torch.save(w_plus.cpu(), osp.join(latent_code_dir, 'latent_code_w+.pt')) + + img_w = G(w).cpu() + tensor2image(img_w, adaptive=True).save(osp.join(latent_code_dir, 'image_w.jpg'), + "JPEG", quality=95, optimize=True, progressive=True) + else: + # Save latent code (Z-space), generate image for this code, and save the generated image + torch.save(z.cpu(), osp.join(latent_code_dir, 'latent_code_z.pt')) + img_z = G(z).cpu() + tensor2image(img_z, adaptive=True).save(osp.join(latent_code_dir, 'image_z.jpg'), + "JPEG", quality=95, optimize=True, progressive=True) + + if args.verbose: + update_stdout(1) + print() + print() + + +if __name__ == '__main__': + main() diff --git a/ContraCLIP/scripts/eval/pggan_celebahq1024_attributes.sh b/ContraCLIP/scripts/eval/pggan_celebahq1024_attributes.sh new file mode 100755 index 0000000000000000000000000000000000000000..eae1cb3c7999c14dc7c1b4c53f81916793281251 --- /dev/null +++ b/ContraCLIP/scripts/eval/pggan_celebahq1024_attributes.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# ===== Configuration ===== +pool="pggan_celebahq1024-8" +# ------------------------- +eps=0.15 +shift_leap=4 +batch_size=10 +# ========================= + +# Define shift steps +declare -a SHIFT_STEPS=(60) + + +# Define experiment directories list +declare -a EXPERIMENTS=( + "experiments/complete/ContraCLIP_pggan_celebahq1024-Z-K9-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-attributes" + "experiments/complete/ContraCLIP_pggan_celebahq1024-Z-K9-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-cossim-20000-attributes" + ) + +for shift_s in "${SHIFT_STEPS[@]}" +do + for exp in "${EXPERIMENTS[@]}" + do + python traverse_latent_space.py -v --gif \ + --exp="${exp}" \ + --pool=${pool} \ + --eps=${eps} \ + --shift-steps="${shift_s}" \ + --shift-leap=${shift_leap} \ + --batch-size=${batch_size} \ + --img-size=512 \ + --gif \ + --gif-height=256 \ + --strip \ + --strip-height=256 + done +done diff --git a/ContraCLIP/scripts/eval/stylegan2_afhqcat512_w+.sh b/ContraCLIP/scripts/eval/stylegan2_afhqcat512_w+.sh new file mode 100755 index 0000000000000000000000000000000000000000..cebe1032a085a6b2a4dd6a69f99c43835b151603 --- /dev/null +++ b/ContraCLIP/scripts/eval/stylegan2_afhqcat512_w+.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# ===== Configuration ===== +pool="stylegan2_afhqcat512-16" +# ------------------------- +eps=0.15 +shift_leap=3 +batch_size=20 +# ========================= 
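+# (Presumably, as in the other evaluation scripts: `eps` is the per-step shift
+# magnitude, `shift_leap` the stride used when sampling frames for the GIF /
+# strip outputs, and SHIFT_STEPS the number of traversal steps per path; see
+# traverse_latent_space.py for the authoritative argument definitions.)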
+ +# Define shift steps +declare -a SHIFT_STEPS=(180) + +# Define experiment directories list +declare -a EXPERIMENTS=( + "experiments/complete/ContraCLIP_stylegan2_afhqcat512-W+-K3-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-cats" + ) + +for shift_s in "${SHIFT_STEPS[@]}" +do + for exp in "${EXPERIMENTS[@]}" + do + python traverse_latent_space.py -v --gif \ + --exp="${exp}" \ + --pool="${pool}" \ + --eps=${eps} \ + --shift-steps="${shift_s}" \ + --shift-leap=${shift_leap} \ + --batch-size=${batch_size} \ + --img-size=512 \ + --gif \ + --gif-height=256 \ + --strip \ + --strip-height=256 + done +done diff --git a/ContraCLIP/scripts/eval/stylegan2_afhqdog512_w+.sh b/ContraCLIP/scripts/eval/stylegan2_afhqdog512_w+.sh new file mode 100755 index 0000000000000000000000000000000000000000..04ea45f7cc48d278504ac75192b5cdf0dcf348e4 --- /dev/null +++ b/ContraCLIP/scripts/eval/stylegan2_afhqdog512_w+.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# ===== Configuration ===== +pool="stylegan2_afhqdog512-16" +# ------------------------- +eps=0.15 +shift_leap=5 +batch_size=20 +# ========================= + +# Define shift steps +declare -a SHIFT_STEPS=(120) + +# Define experiment directories list +declare -a EXPERIMENTS=( + "experiments/complete/ContraCLIP_stylegan2_afhqdog512-W+-K4-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-dogs" + ) + +for shift_s in "${SHIFT_STEPS[@]}" +do + for exp in "${EXPERIMENTS[@]}" + do + python traverse_latent_space.py -v --gif \ + --exp="${exp}" \ + --pool="${pool}" \ + --eps=${eps} \ + --shift-steps="${shift_s}" \ + --shift-leap=${shift_leap} \ + --batch-size=${batch_size} \ + --img-size=512 \ + --gif \ + --gif-height=256 \ + --strip \ + --strip-height=256 + done +done diff --git a/ContraCLIP/scripts/eval/stylegan2_car512_w+.sh b/ContraCLIP/scripts/eval/stylegan2_car512_w+.sh new file mode 100755 index 0000000000000000000000000000000000000000..dabd068d1d8a85483fb2f03f25497567f3e62fac --- /dev/null +++ b/ContraCLIP/scripts/eval/stylegan2_car512_w+.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# ===== Configuration ===== +pool="stylegan2_car512-6" +# ------------------------- +eps=0.15 +shift_leap=3 +batch_size=20 +# ========================= + +# Define shift steps +declare -a SHIFT_STEPS=(140) + +# Define experiment directories list +declare -a EXPERIMENTS=( + "experiments/complete/ContraCLIP_stylegan2_car512-W+-K3-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-cars" + ) + +for shift_s in "${SHIFT_STEPS[@]}" +do + for exp in "${EXPERIMENTS[@]}" + do + python traverse_latent_space.py -v --gif \ + --exp="${exp}" \ + --pool="${pool}" \ + --eps=${eps} \ + --shift-steps="${shift_s}" \ + --shift-leap=${shift_leap} \ + --batch-size=${batch_size} \ + --img-size=512 \ + --gif \ + --gif-height=256 \ + --strip \ + --strip-height=256 + done +done diff --git a/ContraCLIP/scripts/eval/stylegan2_ffhq1024_w+_contrastive.sh b/ContraCLIP/scripts/eval/stylegan2_ffhq1024_w+_contrastive.sh new file mode 100755 index 0000000000000000000000000000000000000000..13d14d3f07b251b5bc20a691ecd55fe4251ba967 --- /dev/null +++ b/ContraCLIP/scripts/eval/stylegan2_ffhq1024_w+_contrastive.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# ===== Configuration ===== +pool="stylegan2_ffhq1024-8" +# ------------------------- +eps=0.15 +shift_leap=4 +batch_size=10 +# ========================= + +# Define shift steps +declare -a SHIFT_STEPS=(100) + +# Define experiment directories list +declare -a EXPERIMENTS=( + 
"experiments/complete/ContraCLIP_stylegan2_ffhq1024-W+-K9-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-attributes" + "experiments/complete/ContraCLIP_stylegan2_ffhq1024-W+-K3-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-complex" + "experiments/complete/ContraCLIP_stylegan2_ffhq1024-W+-K3-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-expressions3" + "experiments/complete/ContraCLIP_stylegan2_ffhq1024-W+-K21-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-expressions" + ) + +for shift_s in "${SHIFT_STEPS[@]}" +do + for exp in "${EXPERIMENTS[@]}" + do + python traverse_latent_space.py -v \ + --exp="${exp}" \ + --pool=${pool} \ + --eps=${eps} \ + --shift-steps="${shift_s}" \ + --shift-leap=${shift_leap} \ + --batch-size=${batch_size} \ + --img-size=512 \ + --gif \ + --gif-height=256 \ + --strip \ + --strip-height=256 + done +done diff --git a/ContraCLIP/scripts/eval/stylegan2_ffhq1024_w+_cossim.sh b/ContraCLIP/scripts/eval/stylegan2_ffhq1024_w+_cossim.sh new file mode 100755 index 0000000000000000000000000000000000000000..3ef2f5015d4ecc833c2473600f7da941296b8a79 --- /dev/null +++ b/ContraCLIP/scripts/eval/stylegan2_ffhq1024_w+_cossim.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# ===== Configuration ===== +pool="stylegan2_ffhq1024-8" +# ------------------------- +eps=0.15 +shift_leap=4 +batch_size=10 +# ========================= + +# Define shift steps +declare -a SHIFT_STEPS=(100) + +# Define experiment directories list +declare -a EXPERIMENTS=( + "experiments/complete/ContraCLIP_stylegan2_ffhq1024-W+-K3-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-cossim-20000-expressions3" + "experiments/complete/ContraCLIP_stylegan2_ffhq1024-W+-K3-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-cossim-20000-complex" + "experiments/complete/ContraCLIP_stylegan2_ffhq1024-W+-K21-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-cossim-20000-expressions" + "experiments/complete/ContraCLIP_stylegan2_ffhq1024-W+-K9-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-cossim-20000-attributes" + ) + +for shift_s in "${SHIFT_STEPS[@]}" +do + for exp in "${EXPERIMENTS[@]}" + do + python traverse_latent_space.py -v \ + --exp="${exp}" \ + --pool=${pool} \ + --eps=${eps} \ + --shift-steps="${shift_s}" \ + --shift-leap=${shift_leap} \ + --batch-size=${batch_size} \ + --img-size=512 \ + --gif \ + --gif-height=256 \ + --strip \ + --strip-height=256 + done +done diff --git a/ContraCLIP/scripts/eval/stylegan2_ffhq1024_w+_maya.sh b/ContraCLIP/scripts/eval/stylegan2_ffhq1024_w+_maya.sh new file mode 100755 index 0000000000000000000000000000000000000000..321f42de9a4e248b56bb2330bb36bc57fa75d1d1 --- /dev/null +++ b/ContraCLIP/scripts/eval/stylegan2_ffhq1024_w+_maya.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# ===== Configuration ===== +pool="maya" +# ------------------------- +eps=0.15 +shift_leap=3 +batch_size=10 +# ========================= + +# Define shift steps +declare -a SHIFT_STEPS=(100) + +# Define experiment directories list +declare -a EXPERIMENTS=( + "experiments/complete/ContraCLIP_stylegan2_ffhq1024-W+-K3-D64-lss_beta_0.5-eps0.1_0.2-nonlinear_css_beta_0.5-contrastive_0.07-20000-expressions3" + ) + +for shift_s in "${SHIFT_STEPS[@]}" +do + for exp in "${EXPERIMENTS[@]}" + do + python traverse_latent_space.py -v \ + --w-space \ + --exp="${exp}" \ + --pool=${pool} \ + --eps=${eps} \ + --shift-steps="${shift_s}" \ + --shift-leap=${shift_leap} \ + --batch-size=${batch_size} \ + --img-size=512 \ + --gif \ + 
--gif-height=256 \ + --strip \ + --strip-height=256 + done +done diff --git a/ContraCLIP/scripts/train/pggan_celebahq1024_attributes.sh b/ContraCLIP/scripts/train/pggan_celebahq1024_attributes.sh new file mode 100755 index 0000000000000000000000000000000000000000..18e479b89114497f966218a7f4797b49027157fe --- /dev/null +++ b/ContraCLIP/scripts/train/pggan_celebahq1024_attributes.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# =================================== # +# Experiment configuration # +# =================================== # + +# ======== GAN Type / Corpus ======== # +gan="pggan_celebahq1024" +corpus="attributes" + +# ==== Latent Support Sets (LSS) ==== # +num_latent_support_dipoles=64 +min_shift_magnitude=0.1 +max_shift_magnitude=0.2 +lss_beta=0.5 + +# ==== Corpus Support Sets (CSS) ==== # +linear=false +styleclip_like=false +loss="contrastive" +temperature=0.07 +css_beta=0.5 + +# ============ Training ============= # +batch_size=9 +max_iter=20000 +# =================================== # + + +# Run training script +linear_text="" +if $linear ; then + linear_text="--linear" +fi + +styleclip="" +if $styleclip_like ; then + styleclip="--styleclip" +fi + +python train.py --gan=${gan} \ + --corpus=${corpus} \ + --num-latent-support-dipoles=${num_latent_support_dipoles} \ + --lss-beta=${lss_beta} \ + --loss=${loss} \ + --temperature=${temperature} \ + --css-beta=${css_beta} \ + ${linear_text} \ + ${styleclip} \ + --min-shift-magnitude=${min_shift_magnitude} \ + --max-shift-magnitude=${max_shift_magnitude} \ + --batch-size=${batch_size} \ + --max-iter=${max_iter} \ + --log-freq=10 \ + --ckp-freq=100 diff --git a/ContraCLIP/scripts/train/pggan_celebahq1024_attributes_cossim.sh b/ContraCLIP/scripts/train/pggan_celebahq1024_attributes_cossim.sh new file mode 100755 index 0000000000000000000000000000000000000000..4f9122f2f22ded3febd4906ed47deb7d9e52cf74 --- /dev/null +++ b/ContraCLIP/scripts/train/pggan_celebahq1024_attributes_cossim.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# =================================== # +# Experiment configuration # +# =================================== # + +# ======== GAN Type / Corpus ======== # +gan="pggan_celebahq1024" +corpus="attributes" + +# ==== Latent Support Sets (LSS) ==== # +num_latent_support_dipoles=64 +min_shift_magnitude=0.1 +max_shift_magnitude=0.2 +lss_beta=0.5 + +# ==== Corpus Support Sets (CSS) ==== # +linear=false +styleclip_like=false +loss="cossim" +temperature=0.07 +css_beta=0.5 + +# ============ Training ============= # +batch_size=9 +max_iter=20000 +# =================================== # + + +# Run training script +linear_text="" +if $linear ; then + linear_text="--linear" +fi + +styleclip="" +if $styleclip_like ; then + styleclip="--styleclip" +fi + +python train.py --gan=${gan} \ + --corpus=${corpus} \ + --num-latent-support-dipoles=${num_latent_support_dipoles} \ + --lss-beta=${lss_beta} \ + --loss=${loss} \ + --temperature=${temperature} \ + --css-beta=${css_beta} \ + ${linear_text} \ + ${styleclip} \ + --min-shift-magnitude=${min_shift_magnitude} \ + --max-shift-magnitude=${max_shift_magnitude} \ + --batch-size=${batch_size} \ + --max-iter=${max_iter} \ + --log-freq=10 \ + --ckp-freq=100 diff --git a/ContraCLIP/scripts/train/stylegan2_afhqcat512_w+_cats.sh b/ContraCLIP/scripts/train/stylegan2_afhqcat512_w+_cats.sh new file mode 100755 index 0000000000000000000000000000000000000000..1c60a228cda7e68cf7c55bf36cc3c495ff5d35ae --- /dev/null +++ b/ContraCLIP/scripts/train/stylegan2_afhqcat512_w+_cats.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# 
=================================== # +# Experiment configuration # +# =================================== # + +# ======== GAN Type / Corpus ======== # +gan="stylegan2_afhqcat512" +stylegan_space="W+" +stylegan_layer=11 +corpus="cats" + +# ==== Latent Support Sets (LSS) ==== # +num_latent_support_dipoles=64 +min_shift_magnitude=0.1 +max_shift_magnitude=0.2 +lss_beta=0.5 + +# ==== Corpus Support Sets (CSS) ==== # +linear=false +styleclip_like=false +loss="contrastive" +temperature=0.07 +css_beta=0.5 + +# ============ Training ============= # +batch_size=3 +max_iter=20000 +# =================================== # + + +# Run training script +linear_text="" +if $linear ; then + linear_text="--linear" +fi + +styleclip="" +if $styleclip_like ; then + styleclip="--styleclip" +fi + +python train.py --gan=${gan} \ + --truncation=0.7 \ + --stylegan-space=${stylegan_space} \ + --stylegan-layer=${stylegan_layer} \ + --corpus=${corpus} \ + --num-latent-support-dipoles=${num_latent_support_dipoles} \ + --lss-beta=${lss_beta} \ + --loss=${loss} \ + --temperature=${temperature} \ + --css-beta=${css_beta} \ + ${linear_text} \ + ${styleclip} \ + --min-shift-magnitude=${min_shift_magnitude} \ + --max-shift-magnitude=${max_shift_magnitude} \ + --batch-size=${batch_size} \ + --max-iter=${max_iter} \ + --log-freq=10 \ + --ckp-freq=100 diff --git a/ContraCLIP/scripts/train/stylegan2_afhqdog512_w+_dogs.sh b/ContraCLIP/scripts/train/stylegan2_afhqdog512_w+_dogs.sh new file mode 100755 index 0000000000000000000000000000000000000000..f9cc34ebc914b534ca87a22e48eed49863d9db98 --- /dev/null +++ b/ContraCLIP/scripts/train/stylegan2_afhqdog512_w+_dogs.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# =================================== # +# Experiment configuration # +# =================================== # + +# ======== GAN Type / Corpus ======== # +gan="stylegan2_afhqdog512" +stylegan_space="W+" +stylegan_layer=11 +corpus="dogs" + +# ==== Latent Support Sets (LSS) ==== # +num_latent_support_dipoles=64 +min_shift_magnitude=0.1 +max_shift_magnitude=0.2 +lss_beta=0.5 + +# ==== Corpus Support Sets (CSS) ==== # +linear=false +styleclip_like=false +loss="contrastive" +temperature=0.07 +css_beta=0.5 + +# ============ Training ============= # +batch_size=4 +max_iter=20000 +# =================================== # + + +# Run training script +linear_text="" +if $linear ; then + linear_text="--linear" +fi + +styleclip="" +if $styleclip_like ; then + styleclip="--styleclip" +fi + +python train.py --gan=${gan} \ + --truncation=0.7 \ + --stylegan-space=${stylegan_space} \ + --stylegan-layer=${stylegan_layer} \ + --corpus=${corpus} \ + --num-latent-support-dipoles=${num_latent_support_dipoles} \ + --lss-beta=${lss_beta} \ + --loss=${loss} \ + --temperature=${temperature} \ + --css-beta=${css_beta} \ + ${linear_text} \ + ${styleclip} \ + --min-shift-magnitude=${min_shift_magnitude} \ + --max-shift-magnitude=${max_shift_magnitude} \ + --batch-size=${batch_size} \ + --max-iter=${max_iter} \ + --log-freq=10 \ + --ckp-freq=100 diff --git a/ContraCLIP/scripts/train/stylegan2_car512_w+_cars.sh b/ContraCLIP/scripts/train/stylegan2_car512_w+_cars.sh new file mode 100755 index 0000000000000000000000000000000000000000..a67403a8d0e651d3ad85134ef16ce0f06830f9d8 --- /dev/null +++ b/ContraCLIP/scripts/train/stylegan2_car512_w+_cars.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# =================================== # +# Experiment configuration # +# =================================== # + +# ======== GAN Type / Corpus ======== # +gan="stylegan2_car512" +stylegan_space="W+" 
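+# (stylegan_layer presumably selects up to which W+ layer the learned shifts
+# are applied; see train.py's --stylegan-layer argument for its exact meaning.)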
+stylegan_layer=11 +corpus="cars" + +# ==== Latent Support Sets (LSS) ==== # +num_latent_support_dipoles=64 +min_shift_magnitude=0.1 +max_shift_magnitude=0.2 +lss_beta=0.5 + +# ==== Corpus Support Sets (CSS) ==== # +linear=false +styleclip_like=false +loss="contrastive" +temperature=0.07 +css_beta=0.5 + +# ============ Training ============= # +batch_size=3 +max_iter=20000 +# =================================== # + + +# Run training script +linear_text="" +if $linear ; then + linear_text="--linear" +fi + +styleclip="" +if $styleclip_like ; then + styleclip="--styleclip" +fi + +python train.py --gan=${gan} \ + --truncation=0.7 \ + --stylegan-space=${stylegan_space} \ + --stylegan-layer=${stylegan_layer} \ + --corpus=${corpus} \ + --num-latent-support-dipoles=${num_latent_support_dipoles} \ + --lss-beta=${lss_beta} \ + --loss=${loss} \ + --temperature=${temperature} \ + --css-beta=${css_beta} \ + ${linear_text} \ + ${styleclip} \ + --min-shift-magnitude=${min_shift_magnitude} \ + --max-shift-magnitude=${max_shift_magnitude} \ + --batch-size=${batch_size} \ + --max-iter=${max_iter} \ + --log-freq=10 \ + --ckp-freq=100 diff --git a/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_attributes.sh b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_attributes.sh new file mode 100755 index 0000000000000000000000000000000000000000..0b4adafd8ef7a6293ade4cef36f084eca3b56b5b --- /dev/null +++ b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_attributes.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# =================================== # +# Experiment configuration # +# =================================== # + +# ======== GAN Type / Corpus ======== # +gan="stylegan2_ffhq1024" +stylegan_space="W+" +stylegan_layer=11 +corpus="attributes" + +# ==== Latent Support Sets (LSS) ==== # +num_latent_support_dipoles=64 +min_shift_magnitude=0.1 +max_shift_magnitude=0.2 +lss_beta=0.5 + +# ==== Corpus Support Sets (CSS) ==== # +linear=false +styleclip_like=false +loss="contrastive" +temperature=0.07 +css_beta=0.5 + +# ============ Training ============= # +batch_size=9 +max_iter=20000 +# =================================== # + + +# Run training script +linear_text="" +if $linear ; then + linear_text="--linear" +fi + +styleclip="" +if $styleclip_like ; then + styleclip="--styleclip" +fi + +python train.py --gan=${gan} \ + --truncation=0.7 \ + --stylegan-space=${stylegan_space} \ + --stylegan-layer=${stylegan_layer} \ + --corpus=${corpus} \ + --num-latent-support-dipoles=${num_latent_support_dipoles} \ + --lss-beta=${lss_beta} \ + --loss=${loss} \ + --temperature=${temperature} \ + --css-beta=${css_beta} \ + ${linear_text} \ + ${styleclip} \ + --min-shift-magnitude=${min_shift_magnitude} \ + --max-shift-magnitude=${max_shift_magnitude} \ + --batch-size=${batch_size} \ + --max-iter=${max_iter} \ + --log-freq=10 \ + --ckp-freq=100 diff --git a/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_attributes_cossim.sh b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_attributes_cossim.sh new file mode 100755 index 0000000000000000000000000000000000000000..341b8daa6dc579ba6e708058cd10df47edd9fd52 --- /dev/null +++ b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_attributes_cossim.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# =================================== # +# Experiment configuration # +# =================================== # + +# ======== GAN Type / Corpus ======== # +gan="stylegan2_ffhq1024" +stylegan_space="W+" +stylegan_layer=11 +corpus="attributes" + +# ==== Latent Support Sets (LSS) ==== # +num_latent_support_dipoles=64 +min_shift_magnitude=0.1 
+max_shift_magnitude=0.2 +lss_beta=0.5 + +# ==== Corpus Support Sets (CSS) ==== # +linear=false +styleclip_like=false +loss="cossim" +temperature=0.07 +css_beta=0.5 + +# ============ Training ============= # +batch_size=9 +max_iter=20000 +# =================================== # + + +# Run training script +linear_text="" +if $linear ; then + linear_text="--linear" +fi + +styleclip="" +if $styleclip_like ; then + styleclip="--styleclip" +fi + +python train.py --gan=${gan} \ + --truncation=0.7 \ + --stylegan-space=${stylegan_space} \ + --stylegan-layer=${stylegan_layer} \ + --corpus=${corpus} \ + --num-latent-support-dipoles=${num_latent_support_dipoles} \ + --lss-beta=${lss_beta} \ + --loss=${loss} \ + --temperature=${temperature} \ + --css-beta=${css_beta} \ + ${linear_text} \ + ${styleclip} \ + --min-shift-magnitude=${min_shift_magnitude} \ + --max-shift-magnitude=${max_shift_magnitude} \ + --batch-size=${batch_size} \ + --max-iter=${max_iter} \ + --log-freq=10 \ + --ckp-freq=100 diff --git a/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_complex.sh b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_complex.sh new file mode 100755 index 0000000000000000000000000000000000000000..5bb98498c93f496a6c5f90d57b972707ce95273d --- /dev/null +++ b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_complex.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# =================================== # +# Experiment configuration # +# =================================== # + +# ======== GAN Type / Corpus ======== # +gan="stylegan2_ffhq1024" +stylegan_space="W+" +stylegan_layer=11 +corpus="complex" + +# ==== Latent Support Sets (LSS) ==== # +num_latent_support_dipoles=64 +min_shift_magnitude=0.1 +max_shift_magnitude=0.2 +lss_beta=0.5 + +# ==== Corpus Support Sets (CSS) ==== # +linear=false +styleclip_like=false +loss="contrastive" +temperature=0.07 +css_beta=0.5 + +# ============ Training ============= # +batch_size=3 +max_iter=20000 +# =================================== # + + +# Run training script +linear_text="" +if $linear ; then + linear_text="--linear" +fi + +styleclip="" +if $styleclip_like ; then + styleclip="--styleclip" +fi + +python train.py --gan=${gan} \ + --truncation=0.7 \ + --stylegan-space=${stylegan_space} \ + --stylegan-layer=${stylegan_layer} \ + --corpus=${corpus} \ + --num-latent-support-dipoles=${num_latent_support_dipoles} \ + --lss-beta=${lss_beta} \ + --loss=${loss} \ + --temperature=${temperature} \ + --css-beta=${css_beta} \ + ${linear_text} \ + ${styleclip} \ + --min-shift-magnitude=${min_shift_magnitude} \ + --max-shift-magnitude=${max_shift_magnitude} \ + --batch-size=${batch_size} \ + --max-iter=${max_iter} \ + --log-freq=10 \ + --ckp-freq=100 diff --git a/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_complex_cossim.sh b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_complex_cossim.sh new file mode 100755 index 0000000000000000000000000000000000000000..72165e63b70c5e2cfc88878b7463468fbb81e8f8 --- /dev/null +++ b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_complex_cossim.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# =================================== # +# Experiment configuration # +# =================================== # + +# ======== GAN Type / Corpus ======== # +gan="stylegan2_ffhq1024" +stylegan_space="W+" +stylegan_layer=11 +corpus="complex" + +# ==== Latent Support Sets (LSS) ==== # +num_latent_support_dipoles=64 +min_shift_magnitude=0.1 +max_shift_magnitude=0.2 +lss_beta=0.5 + +# ==== Corpus Support Sets (CSS) ==== # +linear=false +styleclip_like=false +loss="cossim" +temperature=0.07 +css_beta=0.5 
+ +# ============ Training ============= # +batch_size=3 +max_iter=20000 +# =================================== # + + +# Run training script +linear_text="" +if $linear ; then + linear_text="--linear" +fi + +styleclip="" +if $styleclip_like ; then + styleclip="--styleclip" +fi + +python train.py --gan=${gan} \ + --truncation=0.7 \ + --stylegan-space=${stylegan_space} \ + --stylegan-layer=${stylegan_layer} \ + --corpus=${corpus} \ + --num-latent-support-dipoles=${num_latent_support_dipoles} \ + --lss-beta=${lss_beta} \ + --loss=${loss} \ + --temperature=${temperature} \ + --css-beta=${css_beta} \ + ${linear_text} \ + ${styleclip} \ + --min-shift-magnitude=${min_shift_magnitude} \ + --max-shift-magnitude=${max_shift_magnitude} \ + --batch-size=${batch_size} \ + --max-iter=${max_iter} \ + --log-freq=10 \ + --ckp-freq=100 diff --git a/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_expressions.sh b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_expressions.sh new file mode 100755 index 0000000000000000000000000000000000000000..2c8515f311b99429863ab7c890c0fb1f69889e59 --- /dev/null +++ b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_expressions.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# =================================== # +# Experiment configuration # +# =================================== # + +# ======== GAN Type / Corpus ======== # +gan="stylegan2_ffhq1024" +stylegan_space="W+" +stylegan_layer=11 +corpus="expressions" + +# ==== Latent Support Sets (LSS) ==== # +num_latent_support_dipoles=64 +min_shift_magnitude=0.1 +max_shift_magnitude=0.2 +lss_beta=0.5 + +# ==== Corpus Support Sets (CSS) ==== # +linear=false +styleclip_like=false +loss="contrastive" +temperature=0.07 +css_beta=0.5 + +# ============ Training ============= # +batch_size=14 +max_iter=20000 +# =================================== # + + +# Run training script +linear_text="" +if $linear ; then + linear_text="--linear" +fi + +styleclip="" +if $styleclip_like ; then + styleclip="--styleclip" +fi + +python train.py --gan=${gan} \ + --truncation=0.7 \ + --stylegan-space=${stylegan_space} \ + --stylegan-layer=${stylegan_layer} \ + --corpus=${corpus} \ + --num-latent-support-dipoles=${num_latent_support_dipoles} \ + --lss-beta=${lss_beta} \ + --loss=${loss} \ + --temperature=${temperature} \ + --css-beta=${css_beta} \ + ${linear_text} \ + ${styleclip} \ + --min-shift-magnitude=${min_shift_magnitude} \ + --max-shift-magnitude=${max_shift_magnitude} \ + --batch-size=${batch_size} \ + --max-iter=${max_iter} \ + --log-freq=10 \ + --ckp-freq=100 diff --git a/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_expressions3.sh b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_expressions3.sh new file mode 100755 index 0000000000000000000000000000000000000000..5984f396ce9b44e95f2ae067f1aea432edf5b663 --- /dev/null +++ b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_expressions3.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# =================================== # +# Experiment configuration # +# =================================== # + +# ======== GAN Type / Corpus ======== # +gan="stylegan2_ffhq1024" +stylegan_space="W+" +stylegan_layer=11 +corpus="expressions3" + +# ==== Latent Support Sets (LSS) ==== # +num_latent_support_dipoles=64 +min_shift_magnitude=0.1 +max_shift_magnitude=0.2 +lss_beta=0.5 + +# ==== Corpus Support Sets (CSS) ==== # +linear=false +styleclip_like=false +loss="contrastive" +temperature=0.07 +css_beta=0.5 + +# ============ Training ============= # +batch_size=3 +max_iter=20000 +# =================================== # + + +# Run training script 
+linear_text="" +if $linear ; then + linear_text="--linear" +fi + +styleclip="" +if $styleclip_like ; then + styleclip="--styleclip" +fi + +python train.py --gan=${gan} \ + --truncation=0.7 \ + --stylegan-space=${stylegan_space} \ + --stylegan-layer=${stylegan_layer} \ + --corpus=${corpus} \ + --num-latent-support-dipoles=${num_latent_support_dipoles} \ + --lss-beta=${lss_beta} \ + --loss=${loss} \ + --temperature=${temperature} \ + --css-beta=${css_beta} \ + ${linear_text} \ + ${styleclip} \ + --min-shift-magnitude=${min_shift_magnitude} \ + --max-shift-magnitude=${max_shift_magnitude} \ + --batch-size=${batch_size} \ + --max-iter=${max_iter} \ + --log-freq=10 \ + --ckp-freq=100 diff --git a/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_expressions3_cossim.sh b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_expressions3_cossim.sh new file mode 100755 index 0000000000000000000000000000000000000000..779d9974becfd6f8d944067f0ed236d720a0410c --- /dev/null +++ b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_expressions3_cossim.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# =================================== # +# Experiment configuration # +# =================================== # + +# ======== GAN Type / Corpus ======== # +gan="stylegan2_ffhq1024" +stylegan_space="W+" +stylegan_layer=11 +corpus="expressions3" + +# ==== Latent Support Sets (LSS) ==== # +num_latent_support_dipoles=64 +min_shift_magnitude=0.1 +max_shift_magnitude=0.2 +lss_beta=0.5 + +# ==== Corpus Support Sets (CSS) ==== # +linear=false +styleclip_like=false +loss="cossim" +temperature=0.07 +css_beta=0.5 + +# ============ Training ============= # +batch_size=3 +max_iter=20000 +# =================================== # + + +# Run training script +linear_text="" +if $linear ; then + linear_text="--linear" +fi + +styleclip="" +if $styleclip_like ; then + styleclip="--styleclip" +fi + +python train.py --gan=${gan} \ + --truncation=0.7 \ + --stylegan-space=${stylegan_space} \ + --stylegan-layer=${stylegan_layer} \ + --corpus=${corpus} \ + --num-latent-support-dipoles=${num_latent_support_dipoles} \ + --lss-beta=${lss_beta} \ + --loss=${loss} \ + --temperature=${temperature} \ + --css-beta=${css_beta} \ + ${linear_text} \ + ${styleclip} \ + --min-shift-magnitude=${min_shift_magnitude} \ + --max-shift-magnitude=${max_shift_magnitude} \ + --batch-size=${batch_size} \ + --max-iter=${max_iter} \ + --log-freq=10 \ + --ckp-freq=100 diff --git a/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_expressions_cossim.sh b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_expressions_cossim.sh new file mode 100755 index 0000000000000000000000000000000000000000..3b16c0248f32f90acfab6d0e3f40e649cd452628 --- /dev/null +++ b/ContraCLIP/scripts/train/stylegan2_ffhq1024_w+_expressions_cossim.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# =================================== # +# Experiment configuration # +# =================================== # + +# ======== GAN Type / Corpus ======== # +gan="stylegan2_ffhq1024" +stylegan_space="W+" +stylegan_layer=11 +corpus="expressions" + +# ==== Latent Support Sets (LSS) ==== # +num_latent_support_dipoles=64 +min_shift_magnitude=0.1 +max_shift_magnitude=0.2 +lss_beta=0.5 + +# ==== Corpus Support Sets (CSS) ==== # +linear=false +styleclip_like=false +loss="cossim" +temperature=0.07 +css_beta=0.5 + +# ============ Training ============= # +batch_size=14 +max_iter=20000 +# =================================== # + + +# Run training script +linear_text="" +if $linear ; then + linear_text="--linear" +fi + +styleclip="" +if $styleclip_like ; then + 
styleclip="--styleclip" +fi + +python train.py --gan=${gan} \ + --truncation=0.7 \ + --stylegan-space=${stylegan_space} \ + --stylegan-layer=${stylegan_layer} \ + --corpus=${corpus} \ + --num-latent-support-dipoles=${num_latent_support_dipoles} \ + --lss-beta=${lss_beta} \ + --loss=${loss} \ + --temperature=${temperature} \ + --css-beta=${css_beta} \ + ${linear_text} \ + ${styleclip} \ + --min-shift-magnitude=${min_shift_magnitude} \ + --max-shift-magnitude=${max_shift_magnitude} \ + --batch-size=${batch_size} \ + --max-iter=${max_iter} \ + --log-freq=10 \ + --ckp-freq=100 diff --git a/ContraCLIP/train.py b/ContraCLIP/train.py new file mode 100644 index 0000000000000000000000000000000000000000..cc2125471ce434f4ca47da8ee8a35c0bd7fa0bfb --- /dev/null +++ b/ContraCLIP/train.py @@ -0,0 +1,228 @@ +import sys +import argparse +import os.path as osp +import json +import torch +import clip +from lib import * +from lib import GENFORCE_MODELS, STYLEGAN_LAYERS, SEMANTIC_DIPOLES_CORPORA +from models.load_generator import load_generator + + +def main(): + """ContraCLIP -- Training script. + + Options: + ===[ GAN Generator (G) ]======================================================================================== + --gan : set pre-trained GAN generator (see GENFORCE_MODELS in lib/config.py) + --stylegan-space : set StyleGAN's latent space (Z, W, W+) to look for interpretable paths + TODO: add style space S + --stylegan-layer : choose up to which StyleGAN's layer to use for learning latent paths + E.g., if --stylegan-layer=11, then interpretable paths will be learnt in a + (12 * 512)-dimensional latent space. + --truncation : set W-space truncation parameter. If set, W-space codes will be truncated + + ===[ Corpus Support Sets (CSS) ]================================================================================ + --corpus : choose corpus of prompts (see config.py/PROMPT_CORPUS). 
The number of elements of + the tuple PROMPT_CORPUS[args.corpus] will define the number of the latent support + sets; i.e., the number of warping functions -- number of the interpretable latent + paths to be optimised + TODO: read corpus from input file + --css-beta : set beta parameter for fixing CLIP space RBFs' gamma parameters + (0.25 <= css_beta < 1.0) + --styleclip : use StyleCLIP approach for calculating image-text similarity + + ===[ Latent Support Sets (LSS) ]================================================================================ + --num-latent-support-dipoles : set number of support dipoles per support set + --lss-beta : set beta parameter for initializing latent space RBFs' gamma parameters + (0.0 < lss_beta < 1.0) + --lr : set learning rate for learning the latent support sets LSS (with Adam optimizer) + --linear : use the vector connecting the poles of the dipole for calculating image-text + similarity + --min-shift-magnitude : set minimum latent shift magnitude + --max-shift-magnitude : set maximum latent shift magnitude + + ===[ CLIP ]===================================================================================================== + + + ===[ Training ]================================================================================================= + --max-iter : set maximum number of training iterations + --batch-size : set training batch size + --loss : set loss function ('cossim', 'contrastive') + --temperature : set contrastive loss temperature + --log-freq : set number iterations per log + --ckp-freq : set number iterations per checkpoint model saving + + ===[ CUDA ]===================================================================================================== + --cuda : use CUDA during training (default) + --no-cuda : do NOT use CUDA during training + ================================================================================================================ + """ + parser = argparse.ArgumentParser(description="ContraCLIP training script") + + # === Experiment ID ============================================================================================== # + parser.add_argument('--exp-id', type=str, default='', help="set optional experiment ID") + + # === Pre-trained GAN Generator (G) ============================================================================== # + parser.add_argument('--gan', type=str, choices=GENFORCE_MODELS.keys(), help='GAN generator model') + parser.add_argument('--stylegan-space', type=str, default='Z', choices=('Z', 'W', 'W+'), + help="StyleGAN's latent space") + parser.add_argument('--stylegan-layer', type=int, default=11, choices=range(18), + help="choose up to which StyleGAN's layer to use for learning latent paths") + parser.add_argument('--truncation', type=float, help="latent code sampling truncation parameter") + + # === Corpus Support Sets (CSS) ================================================================================== # + parser.add_argument('--corpus', type=str, required=True, choices=SEMANTIC_DIPOLES_CORPORA.keys(), + help="choose corpus of semantic dipoles") + parser.add_argument('--css-beta', type=float, default=0.5, + help="set beta parameter for initializing CLIP space RBFs' gamma parameters " + "(0.25 <= css_beta < 1.0)") + parser.add_argument('--styleclip', action='store_true', + help="use StyleCLIP approach for calculating image-text similarity") + parser.add_argument('--linear', action='store_true', + help="use the vector connecting the poles of the dipole for calculating image-text 
similarity") + + # === Latent Support Sets (LSS) ================================================================================== # + parser.add_argument('--num-latent-support-dipoles', type=int, help="number of latent support dipoles / support set") + parser.add_argument('--lss-beta', type=float, default=0.1, + help="set beta parameter for initializing latent space RBFs' gamma parameters " + "(0.25 < css_beta < 1.0)") + parser.add_argument('--lr', type=float, default=1e-4, help="latent support sets LSS learning rate") + parser.add_argument('--min-shift-magnitude', type=float, default=0.25, help="minimum latent shift magnitude") + parser.add_argument('--max-shift-magnitude', type=float, default=0.45, help="maximum latent shift magnitude") + + # === Training =================================================================================================== # + parser.add_argument('--max-iter', type=int, default=10000, help="maximum number of training iterations") + parser.add_argument('--batch-size', type=int, required=True, help="training batch size -- this should be less than " + "or equal to the size of the given corpus") + parser.add_argument('--loss', type=str, default='cossim', choices=('cossim', 'contrastive'), + help="loss function") + parser.add_argument('--temperature', type=float, default=1.0, help="contrastive temperature") + parser.add_argument('--log-freq', default=10, type=int, help='number of iterations per log') + parser.add_argument('--ckp-freq', default=1000, type=int, help='number of iterations per checkpoint model saving') + + # === CUDA ======================================================================================================= # + parser.add_argument('--cuda', dest='cuda', action='store_true', help="use CUDA during training") + parser.add_argument('--no-cuda', dest='cuda', action='store_false', help="do NOT use CUDA during training") + parser.set_defaults(cuda=True) + # ================================================================================================================ # + + # Parse given arguments + args = parser.parse_args() + + # Check given batch size + if args.batch_size > len(SEMANTIC_DIPOLES_CORPORA[args.corpus]): + print("*** WARNING ***: Given batch size ({}) is greater than the size of the given corpus ({})\n" + " Set batch size to {}".format( + args.batch_size, len(SEMANTIC_DIPOLES_CORPORA[args.corpus]), + len(SEMANTIC_DIPOLES_CORPORA[args.corpus]))) + args.batch_size = len(SEMANTIC_DIPOLES_CORPORA[args.corpus]) + + # Check StyleGAN's layer + if 'stylegan' in args.gan: + if (args.stylegan_layer < 0) or (args.stylegan_layer > STYLEGAN_LAYERS[args.gan]-1): + raise ValueError("Invalid stylegan_layer for given GAN ({}). Choose between 0 and {}".format( + args.gan, STYLEGAN_LAYERS[args.gan]-1)) + + # Create output dir and save current arguments + exp_dir = create_exp_dir(args) + + # CUDA + use_cuda = False + multi_gpu = False + if torch.cuda.is_available(): + if args.cuda: + use_cuda = True + torch.set_default_tensor_type('torch.cuda.FloatTensor') + if torch.cuda.device_count() > 1: + multi_gpu = True + else: + print("*** WARNING ***: It looks like you have a CUDA device, but aren't using CUDA.\n" + " Run with --cuda for optimal training speed.") + torch.set_default_tensor_type('torch.FloatTensor') + else: + torch.set_default_tensor_type('torch.FloatTensor') + + # Build GAN generator model and load with pre-trained weights + print("#. 
Build GAN generator model G and load with pre-trained weights...") + print(" \\__GAN generator : {} (res: {})".format(args.gan, GENFORCE_MODELS[args.gan][1])) + print(" \\__Pre-trained weights: {}".format(GENFORCE_MODELS[args.gan][0])) + G = load_generator(model_name=args.gan, + latent_is_w=('stylegan' in args.gan) and ('W' in args.stylegan_space), + verbose=True).eval() + + # Upload GAN generator model to GPU + if use_cuda: + G = G.cuda() + + # Build pretrained CLIP model + print("#. Build pretrained CLIP model...") + clip_model, _ = clip.load("ViT-B/32", device='cuda' if use_cuda else 'cpu', jit=False) + clip_model.float() + clip_model.eval() + + # Get CLIP (non-normalized) text features for the prompts of the given corpus + prompt_f = PromptFeatures(prompt_corpus=SEMANTIC_DIPOLES_CORPORA[args.corpus], clip_model=clip_model) + prompt_features = prompt_f.get_prompt_features() + + # Build Corpus Support Sets model CSS + print("#. Build Corpus Support Sets CSS...") + print(" \\__Number of corpus support sets : {}".format(prompt_f.num_prompts)) + print(" \\__Number of corpus support dipoles : {}".format(1)) + print(" \\__Prompt features dim : {}".format(prompt_f.prompt_features_dim)) + print(" \\__Text RBF beta param : {}".format(args.css_beta)) + + CSS = SupportSets(prompt_features=prompt_features, css_beta=args.css_beta) + + # Count number of trainable parameters + CSS_trainable_parameters = sum(p.numel() for p in CSS.parameters() if p.requires_grad) + print(" \\__Trainable parameters: {:,}".format(CSS_trainable_parameters)) + + # Set support vector dimensionality and initial gamma param + support_vectors_dim = G.dim_z + if ('stylegan' in args.gan) and (args.stylegan_space == 'W+'): + support_vectors_dim *= (args.stylegan_layer + 1) + + # Get Jung radii + with open(osp.join('models', 'jung_radii.json'), 'r') as f: + jung_radii_dict = json.load(f) + + if 'stylegan' in args.gan: + if 'W+' in args.stylegan_space: + lm = jung_radii_dict[args.gan]['W']['{}'.format(args.stylegan_layer)] + elif 'W' in args.stylegan_space: + lm = jung_radii_dict[args.gan]['W']['0'] + else: + lm = jung_radii_dict[args.gan]['Z'] + jung_radius = lm[0] * args.truncation + lm[1] + else: + jung_radius = jung_radii_dict[args.gan]['Z'][1] + + # Build Latent Support Sets model LSS + print("#. Build Latent Support Sets LSS...") + print(" \\__Number of latent support sets : {}".format(prompt_f.num_prompts)) + print(" \\__Number of latent support dipoles : {}".format(args.num_latent_support_dipoles)) + print(" \\__Support Vectors dim : {}".format(support_vectors_dim)) + print(" \\__Latent RBF beta param (lss-beta) : {}".format(args.lss_beta)) + print(" \\__Jung radius : {}".format(jung_radius)) + + LSS = SupportSets(num_support_sets=prompt_f.num_prompts, + num_support_dipoles=args.num_latent_support_dipoles, + support_vectors_dim=support_vectors_dim, + lss_beta=args.lss_beta, + jung_radius=jung_radius) + + # Count number of trainable parameters + LSS_trainable_parameters = sum(p.numel() for p in LSS.parameters() if p.requires_grad) + print(" \\__Trainable parameters: {:,}".format(LSS_trainable_parameters)) + + # Set up trainer + print("#. 
Experiment: {}".format(exp_dir)) + t = Trainer(params=args, exp_dir=exp_dir, use_cuda=use_cuda, multi_gpu=multi_gpu) + + # Train + t.train(generator=G, latent_support_sets=LSS, corpus_support_sets=CSS, clip_model=clip_model) + + +if __name__ == '__main__': + main() diff --git a/ContraCLIP/traverse_latent_space.py b/ContraCLIP/traverse_latent_space.py new file mode 100644 index 0000000000000000000000000000000000000000..4011bdd0fdbc06da172d43c0a5ca463e11d9aea3 --- /dev/null +++ b/ContraCLIP/traverse_latent_space.py @@ -0,0 +1,591 @@ +import argparse +import os +import os.path as osp +import torch +from torch import nn +import torch.nn.functional as F +from PIL import Image, ImageDraw +import json +from torchvision.transforms import ToPILImage +from lib import SupportSets, GENFORCE_MODELS, update_progress, update_stdout, STYLEGAN_LAYERS +from models.load_generator import load_generator + + +class DataParallelPassthrough(nn.DataParallel): + def __getattr__(self, name): + try: + return super(DataParallelPassthrough, self).__getattr__(name) + except AttributeError: + return getattr(self.module, name) + + +class ModelArgs: + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + +def tensor2image(tensor, img_size=None, adaptive=False): + # Squeeze tensor image + tensor = tensor.squeeze(dim=0) + if adaptive: + tensor = (tensor - tensor.min()) / (tensor.max() - tensor.min()) + if img_size: + return ToPILImage()((255 * tensor.cpu().detach()).to(torch.uint8)).resize((img_size, img_size)) + else: + return ToPILImage()((255 * tensor.cpu().detach()).to(torch.uint8)) + else: + tensor = (tensor + 1) / 2 + tensor.clamp(0, 1) + if img_size: + return ToPILImage()((255 * tensor.cpu().detach()).to(torch.uint8)).resize((img_size, img_size)) + else: + return ToPILImage()((255 * tensor.cpu().detach()).to(torch.uint8)) + + +def one_hot(dims, value, idx): + vec = torch.zeros(dims) + vec[idx] = value + return vec + + +def create_strip(image_list, N=5, strip_height=256): + """Create strip of images across a given latent path. + + Args: + image_list (list) : list of images (PIL.Image.Image) across a given latent path + N (int) : number of images in strip + strip_height (int) : strip height in pixels -- its width will be N * strip_height + + + Returns: + transformed_images_strip (PIL.Image.Image) : strip PIL image + """ + step = len(image_list) // N + 1 + transformed_images_strip = Image.new('RGB', (N * strip_height, strip_height)) + for i in range(N): + j = i * step if i * step < len(image_list) else len(image_list) - 1 + transformed_images_strip.paste(image_list[j].resize((strip_height, strip_height)), (i * strip_height, 0)) + return transformed_images_strip + + +def create_gif(image_list, gif_height=256): + """Create gif frames for images across a given latent path. 
+ + Args: + image_list (list) : list of images (PIL.Image.Image) across a given latent path + gif_height (int) : gif height in pixels -- its width will be N * gif_height + + Returns: + transformed_images_gif_frames (list): list of gif frames in PIL (PIL.Image.Image) + """ + transformed_images_gif_frames = [] + for i in range(len(image_list)): + # Create gif frame + gif_frame = Image.new('RGB', (2 * gif_height, gif_height)) + gif_frame.paste(image_list[len(image_list) // 2].resize((gif_height, gif_height)), (0, 0)) + gif_frame.paste(image_list[i].resize((gif_height, gif_height)), (gif_height, 0)) + + # Draw progress bar + draw_bar = ImageDraw.Draw(gif_frame) + bar_h = 12 + bar_colour = (252, 186, 3) + draw_bar.rectangle(xy=((gif_height, gif_height - bar_h), + ((1 + (i / len(image_list))) * gif_height, gif_height)), + fill=bar_colour) + + transformed_images_gif_frames.append(gif_frame) + + return transformed_images_gif_frames + + +def main(): + """ContraCLIP -- Latent space traversal script. + + A script for traversing the latent space of a pre-trained GAN generator through paths defined by the warpings of + a set of pre-trained support vectors. Latent codes are drawn from a pre-defined collection via the `--pool` + argument. The generated images are stored under `results/` directory. + + Options: + ================================================================================================================ + -v, --verbose : set verbose mode on + ================================================================================================================ + --exp : set experiment's model dir, as created by `train.py`, i.e., it should contain a subdirectory + `models/` with two files, namely `reconstructor.pt` and `support_sets.pt`, which + contain the weights for the reconstructor and the support sets, respectively, and an `args.json` + file that contains the arguments the model has been trained with. + --pool : directory of pre-defined pool of latent codes (created by `sample_gan.py`) + --w-space : latent codes in the pool are in W/W+ space (typically as inverted codes of real images) + ================================================================================================================ + --shift-steps : set number of shifts to be applied to each latent code at each direction (positive/negative). + That is, the total number of shifts applied to each latent code will be equal to + 2 * args.shift_steps. + --eps : set shift step magnitude for generating G(z'), where z' = z +/- eps * direction. 
+ --shift-leap : set path shift leap (after how many steps to generate images) + --batch-size : set generator batch size (if not set, use the total number of images per path) + --img-size : set size of saved generated images (if not set, use the output size of the respective GAN + generator) + --img-quality : JPEG image quality (max 95) + --gif : generate collated GIF images for all paths and all latent codes + --gif-height : set GIF image height -- width will be 2 * args.gif_height + --gif-fps : set number of frames per second for the generated GIF images + --strip : create traversal strip images + --strip-number : set number of images per strip + --strip-height : set strip height -- width will be 2 * args.strip_height + ================================================================================================================ + --cuda : use CUDA (default) + --no-cuda : do not use CUDA + ================================================================================================================ + """ + parser = argparse.ArgumentParser(description="ContraCLIP latent space traversal script") + parser.add_argument('-v', '--verbose', action='store_true', help="set verbose mode on") + # ================================================================================================================ # + parser.add_argument('--w-space', action='store_true', help="latent codes are given in the W-space") + parser.add_argument('--exp', type=str, required=True, help="set experiment's model dir (created by `train.py`)") + parser.add_argument('--pool', type=str, required=True, help="directory of pre-defined pool of latent codes" + "(created by `sample_gan.py`)") + parser.add_argument('--shift-steps', type=int, default=16, help="set number of shifts per positive/negative path " + "direction") + parser.add_argument('--eps', type=float, default=0.2, help="set shift step magnitude") + parser.add_argument('--shift-leap', type=int, default=1, + help="set path shift leap (after how many steps to generate images)") + parser.add_argument('--batch-size', type=int, help="set generator batch size (if not set, use the total number of " + "images per path)") + parser.add_argument('--img-size', type=int, help="set size of saved generated images (if not set, use the output " + "size of the respective GAN generator)") + parser.add_argument('--img-quality', type=int, default=50, help="set JPEG image quality") + + parser.add_argument('--strip', action='store_true', help="create traversal strip images") + parser.add_argument('--strip-number', type=int, default=9, help="set number of images per strip") + parser.add_argument('--strip-height', type=int, default=256, help="set strip height") + parser.add_argument('--gif', action='store_true', help="create GIF traversals") + parser.add_argument('--gif-height', type=int, default=256, help="set gif height") + parser.add_argument('--gif-fps', type=int, default=30, help="set gif frame rate") + # ================================================================================================================ # + parser.add_argument('--cuda', dest='cuda', action='store_true', help="use CUDA during training") + parser.add_argument('--no-cuda', dest='cuda', action='store_false', help="do NOT use CUDA during training") + parser.set_defaults(cuda=True) + # ================================================================================================================ # + + # Parse given arguments + args = parser.parse_args() + + # Check structure of `args.exp` + if not 
osp.isdir(args.exp): + raise NotADirectoryError("Invalid given directory: {}".format(args.exp)) + + # -- args.json file (pre-trained model arguments) + args_json_file = osp.join(args.exp, 'args.json') + if not osp.isfile(args_json_file): + raise FileNotFoundError("File not found: {}".format(args_json_file)) + args_json = ModelArgs(**json.load(open(args_json_file))) + gan = args_json.__dict__["gan"] + stylegan_space = args_json.__dict__["stylegan_space"] + stylegan_layer = args_json.__dict__["stylegan_layer"] if "stylegan_layer" in args_json.__dict__ else None + truncation = args_json.__dict__["truncation"] + + # TODO: Check if `--w-space` is valid + if args.w_space and (('stylegan' not in gan) or ('W' not in stylegan_space)): + raise NotImplementedError + + # -- models directory (support sets and reconstructor, final or checkpoint files) + models_dir = osp.join(args.exp, 'models') + if not osp.isdir(models_dir): + raise NotADirectoryError("Invalid models directory: {}".format(models_dir)) + + # ---- Get all files of models directory + models_dir_files = [f for f in os.listdir(models_dir) if osp.isfile(osp.join(models_dir, f))] + + # ---- Check for latent support sets (LSS) model file (final or checkpoint) + latent_support_sets_model = osp.join(models_dir, 'latent_support_sets.pt') + model_iter = '' + if not osp.isfile(latent_support_sets_model): + latent_support_sets_checkpoint_files = [] + for f in models_dir_files: + if 'latent_support_sets-' in f: + latent_support_sets_checkpoint_files.append(f) + latent_support_sets_checkpoint_files.sort() + latent_support_sets_model = osp.join(models_dir, latent_support_sets_checkpoint_files[-1]) + model_iter = '-{}'.format(latent_support_sets_checkpoint_files[-1].split('.')[0].split('-')[-1]) + + # -- Get prompt corpus list + with open(osp.join(models_dir, 'semantic_dipoles.json'), 'r') as f: + semantic_dipoles = json.load(f) + + # Check given pool directory + pool = osp.join('experiments', 'latent_codes', gan, args.pool) + if not osp.isdir(pool): + raise NotADirectoryError("Invalid pool directory: {} -- Please run sample_gan.py to create it.".format(pool)) + + # CUDA + use_cuda = False + multi_gpu = False + if torch.cuda.is_available(): + if args.cuda: + use_cuda = True + torch.set_default_tensor_type('torch.cuda.FloatTensor') + if torch.cuda.device_count() > 1: + multi_gpu = True + else: + print("*** WARNING ***: It looks like you have a CUDA device, but aren't using CUDA.\n" + " Run with --cuda for optimal training speed.") + torch.set_default_tensor_type('torch.FloatTensor') + else: + torch.set_default_tensor_type('torch.FloatTensor') + + # Build GAN generator model and load with pre-trained weights + if args.verbose: + print("#. Build GAN generator model G and load with pre-trained weights...") + print(" \\__GAN generator : {} (res: {})".format(gan, GENFORCE_MODELS[gan][1])) + print(" \\__Pre-trained weights: {}".format(GENFORCE_MODELS[gan][0])) + + G = load_generator(model_name=gan, + latent_is_w=('stylegan' in gan) and ('W' in args_json.__dict__["stylegan_space"]), + verbose=args.verbose).eval() + + # Upload GAN generator model to GPU + if use_cuda: + G = G.cuda() + + # Parallelize GAN generator model into multiple GPUs if available + if multi_gpu: + G = DataParallelPassthrough(G) + + # Build latent support sets model LSS + if args.verbose: + print("#. 
Build Latent Support Sets model LSS...") + + # Get support vector dimensionality + support_vectors_dim = G.dim_z + if ('stylegan' in gan) and (stylegan_space == 'W+'): + support_vectors_dim *= (stylegan_layer + 1) + + LSS = SupportSets(num_support_sets=len(semantic_dipoles), + num_support_dipoles=args_json.__dict__["num_latent_support_dipoles"], + support_vectors_dim=support_vectors_dim, + jung_radius=1) + + # Load pre-trained weights and set to evaluation mode + if args.verbose: + print(" \\__Pre-trained weights: {}".format(latent_support_sets_model)) + LSS.load_state_dict(torch.load(latent_support_sets_model, map_location=lambda storage, loc: storage)) + if args.verbose: + print(" \\__Set to evaluation mode") + LSS.eval() + + # Upload support sets model to GPU + if use_cuda: + LSS = LSS.cuda() + + # Set number of generative paths + num_gen_paths = LSS.num_support_sets + + # Create output dir for generated images + out_dir = osp.join(args.exp, 'results', args.pool + model_iter, + '{}_{}_{}'.format(2 * args.shift_steps, args.eps, round(2 * args.shift_steps * args.eps, 3))) + os.makedirs(out_dir, exist_ok=True) + + # Set default batch size + if args.batch_size is None: + args.batch_size = 2 * args.shift_steps + 1 + + ## ============================================================================================================== ## + ## ## + ## [Latent Codes Pool] ## + ## ## + ## ============================================================================================================== ## + # Get latent codes from the given pool + if args.verbose: + print("#. Use latent codes from pool {}...".format(args.pool)) + latent_codes_dirs = [dI for dI in os.listdir(pool) if os.path.isdir(os.path.join(pool, dI))] + latent_codes_dirs.sort() + latent_codes_list = [torch.load(osp.join(pool, subdir, 'latent_code_{}.pt'.format('w+' if args.w_space else 'z')), + map_location=lambda storage, loc: storage) for subdir in latent_codes_dirs] + + # Get latent codes in torch Tensor format -- xs refers to z or w+ codes + xs = torch.cat(latent_codes_list) + if use_cuda: + xs = xs.cuda() + num_of_latent_codes = xs.size()[0] + + ## ============================================================================================================== ## + ## ## + ## [Latent space traversal] ## + ## ## + ## ============================================================================================================== ## + if args.verbose: + print("#. 
Traverse latent space...") + print(" \\__Experiment : {}".format(osp.basename(osp.abspath(args.exp)))) + print(" \\__Number of test latent codes : {}".format(num_of_latent_codes)) + print(" \\__Test latent codes shape : {}".format(xs.shape)) + print(" \\__Shift magnitude : {}".format(args.eps)) + print(" \\__Shift steps : {}".format(2 * args.shift_steps)) + print(" \\__Traversal length : {}".format(round(2 * args.shift_steps * args.eps, 3))) + + # Iterate over given latent codes + for i in range(num_of_latent_codes): + # Get latent code + x_ = xs[i, :].unsqueeze(0) + + latent_code_hash = latent_codes_dirs[i] + if args.verbose: + update_progress(" \\__.Latent code hash: {} [{:03d}/{:03d}] ".format(latent_code_hash, + i+1, + num_of_latent_codes), + num_of_latent_codes, i) + + # Create directory for current latent code + latent_code_dir = osp.join(out_dir, '{}'.format(latent_code_hash)) + os.makedirs(latent_code_dir, exist_ok=True) + + # Create directory for storing path images + transformed_images_root_dir = osp.join(latent_code_dir, 'paths_images') + os.makedirs(transformed_images_root_dir, exist_ok=True) + transformed_images_strips_root_dir = osp.join(latent_code_dir, 'paths_strips') + os.makedirs(transformed_images_strips_root_dir, exist_ok=True) + + # Keep all latent paths the current latent code (sample) + paths_latent_codes = [] + + # Keep phi coefficients + phi_coeffs = dict() + + ## ========================================================================================================== ## + ## ## + ## [ Path Traversal ] ## + ## ## + ## ========================================================================================================== ## + # Iterate over (interpretable) directions + for dim in range(num_gen_paths): + if args.verbose: + print() + update_progress(" \\__path: {:03d}/{:03d} ".format(dim + 1, num_gen_paths), num_gen_paths, dim + 1) + + # Create shifted latent codes (for the given latent code z) and generate transformed images + transformed_images = [] + + # Current path's latent codes and shifts lists + latent_code = x_ + if (not args.w_space) and ('stylegan' in gan) and ('W' in stylegan_space): + latent_code = G.get_w(x_, truncation=truncation) + if stylegan_space == 'W': + latent_code = latent_code[:, 0, :] + current_path_latent_codes = [latent_code] + current_path_latent_shifts = [torch.zeros_like(latent_code).cuda() if use_cuda + else torch.zeros_like(latent_code)] + + ## ====================================================================================================== ## + ## ## + ## [ Traverse through current path (positive/negative directions) ] ## + ## ## + ## ====================================================================================================== ## + # == Positive direction == + latent_code = x_.clone() + if (not args.w_space) and ('stylegan' in gan) and ('W' in stylegan_space): + latent_code = G.get_w(x_, truncation=truncation).clone() + if stylegan_space == 'W': + latent_code = latent_code[:, 0, :] + + cnt = 0 + for _ in range(args.shift_steps): + cnt += 1 + + # Calculate shift vector based on current z + support_sets_mask = torch.zeros(1, LSS.num_support_sets) + support_sets_mask[0, dim] = 1.0 + if use_cuda: + support_sets_mask.cuda() + + # Get latent space shift vector and shifted latent code + if ('stylegan' in gan) and (stylegan_space == 'W+'): + with torch.no_grad(): + shift = args.eps * LSS(support_sets_mask, + latent_code[:, :stylegan_layer + 1, :].reshape(latent_code.shape[0], -1)) + latent_code = latent_code + \ + 
F.pad(input=shift, pad=(0, (STYLEGAN_LAYERS[gan] - 1 - stylegan_layer) * 512), + mode='constant', value=0).reshape_as(latent_code) + current_path_latent_code = latent_code + else: + with torch.no_grad(): + shift = args.eps * LSS(support_sets_mask, latent_code) + latent_code = latent_code + shift + current_path_latent_code = latent_code + + # Store latent codes and shifts + if cnt == args.shift_leap: + if ('stylegan' in gan) and (stylegan_space == 'W+'): + current_path_latent_shifts.append( + F.pad(input=shift, pad=(0, (STYLEGAN_LAYERS[gan] - 1 - stylegan_layer) * 512), + mode='constant', value=0).reshape_as(latent_code)) + else: + current_path_latent_shifts.append(shift) + current_path_latent_codes.append(current_path_latent_code) + cnt = 0 + positive_endpoint = latent_code.clone().reshape(1, -1) + # ======================== + + # == Negative direction == + latent_code = x_.clone() + if (not args.w_space) and ('stylegan' in gan) and ('W' in stylegan_space): + latent_code = G.get_w(x_, truncation=truncation).clone() + if stylegan_space == 'W': + latent_code = latent_code[:, 0, :] + cnt = 0 + for _ in range(args.shift_steps): + cnt += 1 + # Calculate shift vector based on current z + support_sets_mask = torch.zeros(1, LSS.num_support_sets) + support_sets_mask[0, dim] = 1.0 + if use_cuda: + support_sets_mask.cuda() + + # Get latent space shift vector and shifted latent code + if ('stylegan' in gan) and (stylegan_space == 'W+'): + with torch.no_grad(): + shift = -args.eps * LSS( + support_sets_mask, latent_code[:, :stylegan_layer + 1, :].reshape(latent_code.shape[0], -1)) + latent_code = latent_code + \ + F.pad(input=shift, pad=(0, (STYLEGAN_LAYERS[gan] - 1 - stylegan_layer) * 512), + mode='constant', value=0).reshape_as(latent_code) + current_path_latent_code = latent_code + else: + with torch.no_grad(): + shift = -args.eps * LSS(support_sets_mask, latent_code) + latent_code = latent_code + shift + current_path_latent_code = latent_code + + # Store latent codes and shifts + if cnt == args.shift_leap: + if ('stylegan' in gan) and (stylegan_space == 'W+'): + current_path_latent_shifts = \ + [F.pad(input=shift, pad=(0, (STYLEGAN_LAYERS[gan] - 1 - stylegan_layer) * 512), + mode='constant', value=0).reshape_as(latent_code)] + current_path_latent_shifts + else: + current_path_latent_shifts = [shift] + current_path_latent_shifts + current_path_latent_codes = [current_path_latent_code] + current_path_latent_codes + cnt = 0 + negative_endpoint = latent_code.clone().reshape(1, -1) + # ======================== + + # Calculate latent path phi coefficient (end-to-end distance / latent path length) + phi = torch.norm(negative_endpoint - positive_endpoint, dim=1).item() / (2 * args.shift_steps * args.eps) + phi_coeffs.update({dim: phi}) + + # Generate transformed images + # Split latent codes and shifts in batches + current_path_latent_codes = torch.cat(current_path_latent_codes) + current_path_latent_codes_batches = torch.split(current_path_latent_codes, args.batch_size) + current_path_latent_shifts = torch.cat(current_path_latent_shifts) + current_path_latent_shifts_batches = torch.split(current_path_latent_shifts, args.batch_size) + if len(current_path_latent_codes_batches) != len(current_path_latent_shifts_batches): + raise AssertionError() + else: + num_batches = len(current_path_latent_codes_batches) + + transformed_img = [] + for t in range(num_batches): + with torch.no_grad(): + transformed_img.append(G(current_path_latent_codes_batches[t] + + current_path_latent_shifts_batches[t])) + 
transformed_img = torch.cat(transformed_img) + + # Convert tensors (transformed images) into PIL images + for t in range(transformed_img.shape[0]): + transformed_images.append(tensor2image(transformed_img[t, :].cpu(), + img_size=args.img_size, + adaptive=True)) + # Save all images in `transformed_images` list under `transformed_images_root_dir//` + transformed_images_dir = osp.join(transformed_images_root_dir, 'path_{:03d}'.format(dim)) + os.makedirs(transformed_images_dir, exist_ok=True) + + for t in range(len(transformed_images)): + transformed_images[t].save(osp.join(transformed_images_dir, '{:06d}.jpg'.format(t)), + "JPEG", quality=args.img_quality, optimize=True, progressive=True) + # Save original image + if (t == len(transformed_images) // 2) and (dim == 0): + transformed_images[t].save(osp.join(latent_code_dir, 'original_image.jpg'), + "JPEG", quality=95, optimize=True, progressive=True) + + # Create strip of images + transformed_images_strip = create_strip(image_list=transformed_images, N=args.strip_number, + strip_height=args.strip_height) + transformed_images_strip.save(osp.join(transformed_images_strips_root_dir, + 'path_{:03d}_strip.jpg'.format(dim)), + "JPEG", quality=args.img_quality, optimize=True, progressive=True) + + # Save gif (static original image + traversal gif) + transformed_images_gif_frames = create_gif(transformed_images, gif_height=args.gif_height) + im = Image.new(mode='RGB', size=(2 * args.gif_height, args.gif_height)) + im.save(fp=osp.join(transformed_images_strips_root_dir, 'path_{:03d}.gif'.format(dim)), + append_images=transformed_images_gif_frames, + save_all=True, + optimize=True, + loop=0, + duration=1000 // args.gif_fps) + + # Append latent paths + paths_latent_codes.append(current_path_latent_codes.unsqueeze(0)) + + if args.verbose: + update_stdout(1) + # ============================================================================================================ # + + # Save all latent paths and shifts for the current latent code (sample) in a tensor of size: + # paths_latent_codes : torch.Size([num_gen_paths, 2 * args.shift_steps + 1, G.dim_z]) + torch.save(torch.cat(paths_latent_codes), osp.join(latent_code_dir, 'paths_latent_codes.pt')) + + if args.verbose: + update_stdout(1) + print() + print() + + # Create summarizing MD files + if args.gif or args.strip: + # For each interpretable path (warping function), collect the generated image sequences for each original latent + # code and collate them into a GIF file + print("#. Write summarizing MD files...") + + # Write .md summary files + if args.gif: + md_summary_file = osp.join(out_dir, 'results.md') + md_summary_file_f = open(md_summary_file, "w") + md_summary_file_f.write("# Experiment: {}\n".format(args.exp)) + + if args.strip: + md_summary_strips_file = osp.join(out_dir, 'results_strips.md') + md_summary_strips_file_f = open(md_summary_strips_file, "w") + md_summary_strips_file_f.write("# Experiment: {}\n".format(args.exp)) + + if args.gif or args.strip: + for dim in range(num_gen_paths): + # Append to .md summary files + if args.gif: + md_summary_file_f.write("### \"{}\" → \"{}\"\n".format(semantic_dipoles[dim][1], + semantic_dipoles[dim][0])) + md_summary_file_f.write("
<p align=\"center\">\n")
+                if args.strip:
+                    md_summary_strips_file_f.write("## \"{}\" → \"{}\"\n".format(semantic_dipoles[dim][1],
+                                                                                 semantic_dipoles[dim][0]))
+                    md_summary_strips_file_f.write("<p align=\"center\">\n")
+
+                for lc in latent_codes_dirs:
+                    if args.gif:
+                        md_summary_file_f.write("<img src=\"{}\"/>\n".format(
+                            osp.join(lc, 'paths_strips', 'path_{:03d}.gif'.format(dim))))
+                    if args.strip:
+                        md_summary_strips_file_f.write("<img src=\"{}\"/>\n".format(
+                            osp.join(lc, 'paths_strips', 'path_{:03d}_strip.jpg'.format(dim))))
+                if args.gif:
+                    md_summary_file_f.write("phi={}\n".format(phi_coeffs[dim]))
+                    md_summary_file_f.write("</p>\n")
+                if args.strip:
+                    md_summary_strips_file_f.write("phi={}\n".format(phi_coeffs[dim]))
+                    md_summary_strips_file_f.write("</p>\n")
+
+        if args.gif:
+            md_summary_file_f.close()
+        if args.strip:
+            md_summary_strips_file_f.close()
+
+
+if __name__ == '__main__':
+    main()
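
Note: each training script above only assembles a `train.py` call; when `linear` or `styleclip_like` is set to `true`, the corresponding `--linear` / `--styleclip` flag is appended. As a minimal sketch, the FFHQ `expressions` script above is equivalent to the following direct invocation (all flags are defined in `train.py`'s argument parser):

```bash
# Minimal sketch: direct invocation equivalent to scripts/train/stylegan2_ffhq1024_w+_expressions.sh.
# Append --linear and/or --styleclip to reproduce the linear=true / styleclip_like=true variants.
python train.py --gan=stylegan2_ffhq1024 \
                --truncation=0.7 \
                --stylegan-space=W+ \
                --stylegan-layer=11 \
                --corpus=expressions \
                --num-latent-support-dipoles=64 \
                --lss-beta=0.5 \
                --loss=contrastive \
                --temperature=0.07 \
                --css-beta=0.5 \
                --min-shift-magnitude=0.1 \
                --max-shift-magnitude=0.2 \
                --batch-size=14 \
                --max-iter=20000 \
                --log-freq=10 \
                --ckp-freq=100
```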
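
Note: once training has produced an experiment directory, `traverse_latent_space.py` can be run against it to export per-path image sequences, JPEG strips, and GIF traversals under the experiment's `results/` directory. A minimal sketch, using only flags defined in the script's argument parser; `<EXPERIMENT_DIR>` and `<POOL>` are placeholders for the directory created by `train.py` and a latent-code pool under `experiments/latent_codes/<gan>/` created by `sample_gan.py`:

```bash
# Minimal sketch: traverse the learnt latent paths for a trained ContraCLIP model.
# <EXPERIMENT_DIR> and <POOL> are placeholders (experiment dir from train.py, pool from sample_gan.py).
python traverse_latent_space.py -v \
    --exp=<EXPERIMENT_DIR> \
    --pool=<POOL> \
    --shift-steps=16 \
    --eps=0.2 \
    --shift-leap=1 \
    --img-size=256 \
    --img-quality=75 \
    --gif --gif-height=256 --gif-fps=30 \
    --strip --strip-number=9 --strip-height=256
```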