# Loss for Portrait4D, modified from EG3D: https://github.com/NVlabs/eg3d # SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NvidiaProprietary # # NVIDIA CORPORATION, its affiliates and licensors retain all intellectual # property and proprietary rights in and to this material, related # documentation and any modifications thereto. Any use, reproduction, # disclosure or distribution of this material and related documentation # without an express license agreement from NVIDIA CORPORATION or # its affiliates is strictly prohibited. """Loss functions.""" import numpy as np import PIL import torch import torch.nn.functional as F import torchvision.transforms as transform # from kornia.geometry import warp_affine from torch_utils import training_stats from torch_utils.ops import conv2d_gradfix from torch_utils.ops import upfirdn2d from recon.training.discriminator.dual_discriminator_next3D import filtered_resizing import cv2 from PIL import Image # from recon.utils.preprocess import estimate_norm_torch, estimate_norm_torch_pdfgc from camera_utils import LookAtPoseSampler, FOV_to_intrinsics # ---------------------------------------------------------------------------- class Loss: def accumulate_gradients(self, phase, real_img_app, real_img_mot, real_img_recon, real_depth_recon, real_feature_recon, real_triplane_recon, real_c_recon, motions_app, motions, gain, cur_nimg, real_cano_tri, real_ref_tri, motion_scale=1.0, swapping_prob=0.5, half_static=False): # to be overridden by subclass raise NotImplementedError() # ---------------------------------------------------------------------------- class AnimatableGalleryPortraitReconLoss(Loss): def __init__(self, device, G, D, G_syn, D_patch=None, augment_pipe=None, lpips=None, facenet=None, pd_fgc=None, gmain=1.0, r1_gamma=10, r1_gamma_patch=10, r1_gamma_uv=30, r1_gamma_seg=10, style_mixing_prob=0, pl_weight=0, pl_batch_shrink=2, pl_decay=0.01, pl_no_weight_grad=False, blur_init_sigma=0, blur_init_sigma_patch=0, blur_fade_kimg=0, blur_patch_seg=0, r1_gamma_init=0, r1_gamma_fade_kimg=0, neural_rendering_resolution_initial=64, neural_rendering_resolution_final=None, neural_rendering_resolution_fade_kimg=0, gpc_reg_fade_kimg=1000, gpc_reg_prob=None, discrimination_kimg=1000, dual_discrimination=False, filter_mode='antialiased', patch_scale=1.0, patch_gan=0.2, masked_sampling=None, perturb_params=False, id_loss=None, use_D=True, truncation_psi=0.7, conditioning_params=None, w_avg=None): super().__init__() self.device = device self.G = G self.D = D self.G_syn = G_syn self.D_patch = D_patch self.augment_pipe = augment_pipe self.lpips = lpips self.pd_fgc = pd_fgc self.gmain = gmain self.r1_gamma = r1_gamma self.r1_gamma_patch = r1_gamma_patch self.r1_gamma_uv = r1_gamma_uv self.r1_gamma_seg = r1_gamma_seg self.style_mixing_prob = style_mixing_prob self.pl_weight = pl_weight self.pl_batch_shrink = pl_batch_shrink self.pl_decay = pl_decay self.pl_no_weight_grad = pl_no_weight_grad self.pl_mean = torch.zeros([], device=device) self.blur_init_sigma = blur_init_sigma self.blur_init_sigma_patch = blur_init_sigma_patch self.blur_fade_kimg = blur_fade_kimg self.blur_patch_seg = blur_patch_seg self.r1_gamma_init = r1_gamma_init self.r1_gamma_fade_kimg = r1_gamma_fade_kimg self.bg_reg = True self.c_headpose = False self.neural_rendering_resolution_initial = neural_rendering_resolution_initial self.neural_rendering_resolution_final = neural_rendering_resolution_final self.neural_rendering_resolution_fade_kimg = neural_rendering_resolution_fade_kimg self.gpc_reg_fade_kimg = gpc_reg_fade_kimg self.gpc_reg_prob = gpc_reg_prob self.discrimination_kimg = discrimination_kimg self.dual_discrimination = dual_discrimination self.filter_mode = filter_mode self.resample_filter = upfirdn2d.setup_filter([1, 3, 3, 1], device=device) self.blur_raw_target = True assert self.gpc_reg_prob is None or (0 <= self.gpc_reg_prob <= 1) self.patch_scale = patch_scale self.masked_sampling = masked_sampling self.patch_gan = patch_gan self.perturb_params = perturb_params self.use_D = use_D self.truncation_psi = truncation_psi self.conditioning_params = torch.load(conditioning_params ).to(device) self.w_avg = torch.load(w_avg).to(device)[0] self.id_loss = id_loss.to(device) # extract pdfgc motion embedding # def get_motion_feature(self, imgs, lmks, crop_size=224, crop_len=16): # # trans_m = estimate_norm_torch_pdfgc(lmks, imgs.shape[-1]) # imgs_warp = warp_affine(imgs, trans_m, dsize=(224, 224)) # imgs_warp = imgs_warp[:, :, :crop_size - crop_len * 2, crop_len:crop_size - crop_len] # imgs_warp = torch.clamp(F.interpolate(imgs_warp, size=[crop_size, crop_size], mode='bilinear'), -1, 1) # # out = self.pd_fgc(imgs_warp) # motions = torch.cat([out[1], out[2], out[3]], dim=-1) # # return motions # generate online training data using pre-trained Next3d model. the first frame is fixed @torch.no_grad() def gen_data_by_G_next3D(self, z, exp_params, c, model_name_list, render_res=64): out_sr_img = [] out_img_raw = [] out_img_depth = [] out_static_plane = [] out_feature_img = [] out_triplane = [] out_texture = [] out_rendering_stitch = [] batchsize_subject = z.shape[0] # assert batchsize_subject == 3 # cam_pivot = torch.tensor(self.G_syn.rendering_kwargs.get('avg_camera_pivot', [0, 0, 0]), device=device) # cam_radius = self.G_syn.rendering_kwargs.get('avg_camera_radius', 2.7) # conditioning_cam2world_pose = LookAtPoseSampler.sample(np.pi / 2, np.pi / 2, cam_pivot, radius=cam_radius, # device=device) # conditioning_params = torch.cat([conditioning_cam2world_pose.reshape(-1, 16), intrinsics.reshape(-1, 9)], 1).to( # device) # w = G.mapping(z, conditioning_params, truncation_psi=0.7, truncation_cutoff=14) # latent = latent.reshape(-1, *latent.shape[2:]) assert len(model_name_list) == z.shape[0] model_name_list = [name for name1 in model_name_list for name in name1 ] z = z.reshape(-1, *z.shape[2:]) # (b*3, 512) assert len(model_name_list) == z.shape[0] exp_params = exp_params.reshape(-1, *exp_params.shape[2:]) # (b*3, 100) c = c.reshape(-1, *c.shape[2:]) # (b*3, 25) # out_motion = [real_motion_1, real_motion_2] # random head rotation angle_ys_head = torch.rand((z.shape[0], 1), device=z.device) * 0.60 * 2 - 0.60 angle_ys_head2 = torch.rand((z.shape[0], 1), device=z.device) * 0.35 * 2 - 0.35 + 0.2 # angle_ys_head3 = torch.rand((z.shape[0], 1), device=z.device) * 0.25 * 2 - 0.25 # random camera pose cam_pivot_x = torch.rand((z.shape[0], 1), device=z.device) * 0.02 - 0.01 cam_pivot_y = torch.rand((z.shape[0], 1), device=z.device) * 0.02 - 0.01 cam_pivot_z = torch.rand((z.shape[0], 1), device=z.device) * 0.02 - 0.01 + 0.03 cam_pivot = torch.cat([cam_pivot_x * 3, cam_pivot_y * 3, cam_pivot_z * 3], dim=-1) # cam_radius = torch.rand((z.shape[0], 1), device=z.device) * 0.8 + 2.7 cam2world_pose = LookAtPoseSampler.sample(np.pi / 2 + angle_ys_head, np.pi / 2 - angle_ys_head2, cam_pivot, radius=2.7, batch_size=z.shape[0], device=z.device) c_syn = torch.cat([cam2world_pose.reshape(-1, 16), c[:, 16:].reshape(-1, 9)], dim=-1) prob = torch.rand((c.shape[0], 1), device=c.device) c_syn = c_syn c_syn_final_out = c_syn.reshape(-1, 3, c_syn.shape[-1]) for batch_index, model_name in enumerate(model_name_list): z_value = z[batch_index].unsqueeze(0) ws = self.G_syn[model_name].mapping(z_value, self.conditioning_params, truncation_psi=self.truncation_psi, truncation_cutoff=14) vert_value = exp_params[batch_index].unsqueeze(0) c = c_syn[batch_index].unsqueeze(0) out = self.G_syn[model_name].synthesis(ws, c, vert_value, noise_mode='const', neural_rendering_resolution=128, return_featmap=True ) # img = (out['image'][0] * 127.5 + 128).clamp(0, 255).to(torch.uint8) # img = img.permute(1, 2, 0) # img = img.cpu().numpy() # img = Image.fromarray(np.uint8(img)) # print('savesavesavesavesavesave') # save_dir = f'/home/liuhongyu/code/HeadArtist2/HeadGallery/training-runs-portrait4d/00073--multi_style-gpus2-batch8/{batch_index}.png' # print(save_dir) # # cv2.imwrite(save_dir, img) # img.save(save_dir) out_sr_img.append(out['image']) out_img_raw.append(out['image_raw']) out_img_depth.append(out['image_depth']) out_static_plane.append(out['static_plane']) out_feature_img.append(out['image_feature']) out_triplane.append(out['triplane']) out_rendering_stitch.append(out['rendering_stitch']) final_out = {'image_sr': torch.cat(out_sr_img), 'image': torch.cat(out_img_raw), 'image_depth': torch.cat(out_img_depth), 'static_plane': torch.cat(out_static_plane), 'image_feature': torch.cat(out_feature_img), 'triplane': torch.cat(out_triplane), 'rendering_stitch': torch.cat(out_rendering_stitch), 'c': c_syn_final_out, # 'motions': out_motion } return final_out def run_G(self, imgs_app, imgs_mot, motions_app, motions, c, mesh, real_cano_tri, real_ref_tri, neural_rendering_resolution, motion_scale=1.0, swapping_prob=0.5, half_static=False): motion_scale = torch.ones([imgs_app.shape[0], 1, 1], device=c.device) * motion_scale if swapping_prob is not None: imgs_app_swapped = imgs_mot prob = torch.rand((imgs_app.shape[0], 1), device=c.device) imgs_app_conditioning = torch.where(prob.reshape(imgs_app.shape[0], 1, 1, 1) < swapping_prob, imgs_app_swapped, imgs_app) motion_scale_conditioning = torch.where(prob.reshape(imgs_app.shape[0], 1, 1) < swapping_prob, torch.zeros_like(motion_scale), motion_scale) motions_app_conditioning = torch.where(prob < swapping_prob, motions, motions_app) else: imgs_app_conditioning = imgs_app motion_scale_conditioning = motion_scale motions_app_conditioning = motions_app # whether or not the second half of the batchsize are static data # If true, set motion scale to zero to deactivate motion-related cross-attention layers. if half_static: num_static = imgs_app.shape[0] // 2 if swapping_prob is None: motion_scale_conditioning = torch.cat([motion_scale[:num_static], motion_scale[num_static:] * 0], dim=0) else: prob = torch.rand((num_static, 1), device=c.device) motion_scale_static = torch.where(prob.reshape(num_static, 1, 1) < 1 - swapping_prob, torch.zeros_like(motion_scale[num_static:]), motion_scale[num_static:]) motion_scale_conditioning = torch.cat([motion_scale_conditioning[:num_static], motion_scale_static], dim=0) gen_output = self.G.synthesis(imgs_app_conditioning, imgs_mot, motions_app_conditioning, motions, c, mesh, real_ref_tri, real_ref_tri, self.w_avg, neural_rendering_resolution=neural_rendering_resolution, motion_scale=motion_scale_conditioning) return gen_output def run_D(self, img, c, blur_sigma=0, blur_sigma_raw=0, update_emas=False): blur_size = np.floor(blur_sigma * 3) if blur_size > 0: with torch.autograd.profiler.record_function('blur'): if self.G.has_superresolution: f = torch.arange(-blur_size, blur_size + 1, device=img['image_sr'].device).div( blur_sigma).square().neg().exp2() img['image_sr'] = upfirdn2d.filter2d(img['image_sr'], f / f.sum()) else: f = torch.arange(-blur_size, blur_size + 1, device=img['image'].device).div( blur_sigma).square().neg().exp2() img['image'] = upfirdn2d.filter2d(img['image'], f / f.sum()) logits = self.D(img, c, update_emas=update_emas) return logits def accumulate_gradients(self, phase, real_img_app, real_img_mot, real_img_recon, real_depth_recon, real_feature_recon, real_triplane_recon, real_c_recon, mesh, motions_app, motions, gain, cur_nimg, real_cano_tri, real_ref_tri, motion_scale=1.0, swapping_prob=0.5, half_static=True ): if self.G.rendering_kwargs.get('density_reg', 0) == 0: phase = {'Greg': 'none', 'Gboth': 'Gmain'}.get(phase, phase) if self.r1_gamma == 0: phase = {'Dreg': 'none', 'Dboth': 'Dmain'}.get(phase, phase) # if self.r1_gamma_patch == 0: # phase = {'D_patchreg': 'none', 'D_patchboth': 'Dmain'}.get(phase, phase) blur_sigma = 0 r1_gamma = self.r1_gamma # r1_gamma_patch = self.r1_gamma_patch # r1_gamma_uv = self.r1_gamma_uv # r1_gamma_seg = self.r1_gamma_seg if self.neural_rendering_resolution_final is not None: alpha = min( max((cur_nimg - self.discrimination_kimg * 1e3) / (self.neural_rendering_resolution_fade_kimg * 1e3), 0), 1) # begin fading when D starts to be optimized neural_rendering_resolution = int(np.rint(self.neural_rendering_resolution_initial * ( 1 - alpha) + self.neural_rendering_resolution_final * alpha)) neural_rendering_resolution_patch = self.neural_rendering_resolution_final else: neural_rendering_resolution = self.neural_rendering_resolution_initial neural_rendering_resolution_patch = neural_rendering_resolution if self.G.has_superresolution: real_img_raw = filtered_resizing(real_img_recon, size=neural_rendering_resolution, f=self.resample_filter, filter_mode=self.filter_mode) if self.blur_raw_target and blur_sigma > 0: blur_size = np.floor(blur_sigma * 3) if blur_size > 0: f = torch.arange(-blur_size, blur_size + 1, device=real_img_raw.device).div( blur_sigma).square().neg().exp2() real_img_raw = upfirdn2d.filter2d(real_img_raw, f / f.sum()) real_img = {'image_sr': real_img_recon, 'image': real_img_raw, } else: real_img = {'image': real_img_recon} # Gmain: Maximize logits for generated images. if phase in ['Gmain', 'Gboth']: with torch.autograd.profiler.record_function('Gmain_forward'): gen_img = self.run_G(real_img_app, real_img_mot, motions_app, motions, real_c_recon, mesh, real_cano_tri, real_ref_tri, neural_rendering_resolution=neural_rendering_resolution, motion_scale=motion_scale, swapping_prob=swapping_prob, half_static=half_static) # main image-level reconstruction loss gen_img_recon = gen_img['image_sr'] gen_img_recon_raw = gen_img['image'] gen_depth = gen_img['image_depth'] gen_feature = gen_img['image_feature'] gen_triplane_recon = gen_img['triplane'] loss_recon_lpips = self.lpips(gen_img_recon, real_img_recon) + self.lpips(gen_img_recon_raw, real_img_raw) training_stats.report('Loss/G/lrecon_lpips', loss_recon_lpips) loss_recon_l1 = torch.abs(gen_img_recon - real_img_recon).mean() + torch.abs( gen_img_recon_raw - real_img_raw).mean() training_stats.report('Loss/G/lrecon_l1', loss_recon_l1) # use id loss after seeing 400k images if cur_nimg < 400 * 1e3: loss_id = 0 else: loss_id = self.id_loss(gen_img_recon, real_img_recon) training_stats.report('G_Loss/real/loss_id', loss_id) # use depth loss before seeing 400k images if real_depth_recon is not None: if real_depth_recon.shape != gen_depth.shape: real_depth_recon = F.interpolate(real_depth_recon, size=[gen_depth.shape[2], gen_depth.shape[3]], mode='bilinear', antialias=True) loss_recon_depth = torch.abs( (real_depth_recon - gen_depth)).mean() training_stats.report('Loss/G/lrecon_depth', loss_recon_depth) else: loss_recon_depth = 0. # use feature map loss before seeing 400k images if real_feature_recon is not None: if real_feature_recon.shape != gen_feature.shape: real_feature_recon = F.interpolate(real_feature_recon, size=[gen_feature.shape[2], gen_feature.shape[3]], mode='bilinear', antialias=True) loss_recon_feature = torch.abs(real_feature_recon - gen_feature).mean() training_stats.report('Loss/G/lrecon_feature', loss_recon_feature) else: loss_recon_feature = 0. # use triplane feature loss before seeing 400k images if real_triplane_recon is not None: loss_recon_triplane = torch.abs(real_triplane_recon - gen_triplane_recon).mean() training_stats.report('Loss/G/lrecon_triplane', loss_recon_triplane) else: loss_recon_triplane = 0. loss_recon = loss_recon_lpips + loss_recon_l1 + loss_recon_depth + loss_recon_feature + loss_recon_triplane*0.1 + loss_id # adversarial loss after warm-up stage if cur_nimg >= self.discrimination_kimg * 1e3 and self.use_D: gen_logits = self.run_D(gen_img, real_c_recon, blur_sigma=blur_sigma) loss_Gmain = torch.nn.functional.softplus(-gen_logits) training_stats.report('Loss/scores/fake', gen_logits) training_stats.report('Loss/signs/fake', gen_logits.sign()) training_stats.report('Loss/G/loss', loss_Gmain) else: loss_Gmain = None with torch.autograd.profiler.record_function('Gmain_backward'): loss_G = loss_recon.mean() if loss_Gmain is not None: loss_G += loss_Gmain.mean() * self.gmain loss_G.mul(gain).backward() # # Density Regularization if phase in ['Greg', 'Gboth'] and self.G.rendering_kwargs.get('density_reg', 0) > 0 and self.G.rendering_kwargs[ 'reg_type'] == 'l1': initial_coordinates = torch.rand((real_c_recon.shape[0], 1000, 3), device=real_c_recon.device) * 2 - 1 perturbed_coordinates = initial_coordinates + torch.randn_like(initial_coordinates) * \ self.G.rendering_kwargs['density_reg_p_dist'] all_coordinates = torch.cat([initial_coordinates, perturbed_coordinates], dim=1) motion_scale = torch.ones([real_img_app.shape[0], 1, 1], device=real_img_app.device) * motion_scale if swapping_prob is not None: real_img_app_swapped = real_img_mot prob = torch.rand((real_img_app.shape[0], 1), device=real_img_app.device) real_img_app_conditioning = torch.where(prob.reshape(real_img_app.shape[0], 1, 1, 1) < swapping_prob, real_img_app_swapped, real_img_app) motion_scale_conditioning = torch.where(prob.reshape(real_img_app.shape[0], 1, 1) < swapping_prob, torch.zeros_like(motion_scale), motion_scale) motions_app_conditioning = torch.where(prob < swapping_prob, motions, motions_app) else: real_img_app_conditioning = real_img_app motion_scale_conditioning = motion_scale motions_app_conditioning = motions_app if half_static: num_static = real_img_app.shape[0] // 2 if swapping_prob is None: motion_scale_conditioning = torch.cat([motion_scale[:num_static], motion_scale[num_static:] * 0], dim=0) else: prob = torch.rand((num_static, 1), device=real_img_app.device) motion_scale_static = torch.where(prob.reshape(num_static, 1, 1) < 1 - swapping_prob, torch.zeros_like(motion_scale[num_static:]), motion_scale[num_static:]) motion_scale_conditioning = torch.cat([motion_scale_conditioning[:num_static], motion_scale_static], dim=0) out = self.G.sample_mixed(real_img_app_conditioning, real_img_mot, mesh, self.w_avg, motions_app_conditioning, motions, all_coordinates, torch.randn_like(all_coordinates), real_cano_tri, real_ref_tri, motion_scale=motion_scale_conditioning) if isinstance(out, tuple): TVloss = 0 for out_ in out: sigma = out_['sigma'][:, :initial_coordinates.shape[1] * 2] sigma_initial = sigma[:, :sigma.shape[1] // 2] sigma_perturbed = sigma[:, sigma.shape[1] // 2:] TVloss += torch.nn.functional.l1_loss(sigma_initial, sigma_perturbed) * self.G.rendering_kwargs[ 'density_reg'] / len(out) training_stats.report('Loss/G/TVloss', TVloss) else: sigma = out['sigma'][:, :initial_coordinates.shape[1] * 2] sigma_initial = sigma[:, :sigma.shape[1] // 2] sigma_perturbed = sigma[:, sigma.shape[1] // 2:] TVloss = torch.nn.functional.l1_loss(sigma_initial, sigma_perturbed) * self.G.rendering_kwargs[ 'density_reg'] training_stats.report('Loss/G/TVloss', TVloss) (TVloss).mul(gain).backward() # Dmain: Minimize logits for generated images. if cur_nimg >= self.discrimination_kimg * 1e3 and self.use_D: loss_Dgen = 0 if phase in ['Dmain', 'Dboth']: with torch.autograd.profiler.record_function('Dgen_forward'): gen_img = self.run_G(real_img_app, real_img_mot, motions_app, motions, real_c_recon, mesh, real_cano_tri, real_ref_tri, neural_rendering_resolution=neural_rendering_resolution, motion_scale=motion_scale, swapping_prob=swapping_prob, half_static=half_static) gen_logits = self.run_D(gen_img, real_c_recon, blur_sigma=blur_sigma, update_emas=True) training_stats.report('Loss/scores/fake', gen_logits) training_stats.report('Loss/signs/fake', gen_logits.sign()) loss_Dgen = torch.nn.functional.softplus(gen_logits) with torch.autograd.profiler.record_function('Dgen_backward'): loss_Dgen.mean().mul(gain).backward() # Dmain: Maximize logits for real images. # Dr1: Apply R1 regularization. if phase in ['Dmain', 'Dreg', 'Dboth']: name = 'Dreal' if phase == 'Dmain' else 'Dr1' if phase == 'Dreg' else 'Dreal_Dr1' with torch.autograd.profiler.record_function(name + '_forward'): real_img_tmp_image = real_img['image_sr'].detach().requires_grad_(phase in ['Dreg', 'Dboth']) real_img_tmp_image_raw = real_img['image'].detach().requires_grad_(phase in ['Dreg', 'Dboth']) real_img_tmp = {'image_sr': real_img_tmp_image, 'image': real_img_tmp_image_raw} real_logits = self.run_D(real_img_tmp, real_c_recon, blur_sigma=blur_sigma) training_stats.report('Loss/scores/real', real_logits) training_stats.report('Loss/signs/real', real_logits.sign()) loss_Dreal = 0 if phase in ['Dmain', 'Dboth']: loss_Dreal = torch.nn.functional.softplus(-real_logits) training_stats.report('Loss/D/loss', loss_Dgen + loss_Dreal) loss_Dr1 = 0 if phase in ['Dreg', 'Dboth']: if self.dual_discrimination: with torch.autograd.profiler.record_function( 'r1_grads'), conv2d_gradfix.no_weight_gradients(): r1_grads = torch.autograd.grad(outputs=[real_logits.sum()], inputs=[real_img_tmp['image_sr'], real_img_tmp['image']], create_graph=True, only_inputs=True) r1_grads_image = r1_grads[0] r1_grads_image_raw = r1_grads[1] r1_penalty = r1_grads_image.square().sum([1, 2, 3]) + r1_grads_image_raw.square().sum( [1, 2, 3]) else: # single discrimination with torch.autograd.profiler.record_function( 'r1_grads'), conv2d_gradfix.no_weight_gradients(): if self.G.has_superresolution: r1_grads = torch.autograd.grad(outputs=[real_logits.sum()], inputs=[real_img_tmp['image_sr']], create_graph=True, only_inputs=True) else: r1_grads = torch.autograd.grad(outputs=[real_logits.sum()], inputs=[real_img_tmp['image']], create_graph=True, only_inputs=True) r1_grads_image = r1_grads[0] r1_penalty = r1_grads_image.square().sum([1, 2, 3]) loss_Dr1 = r1_penalty * (r1_gamma / 2) training_stats.report('Loss/r1_penalty', r1_penalty) training_stats.report('Loss/D/reg', loss_Dr1) with torch.autograd.profiler.record_function(name + '_backward'): (loss_Dreal + loss_Dr1).mean().mul(gain).backward()