How to use

The snippets below decode stored image latents back to pixels with the DC-AE autoencoder, and compare a CLIP text encoding of a caption against the stored text encodings.
import torch
import numpy as np
import h5py
from diffusers import AutoencoderDC
from torchvision.utils import save_image

# Load the pretrained DC-AE autoencoder that produced the stored image latents
dc_encoder = AutoencoderDC.from_pretrained(
    "mit-han-lab/dc-ae-f64c128-in-1.0-diffusers", torch_dtype=torch.float32
).to('cuda')

# Read the stored image latents (SWMR mode allows reading while the file is still being written)
with h5py.File('latent_folder/image_latents.hdf5', 'r', libver='latest', swmr=True) as f:
    print(list(f.keys()))
    dataset = f['image_latents'][:]

# Select one latent, scale it back up by 35, and add a batch dimension
latents = np.expand_dims(dataset[5] * 35, axis=0)
latents = torch.from_numpy(latents).float().to('cuda')

# Decode to pixel space; outputs are in [-1, 1], so rescale to [0, 1] before saving
y = dc_encoder.decode(latents).sample
save_image(y * 0.5 + 0.5, "demo_dc_ae.png")
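
For reference, a minimal sketch of how such latents could have been produced with the same autoencoder. The input tensor, the [-1, 1] normalization, and the 1/35 storage scaling are assumptions, not taken from this repo:

# Sketch: encoding an image into a storable latent (assumptions noted above)
# x: an image batch of shape (1, 3, H, W), assumed already normalized to [-1, 1]
x = torch.rand(1, 3, 512, 512, device='cuda') * 2 - 1
with torch.no_grad():
    latent = dc_encoder.encode(x).latent   # shape (1, 128, H/64, W/64) for the f64c128 model
stored = (latent / 35).cpu().numpy()       # assumed inverse of the *35 applied at decode time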
import clip

# Load the CLIP model used to produce the stored text encodings
model, _ = clip.load("ViT-L/14")
model.to('cuda')

def encode_text(label, model, device):
    # Tokenize the caption (truncated to CLIP's 77-token context) and encode it
    text_tokens = clip.tokenize(label, truncate=True).to(device)
    text_encoding = model.encode_text(text_tokens)
    return text_encoding.cpu()
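
A minimal sketch of how a text_encodings.hdf5 file like the one read below could be built with this helper; the caption list and dataset layout are assumptions:

# Sketch: writing CLIP text encodings to HDF5 (captions and shapes are assumed)
captions = ["a photo of a dog", "a photo of a cat"]   # placeholder captions
with torch.no_grad():
    encodings = torch.cat([encode_text(c, model, 'cuda') for c in captions]).numpy()

with h5py.File('latent_folder/text_encodings.hdf5', 'w') as f:
    f.create_dataset('text_encodings', data=encodings)   # shape (N, 768) for ViT-L/14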
import h5py
# Open the file
f = h5py.File('latent_folder/text_encodings.hdf5', 'r')
# View the contents
print(list(f.keys()))
# Access specific datasets
dataset = f['text_encodings'][:]
# Close the file when done
f.close()
def cosine_similarity(v1, v2):
    # Flatten v1 so a (1, 768) CLIP encoding matches a stored (768,) vector
    v1 = v1.reshape(-1)
    # Cosine similarity: dot product divided by the product of the norms
    dot_product = np.dot(v1, v2)
    norm_v1 = np.linalg.norm(v1)
    norm_v2 = np.linalg.norm(v2)
    return dot_product / (norm_v1 * norm_v2)
# Encode a query caption and compare it with the stored encoding at the same index as the decoded image
textembed = encode_text("""The double-cut pork chop at The Ainsworth in""", model, 'cuda')
print(cosine_similarity(textembed.detach().numpy(), dataset[5]))
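
To score one query against every stored encoding at once and pick the best match, the same computation can be vectorized; this is a sketch, and the (N, 768) dataset shape is an assumption:

# Sketch: vectorized cosine similarity of one query against all stored encodings
query = textembed.detach().numpy().reshape(-1).astype(np.float32)   # (768,)
encodings = dataset.astype(np.float32)                              # assumed shape (N, 768)
scores = encodings @ query / (
    np.linalg.norm(encodings, axis=1) * np.linalg.norm(query)
)
best = int(np.argmax(scores))
print(best, scores[best])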
Dataset link: https://huggingface.co/datasets/zzliang/GRIT