How to use

The snippets below decode stored image latents back to pixels with the DC-AE autoencoder, and compare a CLIP text encoding of a caption against the stored text encodings.
import torch
import numpy as np
import h5py
from diffusers import AutoencoderDC
from torchvision.utils import save_image

# Load the pretrained DC-AE autoencoder that produced the stored image latents
dc_encoder = AutoencoderDC.from_pretrained(
    "mit-han-lab/dc-ae-f64c128-in-1.0-diffusers", torch_dtype=torch.float32
).to('cuda')

# Read the stored image latents (SWMR mode allows reading while the file is still being written)
with h5py.File('latent_folder/image_latents.hdf5', 'r', libver='latest', swmr=True) as f:
    print(list(f.keys()))
    dataset = f['image_latents'][:]

# Select one latent, scale it back up by 35, and add a batch dimension
latents = np.expand_dims(dataset[5] * 35, axis=0)
latents = torch.from_numpy(latents).float().to('cuda')

# Decode to pixel space; outputs are in [-1, 1], so rescale to [0, 1] before saving
y = dc_encoder.decode(latents).sample
save_image(y * 0.5 + 0.5, "demo_dc_ae.png")
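
For reference, a minimal sketch of how such latents could have been produced with the same autoencoder. The input tensor, the [-1, 1] normalization, and the 1/35 storage scaling are assumptions, not taken from this repo:

# Sketch: encoding an image into a storable latent (assumptions noted above)
# x: an image batch of shape (1, 3, H, W), assumed already normalized to [-1, 1]
x = torch.rand(1, 3, 512, 512, device='cuda') * 2 - 1
with torch.no_grad():
    latent = dc_encoder.encode(x).latent   # shape (1, 128, H/64, W/64) for the f64c128 model
stored = (latent / 35).cpu().numpy()       # assumed inverse of the *35 applied at decode time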
import clip

# Load the CLIP model used to produce the stored text encodings
model, _ = clip.load("ViT-L/14")
model.to('cuda')

def encode_text(label, model, device):
    # Tokenize the caption (truncated to CLIP's 77-token context) and encode it
    text_tokens = clip.tokenize(label, truncate=True).to(device)
    text_encoding = model.encode_text(text_tokens)
    return text_encoding.cpu()
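
A minimal sketch of how a text_encodings.hdf5 file like the one read below could be built with this helper; the caption list and dataset layout are assumptions:

# Sketch: writing CLIP text encodings to HDF5 (captions and shapes are assumed)
captions = ["a photo of a dog", "a photo of a cat"]   # placeholder captions
with torch.no_grad():
    encodings = torch.cat([encode_text(c, model, 'cuda') for c in captions]).numpy()

with h5py.File('latent_folder/text_encodings.hdf5', 'w') as f:
    f.create_dataset('text_encodings', data=encodings)   # shape (N, 768) for ViT-L/14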
import h5py
# Open the file
f = h5py.File('latent_folder/text_encodings.hdf5', 'r')
# View the contents
print(list(f.keys()))
# Access specific datasets
dataset = f['text_encodings'][:]
# Close the file when done
f.close()
def cosine_similarity(v1, v2):
    # Flatten v1 so a (1, 768) CLIP encoding matches a stored (768,) vector
    v1 = v1.reshape(-1)
    # Cosine similarity: dot product divided by the product of the norms
    dot_product = np.dot(v1, v2)
    norm_v1 = np.linalg.norm(v1)
    norm_v2 = np.linalg.norm(v2)
    return dot_product / (norm_v1 * norm_v2)
# Encode a query caption and compare it with the stored encoding at the same index as the decoded image
textembed = encode_text("""The double-cut pork chop at The Ainsworth in""", model, 'cuda')
print(cosine_similarity(textembed.detach().numpy(), dataset[5]))
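
To score one query against every stored encoding at once and pick the best match, the same computation can be vectorized; this is a sketch, and the (N, 768) dataset shape is an assumption:

# Sketch: vectorized cosine similarity of one query against all stored encodings
query = textembed.detach().numpy().reshape(-1).astype(np.float32)   # (768,)
encodings = dataset.astype(np.float32)                              # assumed shape (N, 768)
scores = encodings @ query / (
    np.linalg.norm(encodings, axis=1) * np.linalg.norm(query)
)
best = int(np.argmax(scores))
print(best, scores[best])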
Dataset link: https://huggingface.co/datasets/zzliang/GRIT