Vittorio Pippi committed on
Commit
605e556
·
1 Parent(s): 5f68c1b

Enhance README with image loading function and update model usage; add inference mode to generate methods

Browse files
Files changed (2) hide show
  1. README.md +16 -12
  2. modeling_emuru.py +2 -0
README.md CHANGED
@@ -56,25 +56,29 @@ library_name: t5
56
  Below is a minimal usage example in Python. You can load the model with `AutoModel.from_pretrained(...)` and simply call `.generate(...)` or `.generate_batch(...)` to create images.
57
 
58
  ```python
59
- import torch
60
  from PIL import Image
61
  from transformers import AutoModel
 
62
  from torchvision.transforms import functional as F
63
 
 
 
 
 
 
 
 
 
64
  # 1. Load the model
65
- model = AutoModel.from_pretrained("blowing-up-groundhogs/emuru")
66
  model.cuda() # Move to GPU if available
67
 
68
  # 2. Prepare your inputs
69
- style_text = "A beautiful watercolor style"
70
- gen_text = "A majestic mountain with a rainbow"
71
- style_img = Image.open("my_style_image.png").convert("RGB")
72
-
73
- # Convert the style image to a suitable tensor
74
- style_img = F.to_tensor(style_img)
75
- style_img = F.resize((style_img.width * 64 // style_img.height, 64)) # Example resize
76
- style_img = F.normalize(style_img, [0.5], [0.5]) # Normalize to [-1, 1]
77
- style_img = style_img.unsqueeze(0).cuda()
78
 
79
  # 3. Generate an image
80
  generated_pil_image = model.generate(
@@ -84,7 +88,7 @@ generated_pil_image = model.generate(
84
  max_new_tokens=64
85
  )
86
 
87
- # 4. Save or display the result
88
  generated_pil_image.save("generated_image.png")
89
  ```
90
 
 
56
  Below is a minimal usage example in Python. You can load the model with `AutoModel.from_pretrained(...)` and simply call `.generate(...)` or `.generate_batch(...)` to create images.
57
 
58
  ```python
 
59
  from PIL import Image
60
  from transformers import AutoModel
61
+ from huggingface_hub import hf_hub_download
62
  from torchvision.transforms import functional as F
63
 
64
+ def load_image(img_path):
65
+ img = Image.open(img_path).convert("RGB")
66
+ # Resize the image to have a fixed height of 64 pixels
67
+ img = img.resize((img.width * 64 // img.height, 64))
68
+ img = F.to_tensor(img)
69
+ img = F.normalize(img, [0.5], [0.5])
70
+ return img.unsqueeze(0)
71
+
72
  # 1. Load the model
73
+ model = AutoModel.from_pretrained("blowing-up-groundhogs/emuru", trust_remote_code=True)
74
  model.cuda() # Move to GPU if available
75
 
76
  # 2. Prepare your inputs
77
+ style_text = 'THE JOLLY IS "U"'
78
+ gen_text = 'EMURU'
79
+ img_path = hf_hub_download(repo_id="blowing-up-groundhogs/emuru", filename="sample.png")
80
+ style_img = load_image(img_path)
81
+ style_img = style_img.cuda()
 
 
 
 
82
 
83
  # 3. Generate an image
84
  generated_pil_image = model.generate(
 
88
  max_new_tokens=64
89
  )
90
 
91
+ # 4. Save the result
92
  generated_pil_image.save("generated_image.png")
93
  ```
94
 
modeling_emuru.py CHANGED
@@ -108,6 +108,7 @@ class Emuru(PreTrainedModel):
108
  mse_loss = self.mse_criterion(vae_latent, z_sequence)
109
  return mse_loss, pred_latent, z
110
 
 
111
  def generate(
112
  self,
113
  style_text: str,
@@ -139,6 +140,7 @@ class Emuru(PreTrainedModel):
139
  imgs = (imgs + 1) / 2
140
  return F.to_pil_image(imgs[0, ..., style_img.size(-1):img_ends.item()].detach().cpu())
141
 
 
142
  def generate_batch(
143
  self,
144
  style_texts: List[str],
 
108
  mse_loss = self.mse_criterion(vae_latent, z_sequence)
109
  return mse_loss, pred_latent, z
110
 
111
+ @torch.inference_mode()
112
  def generate(
113
  self,
114
  style_text: str,
 
140
  imgs = (imgs + 1) / 2
141
  return F.to_pil_image(imgs[0, ..., style_img.size(-1):img_ends.item()].detach().cpu())
142
 
143
+ @torch.inference_mode()
144
  def generate_batch(
145
  self,
146
  style_texts: List[str],