Vittorio Pippi
committed on
Commit
·
605e556
1
Parent(s):
5f68c1b
Enhance README with image loading function and update model usage; add inference mode to generate methods
Browse files
- README.md +16 -12
- modeling_emuru.py +2 -0
README.md
CHANGED
@@ -56,25 +56,29 @@ library_name: t5
|
|
56 |
Below is a minimal usage example in Python. You can load the model with `AutoModel.from_pretrained(...)` and simply call `.generate(...)` or `.generate_batch(...)` to create images.
|
57 |
|
58 |
```python
|
59 |
-
import torch
|
60 |
from PIL import Image
|
61 |
from transformers import AutoModel
|
|
|
62 |
from torchvision.transforms import functional as F
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
# 1. Load the model
|
65 |
-
model = AutoModel.from_pretrained("blowing-up-groundhogs/emuru")
|
66 |
model.cuda() # Move to GPU if available
|
67 |
|
68 |
# 2. Prepare your inputs
|
69 |
-
style_text =
|
70 |
-
gen_text =
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
style_img = F.to_tensor(style_img)
|
75 |
-
style_img = F.resize((style_img.width * 64 // style_img.height, 64)) # Example resize
|
76 |
-
style_img = F.normalize(style_img, [0.5], [0.5]) # Normalize to [-1, 1]
|
77 |
-
style_img = style_img.unsqueeze(0).cuda()
|
78 |
|
79 |
# 3. Generate an image
|
80 |
generated_pil_image = model.generate(
|
@@ -84,7 +88,7 @@ generated_pil_image = model.generate(
|
|
84 |
max_new_tokens=64
|
85 |
)
|
86 |
|
87 |
-
# 4. Save
|
88 |
generated_pil_image.save("generated_image.png")
|
89 |
```
|
90 |
|
|
|
56 |
Below is a minimal usage example in Python. You can load the model with `AutoModel.from_pretrained(...)` and simply call `.generate(...)` or `.generate_batch(...)` to create images.
|
57 |
|
58 |
```python
|
|
|
59 |
from PIL import Image
|
60 |
from transformers import AutoModel
|
61 |
+
from huggingface_hub import hf_hub_download
|
62 |
from torchvision.transforms import functional as F
|
63 |
|
64 |
+
def load_image(img_path):
|
65 |
+
img = Image.open(img_path).convert("RGB")
|
66 |
+
# Resize the image to have a fixed height of 64 pixels
|
67 |
+
img = img.resize((img.width * 64 // img.height, 64))
|
68 |
+
img = F.to_tensor(img)
|
69 |
+
img = F.normalize(img, [0.5], [0.5])
|
70 |
+
return img.unsqueeze(0)
|
71 |
+
|
72 |
# 1. Load the model
|
73 |
+
model = AutoModel.from_pretrained("blowing-up-groundhogs/emuru", trust_remote_code=True)
|
74 |
model.cuda() # Move to GPU if available
|
75 |
|
76 |
# 2. Prepare your inputs
|
77 |
+
style_text = 'THE JOLLY IS "U"'
|
78 |
+
gen_text = 'EMURU'
|
79 |
+
img_path = hf_hub_download(repo_id="blowing-up-groundhogs/emuru", filename="sample.png")
|
80 |
+
style_img = load_image(img_path)
|
81 |
+
style_img = style_img.cuda()
|
|
|
|
|
|
|
|
|
82 |
|
83 |
# 3. Generate an image
|
84 |
generated_pil_image = model.generate(
|
|
|
88 |
max_new_tokens=64
|
89 |
)
|
90 |
|
91 |
+
# 4. Save the result
|
92 |
generated_pil_image.save("generated_image.png")
|
93 |
```
|
94 |
|
modeling_emuru.py
CHANGED
@@ -108,6 +108,7 @@ class Emuru(PreTrainedModel):
|
|
108 |
mse_loss = self.mse_criterion(vae_latent, z_sequence)
|
109 |
return mse_loss, pred_latent, z
|
110 |
|
|
|
111 |
def generate(
|
112 |
self,
|
113 |
style_text: str,
|
@@ -139,6 +140,7 @@ class Emuru(PreTrainedModel):
|
|
139 |
imgs = (imgs + 1) / 2
|
140 |
return F.to_pil_image(imgs[0, ..., style_img.size(-1):img_ends.item()].detach().cpu())
|
141 |
|
|
|
142 |
def generate_batch(
|
143 |
self,
|
144 |
style_texts: List[str],
|
|
|
108 |
mse_loss = self.mse_criterion(vae_latent, z_sequence)
|
109 |
return mse_loss, pred_latent, z
|
110 |
|
111 |
+
@torch.inference_mode()
|
112 |
def generate(
|
113 |
self,
|
114 |
style_text: str,
|
|
|
140 |
imgs = (imgs + 1) / 2
|
141 |
return F.to_pil_image(imgs[0, ..., style_img.size(-1):img_ends.item()].detach().cpu())
|
142 |
|
143 |
+
@torch.inference_mode()
|
144 |
def generate_batch(
|
145 |
self,
|
146 |
style_texts: List[str],
|