Jordan Legg committed on
Commit
5b33905
·
1 Parent(s): 409e82d

console logging for txt2img

Browse files
Files changed (1) hide show
  1. app.py +31 -18
app.py CHANGED
@@ -42,12 +42,32 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
42
  print(f"Using seed: {seed}")
43
  generator = torch.Generator().manual_seed(seed)
44
 
45
- # Get the expected image size for the VAE
46
- vae_image_size = pipe.vae.config.sample_size
47
- print(f"Expected VAE image size: {vae_image_size}")
48
-
49
- if init_image is not None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  print("Initial image provided, processing img2img")
 
 
51
  init_image = init_image.convert("RGB")
52
  init_image = preprocess_image(init_image, vae_image_size)
53
  latents = encode_image(init_image, pipe.vae)
@@ -66,11 +86,13 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
66
  latents = conv(latents)
67
  print(f"Latents shape after channel conversion: {latents.shape}")
68
 
 
 
 
69
  # Reshape latents to match the transformer's input expectations
70
- latents = latents.view(1, 64, height // 8, width // 8)
71
- print(f"Latents shape after reshaping: {latents.shape}")
72
 
73
- # Avoid flattening, ensure latents are in the expected shape for the transformer
74
  # Adding extra debug to understand what transformer expects
75
  try:
76
  print("Calling the transformer with latents")
@@ -91,16 +113,6 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
91
  guidance_scale=0.0,
92
  latents=latents
93
  ).images[0]
94
- else:
95
- print("No initial image provided, processing text2img")
96
- image = pipe(
97
- prompt=prompt,
98
- height=height,
99
- width=width,
100
- num_inference_steps=num_inference_steps,
101
- generator=generator,
102
- guidance_scale=0.0
103
- ).images[0]
104
 
105
  print("Inference complete")
106
  return image, seed
@@ -109,6 +121,7 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
109
 
110
 
111
 
 
112
  # Define example prompts
113
  examples = [
114
  "a tiny astronaut hatching from an egg on the moon",
 
42
  print(f"Using seed: {seed}")
43
  generator = torch.Generator().manual_seed(seed)
44
 
45
+ if init_image is None:
46
+ print("No initial image provided, processing text2img")
47
+ # Process text2img
48
+ try:
49
+ print("Calling the diffusion pipeline without latents")
50
+ result = pipe(
51
+ prompt=prompt,
52
+ height=height,
53
+ width=width,
54
+ num_inference_steps=num_inference_steps,
55
+ generator=generator,
56
+ guidance_scale=0.0
57
+ )
58
+ image = result.images[0]
59
+ latents = result.latents
60
+
61
+ # Log the latent shapes from text2img process
62
+ print(f"Latents shape from text2img: {latents.shape}")
63
+ except Exception as e:
64
+ print(f"Pipeline call failed with error: {e}")
65
+ raise
66
+
67
+ else:
68
  print("Initial image provided, processing img2img")
69
+ vae_image_size = pipe.vae.config.sample_size
70
+ print(f"Expected VAE image size: {vae_image_size}")
71
  init_image = init_image.convert("RGB")
72
  init_image = preprocess_image(init_image, vae_image_size)
73
  latents = encode_image(init_image, pipe.vae)
 
86
  latents = conv(latents)
87
  print(f"Latents shape after channel conversion: {latents.shape}")
88
 
89
+ # Debugging input shape before calling transformer
90
+ print(f"Latents shape before reshaping for transformer: {latents.shape}")
91
+
92
  # Reshape latents to match the transformer's input expectations
93
+ latents = latents.permute(0, 2, 3, 1).contiguous().view(-1, 64) # Assuming the transformer expects (batch, sequence, feature)
94
+ print(f"Latents shape after reshaping for transformer: {latents.shape}")
95
 
 
96
  # Adding extra debug to understand what transformer expects
97
  try:
98
  print("Calling the transformer with latents")
 
113
  guidance_scale=0.0,
114
  latents=latents
115
  ).images[0]
 
 
 
 
 
 
 
 
 
 
116
 
117
  print("Inference complete")
118
  return image, seed
 
121
 
122
 
123
 
124
+
125
  # Define example prompts
126
  examples = [
127
  "a tiny astronaut hatching from an egg on the moon",