Spaces: Running on Zero
Commit · 25bf19b
1 Parent(s): db851e8
yes

app.py CHANGED
@@ -111,34 +111,16 @@ def encode_sdxl_prompt(prompt, negative_prompt=""):
     clip_l_embeds = pipe.text_encoder(tokens_l)[0]
     neg_clip_l_embeds = pipe.text_encoder(neg_tokens_l)[0]
 
-    # CLIP-G embeddings (1280d)
+    # CLIP-G embeddings (1280d) - [0] is pooled, [1] is sequence (opposite of CLIP-L)
     clip_g_output = pipe.text_encoder_2(tokens_g)
-
-    print(f"CLIP-G output length: {len(clip_g_output) if hasattr(clip_g_output, '__len__') else 'no len'}")
-    if hasattr(clip_g_output, '__len__') and len(clip_g_output) > 0:
-        print(f"CLIP-G [0] shape: {clip_g_output[0].shape}")
-        if len(clip_g_output) > 1:
-            print(f"CLIP-G [1] shape: {clip_g_output[1].shape}")
+    clip_g_embeds = clip_g_output[1]  # sequence embeddings
 
-    # Try different ways to get the sequence embeddings
-    if hasattr(clip_g_output, 'last_hidden_state'):
-        clip_g_embeds = clip_g_output.last_hidden_state
-    elif hasattr(clip_g_output, '__len__') and len(clip_g_output) > 0:
-        clip_g_embeds = clip_g_output[0]
-    else:
-        clip_g_embeds = clip_g_output
-
     neg_clip_g_output = pipe.text_encoder_2(neg_tokens_g)
-    if hasattr(neg_clip_g_output, 'last_hidden_state'):
-        neg_clip_g_embeds = neg_clip_g_output.last_hidden_state
-    elif hasattr(neg_clip_g_output, '__len__') and len(neg_clip_g_output) > 0:
-        neg_clip_g_embeds = neg_clip_g_output[0]
-    else:
-        neg_clip_g_embeds = neg_clip_g_output
+    neg_clip_g_embeds = neg_clip_g_output[1]  # sequence embeddings
 
     # Pooled embeddings for SDXL
-    pooled_embeds = clip_g_output[
-    neg_pooled_embeds = neg_clip_g_output[
+    pooled_embeds = clip_g_output[0]  # pooled embeddings
+    neg_pooled_embeds = neg_clip_g_output[0]  # pooled embeddings
 
     return {
         "clip_l": clip_l_embeds,
@@ -233,6 +215,7 @@ def infer(prompt, negative_prompt, adapter_l_file, adapter_g_file, strength, noi
         guidance_scale=cfg_scale,
         width=width,
         height=height,
+        num_images_per_prompt=1,  # Explicitly set this
         generator=torch.Generator(device=device).manual_seed(seed) if seed != -1 else None
     ).images[0]
 
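
The second hunk pins num_images_per_prompt=1 and keeps the seed handling: a fresh torch.Generator created on the same device as the pipeline gives reproducible sampling for a fixed seed, while passing generator=None leaves sampling nondeterministic. A small sketch of that convention, assuming the app's seed == -1 means "random"; make_generator is a hypothetical helper, not part of the commit:

import torch

def make_generator(seed: int, device: str = "cuda"):
    # -1 follows the app's convention for "use a random seed"
    if seed == -1:
        return None
    return torch.Generator(device=device).manual_seed(seed)

# Hypothetical usage mirroring the pipeline call in infer():
# image = pipe(prompt, num_images_per_prompt=1,
#              generator=make_generator(1234)).images[0]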