AbstractPhil committed
Commit d3c4f78 · 1 Parent(s): dfcfa0d
__pycache__/two_stream_shunt_adapter.cpython-310.pyc CHANGED
Binary files a/__pycache__/two_stream_shunt_adapter.cpython-310.pyc and b/__pycache__/two_stream_shunt_adapter.cpython-310.pyc differ
 
app.py CHANGED
@@ -131,123 +131,126 @@ def encode_sdxl_prompt(prompt, negative_prompt=""):
 
 # ─── Inference ────────────────────────────────────────────
 
-
-@torch.no_grad()
 @spaces.GPU
-def infer(prompt, negative_prompt, adapter_l_file, adapter_g_file, strength, noise, gate_prob,
-          use_anchor, steps, cfg_scale, scheduler_name, width, height, seed):
-
-    # Initialize device and models inside GPU context
+def infer(
+    prompt, negative_prompt, adapter_l_file, adapter_g_file,
+    strength, noise, gate_prob, use_anchor,
+    steps, cfg_scale, scheduler_name,
+    width, height, seed
+):
+    import torch
+    import numpy as np
+
     global t5_tok, t5_mod, pipe
     device = torch.device("cuda")
     dtype = torch.float16
-
-    # Load models if not already loaded
-    if t5_tok is None:
-        t5_tok = T5Tokenizer.from_pretrained("google/flan-t5-base")
-        t5_mod = T5EncoderModel.from_pretrained("google/flan-t5-base").to(device).eval()
-
-    if pipe is None:
-        pipe = StableDiffusionXLPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0",
-            torch_dtype=dtype,
-            variant="fp16",
-            use_safetensors=True
-        ).to(device)
-
-    # Set seed for reproducibility
-    if seed != -1:
-        torch.manual_seed(seed)
-        np.random.seed(seed)
-
-    # Set scheduler
-    if scheduler_name in SCHEDULERS:
-        pipe.scheduler = SCHEDULERS[scheduler_name].from_config(pipe.scheduler.config)
-
-    # Get T5 embeddings for semantic understanding - standardize to 77 tokens like CLIP
-    t5_ids = t5_tok(
-        prompt,
-        return_tensors="pt",
-        padding="max_length",
-        max_length=77,
-        truncation=True
-    ).input_ids.to(device)
-    t5_seq = t5_mod(t5_ids).last_hidden_state
-
-    # Get proper SDXL CLIP embeddings
-    clip_embeds = encode_sdxl_prompt(prompt, negative_prompt)
-
-    # Debug shapes
-    print(f"T5 seq shape: {t5_seq.shape}")
-    print(f"CLIP-L shape: {clip_embeds['clip_l'].shape}")
-    print(f"CLIP-G shape: {clip_embeds['clip_g'].shape}")
-
-    # Load adapters
-    adapter_l = load_adapter(repo_l, adapter_l_file, config_l).to(device) if adapter_l_file else None
-    adapter_g = load_adapter(repo_g, adapter_g_file, config_g).to(device) if adapter_g_file else None
-
-    # Apply CLIP-L adapter
-    if adapter_l is not None:
-        anchor_l, delta_l, log_sigma_l, attn_l1, attn_l2, tau_l, g_pred_l, gate_l = adapter_l(t5_seq, clip_embeds["clip_l"])
-        gate_l_scaled = gate_l * gate_prob
-        delta_l_final = delta_l * strength * gate_l_scaled
-        clip_l_mod = clip_embeds["clip_l"] + delta_l_final
-        if use_anchor:
-            clip_l_mod = clip_l_mod * (1 - gate_l_scaled) + anchor_l * gate_l_scaled
-        if noise > 0:
-            clip_l_mod += torch.randn_like(clip_l_mod) * noise
-    else:
-        clip_l_mod = clip_embeds["clip_l"]
-        delta_l_final = torch.zeros_like(clip_embeds["clip_l"])
-        gate_l_scaled = torch.zeros_like(clip_embeds["clip_l"])
-        g_pred_l = torch.tensor(0.0)
-        tau_l = torch.tensor(0.0)
-
-    # Apply CLIP-G adapter
-    if adapter_g is not None:
-        anchor_g, delta_g, log_sigma_g, attn_g1, attn_g2, tau_g, g_pred_g, gate_g = adapter_g(t5_seq, clip_embeds["clip_g"])
-        gate_g_scaled = gate_g * gate_prob
-        delta_g_final = delta_g * strength * gate_g_scaled
-        clip_g_mod = clip_embeds["clip_g"] + delta_g_final
-        if use_anchor:
-            clip_g_mod = clip_g_mod * (1 - gate_g_scaled) + anchor_g * gate_g_scaled
-        if noise > 0:
-            clip_g_mod += torch.randn_like(clip_g_mod) * noise
-    else:
-        clip_g_mod = clip_embeds["clip_g"]
-        delta_g_final = torch.zeros_like(clip_embeds["clip_g"])
-        gate_g_scaled = torch.zeros_like(clip_embeds["clip_g"])
-        g_pred_g = torch.tensor(0.0)
-        tau_g = torch.tensor(0.0)
-
-    # Combine embeddings in SDXL format: [CLIP-L(768) + CLIP-G(1280)] = 2048
-    prompt_embeds = torch.cat([clip_l_mod, clip_g_mod], dim=-1).to(dtype)
-    neg_embeds = torch.cat([clip_embeds["neg_clip_l"], clip_embeds["neg_clip_g"]], dim=-1).to(dtype)
-
-    # Generate image with proper SDXL parameters
-    image = pipe(
-        prompt_embeds=prompt_embeds,
-        pooled_prompt_embeds=clip_embeds["pooled"],
-        negative_prompt_embeds=neg_embeds,
-        negative_pooled_prompt_embeds=clip_embeds["neg_pooled"],
-        num_inference_steps=steps,
-        guidance_scale=cfg_scale,
-        width=width,
-        height=height,
-        num_images_per_prompt=1,  # Explicitly set this
-        generator=torch.Generator(device=device).manual_seed(seed) if seed != -1 else None
-    ).images[0]
-
+
+    with torch.no_grad():
+        # Initialize tokenizer and model
+        if t5_tok is None:
+            t5_tok = T5Tokenizer.from_pretrained("google/flan-t5-base")
+            t5_mod = T5EncoderModel.from_pretrained("google/flan-t5-base").to(device).eval()
+
+        if pipe is None:
+            pipe = StableDiffusionXLPipeline.from_pretrained(
+                "stabilityai/stable-diffusion-xl-base-1.0",
+                torch_dtype=dtype,
+                variant="fp16",
+                use_safetensors=True
+            ).to(device)
+
+        # Reproducibility
+        if seed != -1:
+            torch.manual_seed(seed)
+            np.random.seed(seed)
+
+        # Scheduler
+        if scheduler_name in SCHEDULERS:
+            pipe.scheduler = SCHEDULERS[scheduler_name].from_config(pipe.scheduler.config)
+
+        # T5 embeddings
+        t5_ids = t5_tok(
+            prompt, return_tensors="pt",
+            padding="max_length", max_length=77, truncation=True
+        ).input_ids.to(device)
+        t5_seq = t5_mod(t5_ids).last_hidden_state
+
+        # CLIP embeddings
+        clip_embeds = encode_sdxl_prompt(prompt, negative_prompt)
+
+        # Debug shapes
+        print(f"T5 seq shape: {t5_seq.shape}")
+        print(f"CLIP-L shape: {clip_embeds['clip_l'].shape}")
+        print(f"CLIP-G shape: {clip_embeds['clip_g'].shape}")
+
+        # Load adapters
+        adapter_l = load_adapter(repo_l, adapter_l_file, config_l).to(device) if adapter_l_file else None
+        adapter_g = load_adapter(repo_g, adapter_g_file, config_g).to(device) if adapter_g_file else None
+
+        # ---- Adapter L ----
+        if adapter_l:
+            anchor_l, delta_l, log_sigma_l, attn_l1, attn_l2, tau_l, g_pred_l, gate_l = adapter_l(t5_seq, clip_embeds["clip_l"])
+            gate_l_scaled = gate_l * gate_prob
+            delta_l_final = delta_l * strength * gate_l_scaled
+            clip_l_mod = clip_embeds["clip_l"] + delta_l_final
+            if use_anchor:
+                clip_l_mod = clip_l_mod * (1 - gate_l_scaled) + anchor_l * gate_l_scaled
+            if noise > 0:
+                clip_l_mod += torch.randn_like(clip_l_mod) * noise
+        else:
+            clip_l_mod = clip_embeds["clip_l"]
+            delta_l_final = torch.zeros_like(clip_l_mod)
+            gate_l_scaled = torch.zeros_like(clip_l_mod)
+            g_pred_l = torch.tensor(0.0)
+            tau_l = torch.tensor(0.0)
+
+        # ---- Adapter G ----
+        if adapter_g:
+            anchor_g, delta_g, log_sigma_g, attn_g1, attn_g2, tau_g, g_pred_g, gate_g = adapter_g(t5_seq, clip_embeds["clip_g"])
+            gate_g_scaled = gate_g * gate_prob
+            delta_g_final = delta_g * strength * gate_g_scaled
+            clip_g_mod = clip_embeds["clip_g"] + delta_g_final
+            if use_anchor:
+                clip_g_mod = clip_g_mod * (1 - gate_g_scaled) + anchor_g * gate_g_scaled
+            if noise > 0:
+                clip_g_mod += torch.randn_like(clip_g_mod) * noise
+        else:
+            clip_g_mod = clip_embeds["clip_g"]
+            delta_g_final = torch.zeros_like(clip_g_mod)
+            gate_g_scaled = torch.zeros_like(clip_g_mod)
+            g_pred_g = torch.tensor(0.0)
+            tau_g = torch.tensor(0.0)
+
+        # ---- Combine embeddings ----
+        prompt_embeds = torch.cat([clip_l_mod, clip_g_mod], dim=-1).to(dtype)
+        neg_embeds = torch.cat([clip_embeds["neg_clip_l"], clip_embeds["neg_clip_g"]], dim=-1).to(dtype)
+
+        # ---- Generate image ----
+        generator = torch.Generator(device=device).manual_seed(seed) if seed != -1 else None
+        image = pipe(
+            prompt_embeds=prompt_embeds,
+            pooled_prompt_embeds=clip_embeds["pooled"],
+            negative_prompt_embeds=neg_embeds,
+            negative_pooled_prompt_embeds=clip_embeds["neg_pooled"],
+            num_inference_steps=steps,
+            guidance_scale=cfg_scale,
+            width=width,
+            height=height,
+            num_images_per_prompt=1,
+            generator=generator,
+        ).images[0]
+
     return (
         image,
         plot_heat(delta_l_final.squeeze().cpu().numpy(), "Δ CLIP-L"),
-        plot_heat(gate_l_scaled.squeeze().cpu().numpy(), "Gate CLIP-L"),
+        plot_heat(gate_l_scaled.squeeze().cpu().numpy(), "Gate CLIP-L"),
         plot_heat(delta_g_final.squeeze().cpu().numpy(), "Δ CLIP-G"),
         plot_heat(gate_g_scaled.squeeze().cpu().numpy(), "Gate CLIP-G"),
         f"g_pred_l: {g_pred_l.mean().item():.3f}, τ_l: {tau_l.mean().item():.3f}",
         f"g_pred_g: {g_pred_g.mean().item():.3f}, τ_g: {tau_g.mean().item():.3f}"
     )
 
+
 # ─── Gradio Interface ─────────────────────────────────────────
 with gr.Blocks(title="SDXL Dual Shunt Adapter", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🧠 SDXL Dual Shunt Adapter • T5→CLIP Enhancement")
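For reference, a minimal usage sketch of the refactored `infer` entry point as it could be called directly (outside the Gradio UI). It assumes app.py's module-level globals (`t5_tok`, `t5_mod`, `pipe`, `SCHEDULERS`, `repo_l`, `repo_g`) and a CUDA device, since the function hard-codes `torch.device("cuda")`; all argument values below are illustrative only.

```python
# Illustrative sketch: argument values are examples, and the scheduler name /
# adapter filenames must match entries that actually exist in SCHEDULERS / the repos.
outputs = infer(
    prompt="a lighthouse on a cliff at dusk, cinematic lighting",
    negative_prompt="blurry, low quality",
    adapter_l_file=None,   # or a CLIP-L adapter filename available in repo_l
    adapter_g_file=None,   # or a CLIP-G adapter filename available in repo_g
    strength=1.0,
    noise=0.0,
    gate_prob=1.0,
    use_anchor=True,
    steps=30,
    cfg_scale=7.5,
    scheduler_name="DPMSolverMultistep",  # unknown names fall back to the pipeline default
    width=1024,
    height=1024,
    seed=42,
)

# The function returns the image, four heatmap figures, and two stats strings.
image, delta_l_map, gate_l_map, delta_g_map, gate_g_map, stats_l, stats_g = outputs
image.save("output.png")
```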