Spaces:

AbstractPhil
/

shunt-adapter-testing

Runtime error

App Files Files Community

AbstractPhil committed on Jun 1

Commit

1e5ce4d

1 Parent(s): 7b42604

yes

Browse files

Files changed (3) hide show

__pycache__/two_stream_shunt_adapter.cpython-310.pyc +0 -0
app.py +280 -93
two_stream_shunt_adapter.py +110 -318

__pycache__/two_stream_shunt_adapter.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/two_stream_shunt_adapter.cpython-310.pyc and b/__pycache__/two_stream_shunt_adapter.cpython-310.pyc differ

app.py CHANGED Viewed

@@ -3,25 +3,35 @@ import gradio as gr
 import numpy as np
 import matplotlib.pyplot as plt
 from transformers import T5Tokenizer, T5EncoderModel
-from diffusers import DiffusionPipeline
-from safetensors.torch import safe_open
 from huggingface_hub import hf_hub_download
 from two_stream_shunt_adapter import TwoStreamShuntAdapter
-from adapter_config import T5_SHUNT_REPOS
 # ─── Device & Model Setup ─────────────────────────────────────
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 t5_tok = T5Tokenizer.from_pretrained("google/flan-t5-base")
 t5_mod = T5EncoderModel.from_pretrained("google/flan-t5-base").to(device).eval()
-pipe = DiffusionPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     torch_dtype=dtype,
-    variant="fp16" if dtype == torch.float16 else None
 ).to(device)
 # ─── Adapter Configs ──────────────────────────────────────────
 clip_l_opts = T5_SHUNT_REPOS["clip_l"]["shunts_available"]["shunt_list"]
 clip_g_opts = T5_SHUNT_REPOS["clip_g"]["shunts_available"]["shunt_list"]
@@ -31,8 +41,11 @@ config_l = T5_SHUNT_REPOS["clip_l"]["config"]
 config_g = T5_SHUNT_REPOS["clip_g"]["config"]
 # ─── Loader ───────────────────────────────────────────────────
 def load_adapter(repo, filename, config):
     path = hf_hub_download(repo_id=repo, filename=filename)
     model = TwoStreamShuntAdapter(config).eval()
     tensors = {}
     with safe_open(path, framework="pt", device="cpu") as f:
@@ -42,103 +55,277 @@ def load_adapter(repo, filename, config):
     model.to(device)
     return model
-# ─── Inference ────────────────────────────────────────────────
-@torch.no_grad()
-def infer(prompt, adapter_l_file, adapter_g_file, strength, noise, gate_prob, use_anchor):
-    adapter_list = []
-    # Load adapters with config
-    adapter_list.append({
-        "adapter": load_adapter(repo_l, adapter_l_file, config_l),
-        "config": config_l
-    })
-    adapter_list.append({
-        "adapter": load_adapter(repo_g, adapter_g_file, config_g),
-        "config": config_g
-    })
-    # Encode prompt via T5
-    t5_ids = t5_tok(prompt, return_tensors="pt").input_ids.to(device)
-    t5_seq = t5_mod(t5_ids).last_hidden_state  # (B, L, 768)
-    # Encode prompt via SDXL normally to get CLIP-L and CLIP-G outputs
-    prompt_embeds, pooled_prompt_embeds = pipe._encode_prompt(
-        prompt=prompt,
-        device=device,
-        num_images_per_prompt=1,
-        do_classifier_free_guidance=False,
-    )
-    total_dim = prompt_embeds.shape[-1]
-    cond_tensor = prompt_embeds.clone()
-    for adapter_info in adapter_list:
-        adapter_model = adapter_info["adapter"]
-        adapter_config = adapter_info["config"]
-        clip_dim = adapter_config["clip"]["hidden_size"]
-        if clip_dim == 768:
-            clip_slice = cond_tensor[:, :, :768]
-            slice_start, slice_end = 0, 768
-        elif clip_dim == 1280:
-            clip_slice = cond_tensor[:, :, 768:2048] if total_dim >= 2048 else cond_tensor[:, :, 768:]
-            slice_start, slice_end = 768, 2048
-        else:
-            continue
-        anchor, delta_mean_adapter, log_sigma_adapter, _, _, _, g_pred_adapter, gate_adapter = adapter_model(t5_seq, clip_slice)
-        gate = gate_adapter * gate_prob
-        delta = (delta_mean_adapter + 0.0) * strength * gate
-        if delta.shape[1] != clip_slice.shape[1]:
-            delta = torch.nn.functional.interpolate(
-                delta.transpose(1, 2),
-                size=clip_slice.size(1),
-                mode="nearest"
-            ).transpose(1, 2)
         if use_anchor:
-            clip_slice = clip_slice * (1 - gate) + anchor * gate
         if noise > 0:
-            clip_slice = clip_slice + torch.randn_like(clip_slice) * noise
-        cond_tensor[:, :, slice_start:slice_end] = (clip_slice + delta).type_as(cond_tensor)
-    pooled_embed = cond_tensor.mean(dim=1)
     image = pipe(
-        prompt_embeds=cond_tensor,
-        pooled_prompt_embeds=pooled_embed,
-        negative_prompt_embeds=torch.zeros_like(cond_tensor),
-        negative_pooled_prompt_embeds=torch.zeros_like(pooled_embed),
-        num_inference_steps=20,
-        guidance_scale=5.0
     ).images[0]
-    return image
-# ─── Gradio App ───────────────────────────────────────────────
-with gr.Blocks(title="Dual Adapter T5→CLIP") as demo:
-    gr.Markdown("# 🧠 Dual Shunt Adapter • SDXL Inference")
     with gr.Row():
-        with gr.Column():
-            prompt = gr.Textbox(label="Prompt", value="a futuristic control station")
-            adapter_l = gr.Dropdown(choices=clip_l_opts, label="CLIP-L (768d) Adapter")
-            adapter_g = gr.Dropdown(choices=clip_g_opts, label="CLIP-G (1280d) Adapter")
-            strength = gr.Slider(0.0, 5.0, value=1.0, step=0.1, label="Adapter Strength")
-            noise = gr.Slider(0.0, 1.0, value=0.0, step=0.05, label="Noise Injection")
-            gate_prob = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Gate Probability")
-            use_anchor = gr.Checkbox(label="Use Anchor", value=True)
-            run_btn = gr.Button("Run")
-        with gr.Column():
-            out_img = gr.Image(label="Generated Image")
     run_btn.click(
-        fn=infer,
-        inputs=[prompt, adapter_l, adapter_g, strength, noise, gate_prob, use_anchor],
-        outputs=out_img
     )
 if __name__ == "__main__":
-    demo.launch(share=True)

 import numpy as np
 import matplotlib.pyplot as plt
 from transformers import T5Tokenizer, T5EncoderModel
+from diffusers import StableDiffusionXLPipeline, DDIMScheduler, EulerDiscreteScheduler, DPMSolverMultistepScheduler
+from safetensors.torch import load_file
 from huggingface_hub import hf_hub_download
 from two_stream_shunt_adapter import TwoStreamShuntAdapter
+from configs import T5_SHUNT_REPOS
 # ─── Device & Model Setup ─────────────────────────────────────
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+# T5 Model for semantic understanding
 t5_tok = T5Tokenizer.from_pretrained("google/flan-t5-base")
 t5_mod = T5EncoderModel.from_pretrained("google/flan-t5-base").to(device).eval()
+# SDXL Pipeline with proper text encoders
+pipe = StableDiffusionXLPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     torch_dtype=dtype,
+    variant="fp16" if dtype == torch.float16 else None,
+    use_safetensors=True
 ).to(device)
+# Available schedulers
+SCHEDULERS = {
+    "DPM++ 2M": DPMSolverMultistepScheduler,
+    "DDIM": DDIMScheduler,
+    "Euler": EulerDiscreteScheduler,
+}
 # ─── Adapter Configs ──────────────────────────────────────────
 clip_l_opts = T5_SHUNT_REPOS["clip_l"]["shunts_available"]["shunt_list"]
 clip_g_opts = T5_SHUNT_REPOS["clip_g"]["shunts_available"]["shunt_list"]
 config_g = T5_SHUNT_REPOS["clip_g"]["config"]
 # ─── Loader ───────────────────────────────────────────────────
+from safetensors.torch import safe_open
 def load_adapter(repo, filename, config):
     path = hf_hub_download(repo_id=repo, filename=filename)
     model = TwoStreamShuntAdapter(config).eval()
     tensors = {}
     with safe_open(path, framework="pt", device="cpu") as f:
     model.to(device)
     return model
+# ─── Visualization ────────────────────────────────────────────
+def plot_heat(mat, title):
+    """Render ``mat`` as a heatmap and return the rendered pixels.
+
+    Args:
+        mat: Array-like of values to display. 0-D/1-D input is promoted to
+            2-D because ``imshow`` needs at least two dimensions.
+        title: Title drawn above the heatmap.
+
+    Returns:
+        RGBA ``numpy`` array of the rendered figure. An ``io.BytesIO`` (the
+        previous return value) is not a value type ``gr.Image`` can display,
+        so the pixel array is returned instead.
+    """
+    # float32 keeps half-precision inputs safe for matplotlib's colormapping.
+    data = np.atleast_2d(np.asarray(mat, dtype=np.float32))
+    fig, ax = plt.subplots(figsize=(6, 3), dpi=100)
+    try:
+        im = ax.imshow(data, aspect="auto", cmap="bwr", origin="upper")
+        ax.set_title(title)
+        fig.colorbar(im, ax=ax)
+        fig.tight_layout()
+        # NOTE(review): buffer_rgba() needs an Agg-based canvas (the usual
+        # headless backend) — confirm the deployment backend.
+        fig.canvas.draw()
+        # Copy the pixels: the canvas buffer must not outlive the figure.
+        return np.array(fig.canvas.buffer_rgba())
+    finally:
+        # Release the figure even when rendering raises.
+        plt.close(fig)
+# ─── SDXL Text Encoding ───────────────────────────────────────
+def encode_sdxl_prompt(prompt, negative_prompt=""):
+    """Encode a prompt pair with both of the SDXL pipeline's text encoders.
+
+    Args:
+        prompt: Positive prompt text.
+        negative_prompt: Negative prompt text (empty string by default).
+
+    Returns:
+        dict with per-token sequence embeddings under ``"clip_l"``,
+        ``"clip_g"``, ``"neg_clip_l"``, ``"neg_clip_g"`` and the pooled
+        CLIP-G embeddings under ``"pooled"`` / ``"neg_pooled"``.
+    """
+    def _token_ids(tokenizer, text):
+        # Same fixed 77-token padded/truncated window for every tokenizer call.
+        return tokenizer(
+            text,
+            padding="max_length",
+            max_length=77,
+            truncation=True,
+            return_tensors="pt"
+        ).input_ids.to(device)
+
+    with torch.no_grad():
+        out_l = pipe.text_encoder(_token_ids(pipe.tokenizer, prompt))
+        neg_out_l = pipe.text_encoder(_token_ids(pipe.tokenizer, negative_prompt))
+        # One forward pass per prompt gives both the sequence and pooled outputs.
+        out_g = pipe.text_encoder_2(_token_ids(pipe.tokenizer_2, prompt))
+        neg_out_g = pipe.text_encoder_2(_token_ids(pipe.tokenizer_2, negative_prompt))
+
+    # Fields are read by name: the projection model used as SDXL's second
+    # encoder puts the pooled ``text_embeds`` at index 0 and the sequence at
+    # index 1, so positional [0]/[1] indexing swapped the two tensors.
+    # NOTE(review): diffusers' own SDXL prompt encoding conditions on
+    # hidden_states[-2]; last_hidden_state is kept so the adapters see the same
+    # kind of input as before — confirm which layer the shunts were trained on.
+    return {
+        "clip_l": out_l.last_hidden_state,
+        "clip_g": out_g.last_hidden_state,
+        "neg_clip_l": neg_out_l.last_hidden_state,
+        "neg_clip_g": neg_out_g.last_hidden_state,
+        "pooled": out_g.text_embeds,
+        "neg_pooled": neg_out_g.text_embeds
+    }
+# ─── Inference ────────────────────────────────────────────────
+@torch.no_grad()
+def infer(prompt, negative_prompt, adapter_l_file, adapter_g_file, strength, noise, gate_prob,
+          use_anchor, steps, cfg_scale, scheduler_name, width, height, seed):
+    """Generate one SDXL image from CLIP embeddings optionally shifted by shunt adapters.
+
+    Args:
+        prompt: Positive prompt; encoded by T5 and by both CLIP encoders.
+        negative_prompt: Negative prompt; encoded by the CLIP encoders only.
+        adapter_l_file: CLIP-L adapter filename, or a falsy value to skip it.
+        adapter_g_file: CLIP-G adapter filename, or a falsy value to skip it.
+        strength: Multiplier applied to each adapter's delta.
+        noise: Scale of Gaussian noise added to adapted embeddings (0 disables).
+        gate_prob: Multiplier applied to each adapter's gate.
+        use_anchor: When true, blend the adapted embedding towards the anchor.
+        steps: Number of inference steps.
+        cfg_scale: Guidance scale passed to the pipeline.
+        scheduler_name: Key into ``SCHEDULERS``; unknown names keep the current scheduler.
+        width: Output width in pixels.
+        height: Output height in pixels.
+        seed: Integer-valued seed; ``-1`` means unseeded.
+
+    Returns:
+        Tuple of (image, Δ CLIP-L heatmap, CLIP-L gate heatmap, Δ CLIP-G
+        heatmap, CLIP-G gate heatmap, CLIP-L stats string, CLIP-G stats string).
+    """
+    # UI number widgets may deliver floats; np.random.seed and
+    # torch.Generator.manual_seed only accept integers.
+    seed = -1 if seed is None else int(seed)
+    steps, width, height = int(steps), int(width), int(height)
+    # Set seed for reproducibility
+    if seed != -1:
+        torch.manual_seed(seed)
+        # NumPy only accepts seeds in [0, 2**32).
+        np.random.seed(seed % 2**32)
+    # Set scheduler
+    if scheduler_name in SCHEDULERS:
+        pipe.scheduler = SCHEDULERS[scheduler_name].from_config(pipe.scheduler.config)
+    # Get T5 embeddings for semantic understanding
+    t5_ids = t5_tok(prompt, return_tensors="pt", padding=True, truncation=True).input_ids.to(device)
+    t5_seq = t5_mod(t5_ids).last_hidden_state
+    # Get proper SDXL CLIP embeddings
+    clip_embeds = encode_sdxl_prompt(prompt, negative_prompt)
+    # Load adapters
+    # NOTE(review): adapters are re-downloaded/re-built on every call, and the
+    # pipeline is loaded in `dtype` while T5 uses its default precision —
+    # confirm the adapters accept mixed fp16/fp32 inputs on CUDA.
+    adapter_l = load_adapter(repo_l, adapter_l_file, config_l) if adapter_l_file else None
+    adapter_g = load_adapter(repo_g, adapter_g_file, config_g) if adapter_g_file else None
+    # Apply CLIP-L adapter
+    if adapter_l is not None:
+        anchor_l, delta_l, log_sigma_l, attn_l1, attn_l2, tau_l, g_pred_l, gate_l = adapter_l(t5_seq, clip_embeds["clip_l"])
+        gate_l_scaled = gate_l * gate_prob
+        delta_l_final = delta_l * strength * gate_l_scaled
+        clip_l_mod = clip_embeds["clip_l"] + delta_l_final
         if use_anchor:
+            clip_l_mod = clip_l_mod * (1 - gate_l_scaled) + anchor_l * gate_l_scaled
         if noise > 0:
+            clip_l_mod += torch.randn_like(clip_l_mod) * noise
+    else:
+        # No adapter: pass CLIP-L through and report neutral diagnostics.
+        clip_l_mod = clip_embeds["clip_l"]
+        delta_l_final = torch.zeros_like(clip_embeds["clip_l"])
+        gate_l_scaled = torch.zeros_like(clip_embeds["clip_l"])
+        g_pred_l = torch.tensor(0.0)
+        tau_l = torch.tensor(0.0)
+    # Apply CLIP-G adapter
+    if adapter_g is not None:
+        anchor_g, delta_g, log_sigma_g, attn_g1, attn_g2, tau_g, g_pred_g, gate_g = adapter_g(t5_seq, clip_embeds["clip_g"])
+        gate_g_scaled = gate_g * gate_prob
+        delta_g_final = delta_g * strength * gate_g_scaled
+        clip_g_mod = clip_embeds["clip_g"] + delta_g_final
+        if use_anchor:
+            clip_g_mod = clip_g_mod * (1 - gate_g_scaled) + anchor_g * gate_g_scaled
+        if noise > 0:
+            clip_g_mod += torch.randn_like(clip_g_mod) * noise
+    else:
+        # No adapter: pass CLIP-G through and report neutral diagnostics.
+        clip_g_mod = clip_embeds["clip_g"]
+        delta_g_final = torch.zeros_like(clip_embeds["clip_g"])
+        gate_g_scaled = torch.zeros_like(clip_embeds["clip_g"])
+        g_pred_g = torch.tensor(0.0)
+        tau_g = torch.tensor(0.0)
+    # Combine embeddings in SDXL format: [CLIP-L(768) + CLIP-G(1280)] = 2048
+    prompt_embeds = torch.cat([clip_l_mod, clip_g_mod], dim=-1).to(dtype)
+    neg_embeds = torch.cat([clip_embeds["neg_clip_l"], clip_embeds["neg_clip_g"]], dim=-1).to(dtype)
+    # Generate image with proper SDXL parameters
     image = pipe(
+        prompt_embeds=prompt_embeds,
+        pooled_prompt_embeds=clip_embeds["pooled"],
+        negative_prompt_embeds=neg_embeds,
+        negative_pooled_prompt_embeds=clip_embeds["neg_pooled"],
+        num_inference_steps=steps,
+        guidance_scale=cfg_scale,
+        width=width,
+        height=height,
+        generator=torch.Generator(device=device).manual_seed(seed) if seed != -1 else None
     ).images[0]
+    return (
+        image,
+        plot_heat(delta_l_final.squeeze().cpu().numpy(), "Δ CLIP-L"),
+        plot_heat(gate_l_scaled.squeeze().cpu().numpy(), "Gate CLIP-L"),
+        plot_heat(delta_g_final.squeeze().cpu().numpy(), "Δ CLIP-G"),
+        plot_heat(gate_g_scaled.squeeze().cpu().numpy(), "Gate CLIP-G"),
+        f"g_pred_l: {g_pred_l.mean().item():.3f}, τ_l: {tau_l.mean().item():.3f}",
+        f"g_pred_g: {g_pred_g.mean().item():.3f}, τ_g: {tau_g.mean().item():.3f}"
+    )
+# ─── Gradio Interface ─────────────────────────────────────────
+# Two-column layout: controls on the left, image/diagnostics on the right.
+with gr.Blocks(title="SDXL Dual Shunt Adapter", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🧠 SDXL Dual Shunt Adapter • T5→CLIP Enhancement")
+    gr.Markdown("Enhance SDXL generation by using T5 semantic understanding to modify CLIP embeddings")
     with gr.Row():
+        with gr.Column(scale=1):
+            # Prompts
+            with gr.Group():
+                gr.Markdown("### Prompts")
+                prompt = gr.Textbox(
+                    label="Prompt",
+                    value="a futuristic control station with holographic displays",
+                    lines=3
+                )
+                negative_prompt = gr.Textbox(
+                    label="Negative Prompt",
+                    value="blurry, low quality, distorted",
+                    lines=2
+                )
+            # Adapters ("None" is a sentinel choice meaning "no adapter")
+            with gr.Group():
+                gr.Markdown("### Adapters")
+                adapter_l = gr.Dropdown(
+                    choices=["None"] + clip_l_opts,
+                    label="CLIP-L (768d) Adapter",
+                    value="None"
+                )
+                adapter_g = gr.Dropdown(
+                    choices=["None"] + clip_g_opts,
+                    label="CLIP-G (1280d) Adapter",
+                    value="None"
+                )
+            # Adapter Controls
+            with gr.Group():
+                gr.Markdown("### Adapter Controls")
+                strength = gr.Slider(0.0, 5.0, value=1.0, step=0.1, label="Adapter Strength")
+                noise = gr.Slider(0.0, 1.0, value=0.0, step=0.05, label="Noise Injection")
+                gate_prob = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Gate Probability")
+                use_anchor = gr.Checkbox(label="Use Anchor", value=True)
+            # Generation Settings
+            with gr.Group():
+                gr.Markdown("### Generation Settings")
+                with gr.Row():
+                    steps = gr.Slider(1, 100, value=25, step=1, label="Steps")
+                    cfg_scale = gr.Slider(1.0, 20.0, value=7.5, step=0.5, label="CFG Scale")
+                scheduler_name = gr.Dropdown(
+                    choices=list(SCHEDULERS.keys()),
+                    value="DPM++ 2M",
+                    label="Scheduler"
+                )
+                with gr.Row():
+                    width = gr.Slider(512, 1536, value=1024, step=64, label="Width")
+                    height = gr.Slider(512, 1536, value=1024, step=64, label="Height")
+                # precision=0 makes the component deliver an int; the seed is
+                # handed to seeding functions that reject floats.
+                seed = gr.Number(value=-1, precision=0, label="Seed (-1 for random)")
+            run_btn = gr.Button("🚀 Generate", variant="primary", size="lg")
+        with gr.Column(scale=1):
+            # Output
+            with gr.Group():
+                gr.Markdown("### Generated Image")
+                out_img = gr.Image(label="Result", height=400)
+            # Visualizations
+            with gr.Group():
+                gr.Markdown("### Adapter Visualizations")
+                with gr.Row():
+                    delta_l = gr.Image(label="Δ CLIP-L", height=200)
+                    gate_l = gr.Image(label="Gate CLIP-L", height=200)
+                with gr.Row():
+                    delta_g = gr.Image(label="Δ CLIP-G", height=200)
+                    gate_g = gr.Image(label="Gate CLIP-G", height=200)
+            # Stats
+            with gr.Group():
+                gr.Markdown("### Adapter Statistics")
+                stats_l = gr.Textbox(label="CLIP-L Stats", interactive=False)
+                stats_g = gr.Textbox(label="CLIP-G Stats", interactive=False)
+    # Event handlers
+    def process_adapters(adapter_l_val, adapter_g_val):
+        """Map the "None" dropdown sentinel to a real ``None`` for each adapter."""
+        adapter_l_processed = None if adapter_l_val == "None" else adapter_l_val
+        adapter_g_processed = None if adapter_g_val == "None" else adapter_g_val
+        return adapter_l_processed, adapter_g_processed
+    def run_inference(*args):
+        """Forward the UI values to ``infer`` with adapter sentinels resolved.
+
+        ``args`` follows the order of ``inputs`` below, so positions 2 and 3
+        hold the CLIP-L and CLIP-G adapter selections.
+        """
+        new_args = list(args)
+        new_args[2], new_args[3] = process_adapters(args[2], args[3])
+        return infer(*new_args)
     run_btn.click(
+        fn=run_inference,
+        inputs=[
+            prompt, negative_prompt, adapter_l, adapter_g, strength, noise, gate_prob,
+            use_anchor, steps, cfg_scale, scheduler_name, width, height, seed
+        ],
+        outputs=[out_img, delta_l, gate_l, delta_g, gate_g, stats_l, stats_g]
     )
 if __name__ == "__main__":
+    # NOTE(review): share=True requests a public share link — confirm it is
+    # wanted for the hosting environment this app runs in.
+    demo.launch(share=True)

two_stream_shunt_adapter.py CHANGED Viewed

@@ -1,331 +1,123 @@
 import torch
-import gradio as gr
-import numpy as np
-import matplotlib.pyplot as plt
-from transformers import T5Tokenizer, T5EncoderModel
-from diffusers import StableDiffusionXLPipeline, DDIMScheduler, EulerDiscreteScheduler, DPMSolverMultistepScheduler
-from safetensors.torch import load_file
-from huggingface_hub import hf_hub_download
-from two_stream_shunt_adapter import TwoStreamShuntAdapter
-from configs import T5_SHUNT_REPOS
-# ─── Device & Model Setup ─────────────────────────────────────
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-# T5 Model for semantic understanding
-t5_tok = T5Tokenizer.from_pretrained("google/flan-t5-base")
-t5_mod = T5EncoderModel.from_pretrained("google/flan-t5-base").to(device).eval()
-# SDXL Pipeline with proper text encoders
-pipe = StableDiffusionXLPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0",
-    torch_dtype=dtype,
-    variant="fp16" if dtype == torch.float16 else None,
-    use_safetensors=True
-).to(device)
-# Available schedulers
-SCHEDULERS = {
-    "DPM++ 2M": DPMSolverMultistepScheduler,
-    "DDIM": DDIMScheduler,
-    "Euler": EulerDiscreteScheduler,
-}
-# ─── Adapter Configs ──────────────────────────────────────────
-clip_l_opts = T5_SHUNT_REPOS["clip_l"]["shunts_available"]["shunt_list"]
-clip_g_opts = T5_SHUNT_REPOS["clip_g"]["shunts_available"]["shunt_list"]
-repo_l = T5_SHUNT_REPOS["clip_l"]["repo"]
-repo_g = T5_SHUNT_REPOS["clip_g"]["repo"]
-config_l = T5_SHUNT_REPOS["clip_l"]["config"]
-config_g = T5_SHUNT_REPOS["clip_g"]["config"]
-# ─── Loader ───────────────────────────────────────────────────
-from safetensors.torch import safe_open
-def load_adapter(repo, filename, config):
-    path = hf_hub_download(repo_id=repo, filename=filename)
-    model = TwoStreamShuntAdapter(config).eval()
-    tensors = {}
-    with safe_open(path, framework="pt", device="cpu") as f:
-        for key in f.keys():
-            tensors[key] = f.get_tensor(key)
-    model.load_state_dict(tensors)
-    model.to(device)
-    return model
-# ─── Visualization ────────────────────────────────────────────
-def plot_heat(mat, title):
-    import io
-    fig, ax = plt.subplots(figsize=(6, 3), dpi=100)
-    im = ax.imshow(mat, aspect="auto", cmap="bwr", origin="upper")
-    ax.set_title(title)
-    plt.colorbar(im, ax=ax)
-    buf = io.BytesIO()
-    plt.savefig(buf, format="png", bbox_inches='tight')
-    buf.seek(0)
-    plt.close(fig)
-    return buf
-# ─── SDXL Text Encoding ───────────────────────────────────────
-def encode_sdxl_prompt(prompt, negative_prompt=""):
-    """Generate proper CLIP-L and CLIP-G embeddings using SDXL's text encoders"""
-    # Tokenize for both encoders
-    tokens_l = pipe.tokenizer(
-        prompt,
-        padding="max_length",
-        max_length=77,
-        truncation=True,
-        return_tensors="pt"
-    ).input_ids.to(device)
-    tokens_g = pipe.tokenizer_2(
-        prompt,
-        padding="max_length",
-        max_length=77,
-        truncation=True,
-        return_tensors="pt"
-    ).input_ids.to(device)
-    # Negative prompts
-    neg_tokens_l = pipe.tokenizer(
-        negative_prompt,
-        padding="max_length",
-        max_length=77,
-        truncation=True,
-        return_tensors="pt"
-    ).input_ids.to(device)
-    neg_tokens_g = pipe.tokenizer_2(
-        negative_prompt,
-        padding="max_length",
-        max_length=77,
-        truncation=True,
-        return_tensors="pt"
-    ).input_ids.to(device)
-    with torch.no_grad():
-        # CLIP-L embeddings (768d)
-        clip_l_embeds = pipe.text_encoder(tokens_l)[0]
-        neg_clip_l_embeds = pipe.text_encoder(neg_tokens_l)[0]
-        # CLIP-G embeddings (1280d)
-        clip_g_embeds = pipe.text_encoder_2(tokens_g)[0]
-        neg_clip_g_embeds = pipe.text_encoder_2(neg_tokens_g)[0]
-        # Pooled embeddings for SDXL
-        pooled_embeds = pipe.text_encoder_2(tokens_g)[1]
-        neg_pooled_embeds = pipe.text_encoder_2(neg_tokens_g)[1]
-    return {
-        "clip_l": clip_l_embeds,
-        "clip_g": clip_g_embeds,
-        "neg_clip_l": neg_clip_l_embeds,
-        "neg_clip_g": neg_clip_g_embeds,
-        "pooled": pooled_embeds,
-        "neg_pooled": neg_pooled_embeds
-    }
-# ─── Inference ────────────────────────────────────────────────
-@torch.no_grad()
-def infer(prompt, negative_prompt, adapter_l_file, adapter_g_file, strength, noise, gate_prob,
-          use_anchor, steps, cfg_scale, scheduler_name, width, height, seed):
-    # Set seed for reproducibility
-    if seed != -1:
-        torch.manual_seed(seed)
-        np.random.seed(seed)
-    # Set scheduler
-    if scheduler_name in SCHEDULERS:
-        pipe.scheduler = SCHEDULERS[scheduler_name].from_config(pipe.scheduler.config)
-    # Get T5 embeddings for semantic understanding
-    t5_ids = t5_tok(prompt, return_tensors="pt", padding=True, truncation=True).input_ids.to(device)
-    t5_seq = t5_mod(t5_ids).last_hidden_state
-    # Get proper SDXL CLIP embeddings
-    clip_embeds = encode_sdxl_prompt(prompt, negative_prompt)
-    # Load adapters
-    adapter_l = load_adapter(repo_l, adapter_l_file, config_l) if adapter_l_file else None
-    adapter_g = load_adapter(repo_g, adapter_g_file, config_g) if adapter_g_file else None
-    # Apply CLIP-L adapter
-    if adapter_l is not None:
-        anchor_l, delta_l, log_sigma_l, attn_l1, attn_l2, tau_l, g_pred_l, gate_l = adapter_l(t5_seq, clip_embeds["clip_l"])
-        gate_l_scaled = gate_l * gate_prob
-        delta_l_final = delta_l * strength * gate_l_scaled
-        clip_l_mod = clip_embeds["clip_l"] + delta_l_final
-        if use_anchor:
-            clip_l_mod = clip_l_mod * (1 - gate_l_scaled) + anchor_l * gate_l_scaled
-        if noise > 0:
-            clip_l_mod += torch.randn_like(clip_l_mod) * noise
-    else:
-        clip_l_mod = clip_embeds["clip_l"]
-        delta_l_final = torch.zeros_like(clip_embeds["clip_l"])
-        gate_l_scaled = torch.zeros_like(clip_embeds["clip_l"])
-        g_pred_l = torch.tensor(0.0)
-        tau_l = torch.tensor(0.0)
-    # Apply CLIP-G adapter
-    if adapter_g is not None:
-        anchor_g, delta_g, log_sigma_g, attn_g1, attn_g2, tau_g, g_pred_g, gate_g = adapter_g(t5_seq, clip_embeds["clip_g"])
-        gate_g_scaled = gate_g * gate_prob
-        delta_g_final = delta_g * strength * gate_g_scaled
-        clip_g_mod = clip_embeds["clip_g"] + delta_g_final
-        if use_anchor:
-            clip_g_mod = clip_g_mod * (1 - gate_g_scaled) + anchor_g * gate_g_scaled
-        if noise > 0:
-            clip_g_mod += torch.randn_like(clip_g_mod) * noise
-    else:
-        clip_g_mod = clip_embeds["clip_g"]
-        delta_g_final = torch.zeros_like(clip_embeds["clip_g"])
-        gate_g_scaled = torch.zeros_like(clip_embeds["clip_g"])
-        g_pred_g = torch.tensor(0.0)
-        tau_g = torch.tensor(0.0)
-    # Combine embeddings in SDXL format: [CLIP-L(768) + CLIP-G(1280)] = 2048
-    prompt_embeds = torch.cat([clip_l_mod, clip_g_mod], dim=-1).to(dtype)
-    neg_embeds = torch.cat([clip_embeds["neg_clip_l"], clip_embeds["neg_clip_g"]], dim=-1).to(dtype)
-    # Generate image with proper SDXL parameters
-    image = pipe(
-        prompt_embeds=prompt_embeds,
-        pooled_prompt_embeds=clip_embeds["pooled"],
-        negative_prompt_embeds=neg_embeds,
-        negative_pooled_prompt_embeds=clip_embeds["neg_pooled"],
-        num_inference_steps=steps,
-        guidance_scale=cfg_scale,
-        width=width,
-        height=height,
-        generator=torch.Generator(device=device).manual_seed(seed) if seed != -1 else None
-    ).images[0]
-    return (
-        image,
-        plot_heat(delta_l_final.squeeze().cpu().numpy(), "Δ CLIP-L"),
-        plot_heat(gate_l_scaled.squeeze().cpu().numpy(), "Gate CLIP-L"),
-        plot_heat(delta_g_final.squeeze().cpu().numpy(), "Δ CLIP-G"),
-        plot_heat(gate_g_scaled.squeeze().cpu().numpy(), "Gate CLIP-G"),
-        f"g_pred_l: {g_pred_l.mean().item():.3f}, τ_l: {tau_l.mean().item():.3f}",
-        f"g_pred_g: {g_pred_g.mean().item():.3f}, τ_g: {tau_g.mean().item():.3f}"
-    )
-# ─── Gradio Interface ─────────────────────────────────────────
-with gr.Blocks(title="SDXL Dual Shunt Adapter", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🧠 SDXL Dual Shunt Adapter • T5→CLIP Enhancement")
-    gr.Markdown("Enhance SDXL generation by using T5 semantic understanding to modify CLIP embeddings")
-    with gr.Row():
-        with gr.Column(scale=1):
-            # Prompts
-            with gr.Group():
-                gr.Markdown("### Prompts")
-                prompt = gr.Textbox(
-                    label="Prompt",
-                    value="a futuristic control station with holographic displays",
-                    lines=3
-                )
-                negative_prompt = gr.Textbox(
-                    label="Negative Prompt",
-                    value="blurry, low quality, distorted",
-                    lines=2
-                )
-            # Adapters
-            with gr.Group():
-                gr.Markdown("### Adapters")
-                adapter_l = gr.Dropdown(
-                    choices=["None"] + clip_l_opts,
-                    label="CLIP-L (768d) Adapter",
-                    value="None"
-                )
-                adapter_g = gr.Dropdown(
-                    choices=["None"] + clip_g_opts,
-                    label="CLIP-G (1280d) Adapter",
-                    value="None"
-                )
-            # Adapter Controls
-            with gr.Group():
-                gr.Markdown("### Adapter Controls")
-                strength = gr.Slider(0.0, 5.0, value=1.0, step=0.1, label="Adapter Strength")
-                noise = gr.Slider(0.0, 1.0, value=0.0, step=0.05, label="Noise Injection")
-                gate_prob = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Gate Probability")
-                use_anchor = gr.Checkbox(label="Use Anchor", value=True)
-            # Generation Settings
-            with gr.Group():
-                gr.Markdown("### Generation Settings")
-                with gr.Row():
-                    steps = gr.Slider(1, 100, value=25, step=1, label="Steps")
-                    cfg_scale = gr.Slider(1.0, 20.0, value=7.5, step=0.5, label="CFG Scale")
-                scheduler_name = gr.Dropdown(
-                    choices=list(SCHEDULERS.keys()),
-                    value="DPM++ 2M",
-                    label="Scheduler"
-                )
-                with gr.Row():
-                    width = gr.Slider(512, 1536, value=1024, step=64, label="Width")
-                    height = gr.Slider(512, 1536, value=1024, step=64, label="Height")
-                seed = gr.Number(value=-1, label="Seed (-1 for random)")
-            run_btn = gr.Button("🚀 Generate", variant="primary", size="lg")
-        with gr.Column(scale=1):
-            # Output
-            with gr.Group():
-                gr.Markdown("### Generated Image")
-                out_img = gr.Image(label="Result", height=400)
-            # Visualizations
-            with gr.Group():
-                gr.Markdown("### Adapter Visualizations")
-                with gr.Row():
-                    delta_l = gr.Image(label="Δ CLIP-L", height=200)
-                    gate_l = gr.Image(label="Gate CLIP-L", height=200)
-                with gr.Row():
-                    delta_g = gr.Image(label="Δ CLIP-G", height=200)
-                    gate_g = gr.Image(label="Gate CLIP-G", height=200)
-            # Stats
-            with gr.Group():
-                gr.Markdown("### Adapter Statistics")
-                stats_l = gr.Textbox(label="CLIP-L Stats", interactive=False)
-                stats_g = gr.Textbox(label="CLIP-G Stats", interactive=False)
-    # Event handlers
-    def process_adapters(adapter_l_val, adapter_g_val):
-        # Convert "None" back to None for processing
-        adapter_l_processed = None if adapter_l_val == "None" else adapter_l_val
-        adapter_g_processed = None if adapter_g_val == "None" else adapter_g_val
-        return adapter_l_processed, adapter_g_processed
-    def run_inference(*args):
-        # Process adapter selections
-        adapter_l_processed, adapter_g_processed = process_adapters(args[2], args[3])
-        # Call inference with processed adapters
-        new_args = list(args)
-        new_args[2] = adapter_l_processed
-        new_args[3] = adapter_g_processed
-        return infer(*new_args)
-    run_btn.click(
-        fn=run_inference,
-        inputs=[
-            prompt, negative_prompt, adapter_l, adapter_g, strength, noise, gate_prob,
-            use_anchor, steps, cfg_scale, scheduler_name, width, height, seed
-        ],
-        outputs=[out_img, delta_l, gate_l, delta_g, gate_g, stats_l, stats_g]
-    )
-if __name__ == "__main__":
-    demo.launch(share=True)

 import torch
+import torch.nn as nn
+import torch.nn.functional as F
+# ─── Residual Pocket Block ───────────────────────────────────
+class BottleneckResBlock(nn.Module):
+    """Residual block: LayerNorm → Conv1d along the sequence → MLP, added to the input.
+
+    Args:
+        dim: Feature size of the last axis of the input (and of the output).
+        kernel: Odd, positive Conv1d kernel size. With ``padding=kernel // 2``
+            only odd sizes keep the sequence length, which the residual add
+            requires.
+        dropout: Dropout probability applied at the end of the MLP.
+
+    Raises:
+        ValueError: If ``kernel`` is even or not positive.
+    """
+    def __init__(self, dim, kernel=3, dropout=0.1):
+        super().__init__()
+        # Fail at construction instead of with a shape error in forward().
+        if kernel < 1 or kernel % 2 == 0:
+            raise ValueError(f"kernel must be a positive odd integer, got {kernel}")
+        self.norm = nn.LayerNorm(dim)
+        self.conv = nn.Conv1d(dim, dim, kernel_size=kernel, padding=kernel // 2, groups=1)
+        self.proj = nn.Sequential(
+            nn.Linear(dim, dim * 2),
+            nn.GELU(),
+            nn.Linear(dim * 2, dim),
+            nn.Dropout(dropout)
+        )
+    def forward(self, x):
+        """Apply the block to ``x`` laid out as (batch, sequence, dim); shape is preserved."""
+        residual = x
+        x = self.norm(x)
+        # Conv1d convolves over the last axis, so move channels to axis 1 and back.
+        x = x.transpose(1, 2)
+        x = self.conv(x).transpose(1, 2)
+        return residual + self.proj(x)
+# ─── Two Stream Shunt Adapter ──────────────────────────────────────
class TwoStreamShuntAdapter(nn.Module):
    """Cross-attend a T5 token stream and a CLIP token stream in a shared bottleneck space.

    Both inputs are projected to ``config["bottleneck"]`` features, exchanged
    through two multi-head cross-attention layers, fused, and projected back to
    the CLIP width as an anchor, a gated delta and a log-sigma, together with
    one guidance scalar per batch item.

    Required ``config`` keys:
        ``t5.hidden_size``, ``clip.hidden_size``, ``bottleneck``, ``heads``,
        ``tau_init``, ``max_guidance``.
    Optional ``config`` keys (defaults):
        ``layer_norm`` (True), ``use_dropout`` (True), ``dropout`` (0.1),
        ``proj_layers`` (2), ``assert_input_dims`` (True).
    """

    def __init__(self, config: dict):
        super().__init__()
        self.config = config
        self.t5_dim = config["t5"]["hidden_size"]
        self.clip_dim = config["clip"]["hidden_size"]
        self.bneck = config["bottleneck"]
        self.heads = config["heads"]
        self.tau_init = config["tau_init"]
        self.max_guidance = config["max_guidance"]
        use_norm = config.get("layer_norm", True)
        use_do = config.get("use_dropout", True)
        do_p = config.get("dropout", 0.1)
        proj_depth = config.get("proj_layers", 2)

        def build_projection(input_dim, output_dim):
            """Build [LayerNorm] -> proj_depth x (Linear, GELU, [Dropout]) -> Linear."""
            layers = []
            last_dim = input_dim
            if use_norm:
                layers.append(nn.LayerNorm(last_dim))
            for i in range(proj_depth):
                # The first hidden layer is twice the bottleneck width when
                # there is more than one hidden layer; all others are 1x.
                next_dim = self.bneck * (2 if i == 0 and proj_depth > 1 else 1)
                layers.append(nn.Linear(last_dim, next_dim))
                layers.append(nn.GELU())
                if use_do:
                    layers.append(nn.Dropout(do_p))
                last_dim = next_dim
            layers.append(nn.Linear(last_dim, output_dim))
            return nn.Sequential(*layers)

        # Projections into the shared bottleneck space
        self.proj_t5 = build_projection(self.t5_dim, self.bneck)
        self.proj_clip = build_projection(self.clip_dim, self.bneck)

        # Attention
        # NOTE(review): `do_p` is applied here and in the pocket blocks even
        # when `use_dropout` is False; only build_projection honours the flag.
        self.cross_t2c = nn.MultiheadAttention(self.bneck, self.heads, batch_first=True, dropout=do_p)
        self.cross_c2t = nn.MultiheadAttention(self.bneck, self.heads, batch_first=True, dropout=do_p)
        # float() guards against an integer tau_init (e.g. `1` from a JSON
        # config): torch.full would infer an integer dtype, and nn.Parameter
        # cannot hold an integer tensor that requires grad.
        self.tau = nn.Parameter(torch.full((self.heads, 1, 1), float(self.tau_init)))

        # Residual Pocket
        self.pocket_blocks = nn.Sequential(
            BottleneckResBlock(self.bneck, dropout=do_p),
            BottleneckResBlock(self.bneck, dropout=do_p),
        )

        # Fuse
        self.fuse = nn.Sequential(
            nn.LayerNorm(2 * self.bneck),
            nn.Linear(2 * self.bneck, self.bneck * 2),
            nn.GELU(),
            nn.Linear(self.bneck * 2, self.bneck),
        )

        # Output Projections
        self.anchor_proj = build_projection(self.bneck, self.clip_dim)
        self.delta_proj = build_projection(self.bneck, self.clip_dim)
        self.logsig_proj = build_projection(self.bneck, self.clip_dim)
        # NOTE(review): Tanh followed by Sigmoid confines the gate to roughly
        # (0.27, 0.73). Kept as-is because changing the layer stack would
        # change the outputs of already-trained checkpoints; confirm intent.
        self.gate_proj = nn.Sequential(
            nn.LayerNorm(self.bneck),
            nn.Linear(self.bneck, self.bneck),
            nn.GELU(),
            nn.Linear(self.bneck, 1),
            nn.Tanh(),
            nn.Sigmoid(),
        )
        self.guidance_proj = nn.Sequential(
            nn.LayerNorm(self.bneck),
            nn.Linear(self.bneck, 1),
            nn.Sigmoid(),
        )

    def forward(self, t5_seq: torch.Tensor, clip_seq: torch.Tensor):
        """Run the adapter on one batch.

        Args:
            t5_seq: ``(batch, t5_len, t5_dim)`` tensor.
            clip_seq: ``(batch, clip_len, clip_dim)`` tensor.

        Returns:
            An 8-tuple ``(anchor, delta, log_sigma, attn_t2c, attn_c2t, tau,
            g_pred, gate)``:

            * ``anchor``, ``delta``, ``log_sigma``: ``(batch, clip_len, clip_dim)``;
              ``delta`` is already multiplied by ``gate``.
            * ``attn_t2c``, ``attn_c2t``: per-head attention weights of the
              T5->CLIP and CLIP->T5 cross-attention layers.
            * ``tau``: the ``(heads, 1, 1)`` parameter, returned as-is (it is
              not used inside this method).
            * ``g_pred``: ``(batch, 1)``, the token-mean guidance in (0, 1)
              scaled by ``max_guidance``.
            * ``gate``: ``(batch, clip_len, 1)``.

        Raises:
            AssertionError: if a feature width does not match the config and
                ``config["assert_input_dims"]`` is not disabled.
        """
        if self.config.get("assert_input_dims", True):
            assert t5_seq.size(-1) == self.t5_dim, (
                f"t5_seq last dim is {t5_seq.size(-1)}, expected {self.t5_dim}"
            )
            assert clip_seq.size(-1) == self.clip_dim, (
                f"clip_seq last dim is {clip_seq.size(-1)}, expected {self.clip_dim}"
            )

        t5_b = self.proj_t5(t5_seq)
        clip_b = self.proj_clip(clip_seq)

        # T5 queries attend over CLIP tokens, and vice versa; per-head weights
        # are kept (not averaged) so callers can inspect them.
        t2c, attn_t2c = self.cross_t2c(t5_b, clip_b, clip_b, need_weights=True, average_attn_weights=False)
        c2t, attn_c2t = self.cross_c2t(clip_b, t5_b, t5_b, need_weights=True, average_attn_weights=False)

        # The T5-side result is pooled over its sequence axis and broadcast to
        # the CLIP sequence length so it can be concatenated with c2t.
        pocket = self.pocket_blocks(t2c)
        pocket_mean = pocket.mean(1, keepdim=True).expand(-1, clip_b.size(1), -1)
        h = self.fuse(torch.cat([pocket_mean, c2t], dim=-1))

        # gate_proj contains no stochastic layers, so one evaluation serves
        # both the delta scaling and the returned gate.
        gate = self.gate_proj(h)

        anchor = self.anchor_proj(h)
        delta = self.delta_proj(h) * gate
        log_sigma = self.logsig_proj(h)

        g_tok = self.guidance_proj(h).squeeze(-1)
        g_pred = g_tok.mean(1, keepdim=True) * self.max_guidance

        return anchor, delta, log_sigma, attn_t2c, attn_c2t, self.tau, g_pred, gate