dbaranchuk committed
Commit 88adfd9 · 1 Parent(s): fac3c6f

Update space

Files changed (1):
  1. app.py  +45 -192

app.py CHANGED
@@ -5,131 +5,32 @@ import random
 import torch
 from diffusers import DDPMScheduler, StableDiffusionPipeline, DDIMScheduler, UNet2DConditionModel
 import p2p, generation, inversion
 
-model_id = 'runwayml/stable-diffusion-v1-5'
-dtype = torch.float16
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-# Reverse
-# -----------------------------
-pipe_reverse = StableDiffusionPipeline.from_pretrained(
-    model_id,
-    scheduler=DDIMScheduler.from_pretrained(model_id, subfolder="scheduler"),
-).to(device=device, dtype=dtype)
-unet = UNet2DConditionModel.from_pretrained("dbaranchuk/sd15-cfg-distill-unet").to(device)
-pipe_reverse.unet = unet
-pipe_reverse.load_lora_weights("dbaranchuk/icd-lora-sd15",
-                               weight_name='reverse-259-519-779-999.safetensors')
-pipe_reverse.fuse_lora()
-pipe_reverse.to(device)
-# -----------------------------
-
-# Forward
-# -----------------------------
-pipe_forward = StableDiffusionPipeline.from_pretrained(
-    model_id,
-    scheduler=DDIMScheduler.from_pretrained(model_id, subfolder="scheduler"),
-).to(device=device, dtype=dtype)
-unet = UNet2DConditionModel.from_pretrained("dbaranchuk/sd15-cfg-distill-unet").to(device)
-pipe_forward.unet = unet
-pipe_forward.load_lora_weights("dbaranchuk/icd-lora-sd15",
-                               weight_name='forward-19-259-519-779.safetensors')
-pipe_forward.fuse_lora()
-pipe_forward.to(device)
-# -----------------------------
+from diffusers import StableDiffusionInstructPix2PixPipeline, LCMScheduler
+
+# InstructPix2Pix with the LCM scheduler
+pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
+    "timbrooks/instruct-pix2pix", torch_dtype=torch.float16
+)
+pipe = pipe.to("cuda")
+pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
+
+# Adapt the InstructPix2Pix model using the LoRA parameters
+adapter_id = "latent-consistency/lcm-lora-sdv1-5"
+pipe.load_lora_weights(adapter_id)
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 
 @spaces.GPU(duration=30)
-def infer(image_path, input_prompt, edited_prompt, guidance, tau,
-          crs, srs, amplify_factor, amplify_word,
-          blend_orig, blend_edited, is_replacement):
-
-    tokenizer = pipe_forward.tokenizer
-    noise_scheduler = DDPMScheduler.from_pretrained(
-        "runwayml/stable-diffusion-v1-5", subfolder="scheduler")
-
-    NUM_REVERSE_CONS_STEPS = 4
-    REVERSE_TIMESTEPS = [259, 519, 779, 999]
-    NUM_FORWARD_CONS_STEPS = 4
-    FORWARD_TIMESTEPS = [19, 259, 519, 779]
-    NUM_DDIM_STEPS = 50
-
-    solver = generation.Generator(
-        model=pipe_forward,
-        noise_scheduler=noise_scheduler,
-        n_steps=NUM_DDIM_STEPS,
-        forward_cons_model=pipe_forward,
-        forward_timesteps=FORWARD_TIMESTEPS,
-        reverse_cons_model=pipe_reverse,
-        reverse_timesteps=REVERSE_TIMESTEPS,
-        num_endpoints=NUM_REVERSE_CONS_STEPS,
-        num_forward_endpoints=NUM_FORWARD_CONS_STEPS,
-        max_forward_timestep_index=49,
-        start_timestep=19)
-
-    p2p.NUM_DDIM_STEPS = NUM_DDIM_STEPS
-    p2p.tokenizer = tokenizer
-    p2p.device = 'cuda'
-
-    prompt = [input_prompt]
-
-    (image_gt, image_rec), ddim_latent, uncond_embeddings = inversion.invert(
-        # Playing params
-        image_path=image_path,
-        prompt=prompt,
-        # Fixed params
-        is_cons_inversion=True,
-        w_embed_dim=512,
-        inv_guidance_scale=0.0,
-        stop_step=50,
-        solver=solver,
-        seed=10500)
-
-    p2p.NUM_DDIM_STEPS = 4
-    p2p.tokenizer = tokenizer
-    p2p.device = 'cuda'
-
-    prompts = [input_prompt,
-               edited_prompt]
-
-    # Playing params
-    cross_replace_steps = {'default_': crs, }
-    self_replace_steps = srs
-    blend_word = (((blend_orig,), (blend_edited,)))
-    eq_params = {"words": (amplify_word,), "values": (amplify_factor,)}
-
-    controller = p2p.make_controller(prompts,
-                                     is_replacement,  # (is_replacement) True if only one word is changed
-                                     cross_replace_steps,
-                                     self_replace_steps,
-                                     blend_word,
-                                     eq_params)
-
-    tau = tau
-    image, _ = generation.runner(
-        # Playing params
-        guidance_scale=guidance - 1,
-        tau1=tau,  # Dynamic guidance if tau < 1.0
-        tau2=tau,
-        # Fixed params
-        model=pipe_reverse,
-        is_cons_forward=True,
-        w_embed_dim=512,
-        solver=solver,
-        prompt=prompts,
-        controller=controller,
-        num_inference_steps=50,
-        generator=None,
-        latent=ddim_latent,
-        uncond_embeddings=uncond_embeddings,
-        return_type='image')
-
-    image = generation.to_pil_images(image[1, :, :, :])
+def infer(image, edit_instruction, guidance_scale, image_guidance_scale, n_steps):
+    image = pipe(prompt=edit_instruction,
+                 image=image,
+                 num_inference_steps=n_steps,
+                 guidance_scale=guidance_scale,
+                 image_guidance_scale=image_guidance_scale,
+                 ).images[0]
     return image
 
 css="""
@@ -176,14 +77,8 @@ with gr.Blocks(css=css) as demo:
         )
         with gr.Row():
 
-            input_prompt = gr.Text(
-                label="Origial prompt",
-                max_lines=1,
-                placeholder="Enter your prompt",
-            )
-
-            prompt = gr.Text(
-                label="Edited prompt",
+            edit_instruction = gr.Text(
+                label="Edit instruction",
                 max_lines=1,
                 placeholder="Enter your prompt",
             )
@@ -192,7 +87,7 @@ with gr.Blocks(css=css) as demo:
         with gr.Row():
 
             with gr.Column():
-                input_image = gr.Image(label="Input image", height=512, width=512, show_label=False)
+                image = gr.Image(label="Input image", height=512, width=512, show_label=False)
             with gr.Column():
                 result = gr.Image(label="Result", height=512, width=512, show_label=False)
 
@@ -201,68 +96,28 @@ with gr.Blocks(css=css) as demo:
         with gr.Row():
 
             guidance_scale = gr.Slider(
-                label="Guidance scale",
+                label="guidance scale",
                 minimum=1.0,
-                maximum=20.0,
+                maximum=8.0,
                 step=1.0,
-                value=20.0,
+                value=2.0,
             )
 
-            tau = gr.Slider(
-                label="Dynamic guidance tau",
-                minimum=0.0,
-                maximum=1.0,
-                step=0.2,
-                value=0.8,
-            )
-
-        with gr.Row():
-
-            crs = gr.Slider(
-                label="Cross replace steps",
-                minimum=0.0,
-                maximum=1.0,
-                step=0.1,
-                value=0.4
-            )
-
-            srs = gr.Slider(
-                label="Self replace steps",
-                minimum=0.0,
-                maximum=1.0,
-                step=0.1,
-                value=0.4,
+            image_guidance_scale = gr.Slider(
+                label="image guidance scale",
+                minimum=1.0,
+                maximum=8.0,
+                step=1.0,
+                value=1.0,
             )
 
-        with gr.Row():
-            amplify_word = gr.Text(
-                label="Amplify word",
-                max_lines=1,
-                placeholder="Enter your word",
-            )
-
-            amplify_factor = gr.Slider(
-                label="Amplify factor",
-                minimum=0.0,
-                maximum=30,
+            n_steps = gr.Slider(
+                label="inference steps",
+                minimum=1.0,
+                maximum=10.0,
                 step=1.0,
-                value=1,
+                value=4.0,
             )
-        with gr.Row():
-
-            blend_orig = gr.Text(
-                label="Blended word 1",
-                max_lines=1,
-                placeholder="Enter your word",)
-
-            blend_edited = gr.Text(
-                label="Blended word 2",
-                max_lines=1,
-                placeholder="Enter your word",)
-
-        with gr.Row():
-
-            is_replacement = gr.Checkbox(label="Is replacement?", value=False)
 
         with gr.Row():
             run_button = gr.Button("Edit", scale=0)
@@ -397,22 +252,20 @@ with gr.Blocks(css=css) as demo:
        ],
    ]
 
-    gr.Examples(
-        examples = examples,
-        inputs =[input_image, input_prompt, prompt,
-                 guidance_scale, tau, crs, srs, amplify_factor, amplify_word,
-                 blend_orig, blend_edited, is_replacement],
-        outputs=[
-            result
-        ],
-        fn=infer, cache_examples=True
-    )
+    #gr.Examples(
+    #    examples = examples,
+    #    inputs =[input_image, input_prompt, prompt,
+    #             guidance_scale, tau, crs, srs, amplify_factor, amplify_word,
+    #             blend_orig, blend_edited, is_replacement],
+    #    outputs=[
+    #        result
+    #    ],
+    #    fn=infer, cache_examples=True
+    #)
 
    run_button.click(
        fn = infer,
-        inputs=[input_image, input_prompt, prompt,
-                guidance_scale, tau, crs, srs, amplify_factor, amplify_word,
-                blend_orig, blend_edited, is_replacement],
+        inputs=[image, edit_instruction, guidance_scale, image_guidance_scale, n_steps],
        outputs = [result]
    )
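
For context, the updated app.py replaces the previous consistency-distillation editing stack with a plain InstructPix2Pix pipeline accelerated by LCM-LoRA. Below is a minimal standalone sketch of that setup outside Gradio; the input image URL, the edit instruction, and the output filename are placeholders for illustration, not part of this commit.

import torch
from diffusers import StableDiffusionInstructPix2PixPipeline, LCMScheduler
from diffusers.utils import load_image

# InstructPix2Pix base weights with the LCM scheduler, as in the updated app.py
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "timbrooks/instruct-pix2pix", torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# The LCM-LoRA adapter enables few-step sampling at low guidance scales
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")

# Placeholder inputs for illustration only
image = load_image("https://example.com/input.png").resize((512, 512))
edited = pipe(
    prompt="make it a watercolor painting",  # edit instruction
    image=image,
    num_inference_steps=4,      # demo default for "inference steps"
    guidance_scale=2.0,         # demo default for "guidance scale"
    image_guidance_scale=1.0,   # demo default for "image guidance scale"
).images[0]
edited.save("edited.png")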