Hatman committed
Commit 8e3f1b8 · verified · 1 Parent(s): 851fa96

Update app.py

Files changed (1)
  1. app.py +265 -265
app.py CHANGED
@@ -1,266 +1,266 @@
- import sys
- sys.path.append('./')
-
-
- import os
- import cv2
- import torch
- import random
- import numpy as np
- from PIL import Image
- from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline
-
- import spaces
- import gradio as gr
- from huggingface_hub import hf_hub_download
-
- from ip_adapter import IPAdapterXL
-
- import os
- os.system("git lfs install")
- os.system("git clone https://huggingface.co/h94/IP-Adapter")
- os.system("mv IP-Adapter/sdxl_models sdxl_models")
-
- # global variable
- MAX_SEED = np.iinfo(np.int32).max
- device = "cuda" if torch.cuda.is_available() else "cpu"
- dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
-
- # initialization
- base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
- image_encoder_path = "sdxl_models/image_encoder"
- ip_ckpt = "sdxl_models/ip-adapter_sdxl.bin"
-
- controlnet_path = "diffusers/controlnet-canny-sdxl-1.0"
- controlnet = ControlNetModel.from_pretrained(controlnet_path, use_safetensors=False, torch_dtype=torch.float16).to(device)
-
- # load SDXL pipeline
- pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
-     base_model_path,
-     controlnet=controlnet,
-     torch_dtype=torch.float16,
-     add_watermarker=False,
- )
-
- # load ip-adapter
- # target_blocks=["block"] for original IP-Adapter
- # target_blocks=["up_blocks.0.attentions.1"] for style blocks only
- # target_blocks = ["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] # for style+layout blocks
- ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1"])
-
- def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)
-     return seed
-
- def resize_img(
-     input_image,
-     max_side=1280,
-     min_side=1024,
-     size=None,
-     pad_to_max_side=False,
-     mode=Image.BILINEAR,
-     base_pixel_number=64,
- ):
-     w, h = input_image.size
-     if size is not None:
-         w_resize_new, h_resize_new = size
-     else:
-         ratio = min_side / min(h, w)
-         w, h = round(ratio * w), round(ratio * h)
-         ratio = max_side / max(h, w)
-         input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
-         w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
-         h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
-     input_image = input_image.resize([w_resize_new, h_resize_new], mode)
-
-     if pad_to_max_side:
-         res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
-         offset_x = (max_side - w_resize_new) // 2
-         offset_y = (max_side - h_resize_new) // 2
-         res[
-             offset_y : offset_y + h_resize_new, offset_x : offset_x + w_resize_new
-         ] = np.array(input_image)
-         input_image = Image.fromarray(res)
-     return input_image
-
- @spaces.GPU(enable_queue=True)
- def create_image(image_pil,
-                  input_image,
-                  prompt,
-                  n_prompt,
-                  scale,
-                  control_scale,
-                  guidance_scale,
-                  num_samples,
-                  num_inference_steps,
-                  seed,
-                  target="Load only style blocks",
-                  neg_content_prompt=None,
-                  neg_content_scale=0):
-
-     if isinstance(image_pil, np.ndarray):
-         image_pil = Image.fromarray(image_pil)
-
-     if target =="Load original IP-Adapter":
-         # target_blocks=["blocks"] for original IP-Adapter
-         ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["blocks"])
-     elif target=="Load only style blocks":
-         # target_blocks=["up_blocks.0.attentions.1"] for style blocks only
-         ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1"])
-     elif target=="Load only layout blocks":
-         # target_blocks=["up_blocks.0.attentions.1"] for style blocks only
-         ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["down_blocks.2.attentions.1"])
-     elif target == "Load style+layout block":
-         # target_blocks = ["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] # for style+layout blocks
-         ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"])
-
-     if input_image is not None:
-         input_image = resize_img(input_image, max_side=1024)
-         cv_input_image = pil_to_cv2(input_image)
-         detected_map = cv2.Canny(cv_input_image, 50, 200)
-         canny_map = Image.fromarray(cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB))
-     else:
-         canny_map = Image.new('RGB', (1024, 1024), color=(255, 255, 255))
-         control_scale = 0
-
-     if float(control_scale) == 0:
-         canny_map = canny_map.resize((1024,1024))
-
-     if len(neg_content_prompt) > 0 and neg_content_scale != 0:
-         images = ip_model.generate(pil_image=image_pil,
-                                    prompt=prompt,
-                                    negative_prompt=n_prompt,
-                                    scale=scale,
-                                    guidance_scale=guidance_scale,
-                                    num_samples=num_samples,
-                                    num_inference_steps=num_inference_steps,
-                                    seed=seed,
-                                    image=canny_map,
-                                    controlnet_conditioning_scale=float(control_scale),
-                                    neg_content_prompt=neg_content_prompt,
-                                    neg_content_scale=neg_content_scale
-                                    )
-     else:
-         images = ip_model.generate(pil_image=image_pil,
-                                    prompt=prompt,
-                                    negative_prompt=n_prompt,
-                                    scale=scale,
-                                    guidance_scale=guidance_scale,
-                                    num_samples=num_samples,
-                                    num_inference_steps=num_inference_steps,
-                                    seed=seed,
-                                    image=canny_map,
-                                    controlnet_conditioning_scale=float(control_scale),
-                                    )
-     return images
-
- def pil_to_cv2(image_pil):
-     image_np = np.array(image_pil)
-     image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
-     return image_cv2
-
- # Description
- title = r"""
- <h1 align="center">InstantStyle</h1>
- """
-
- description = r"""
- How to use:<br>
- 1. Upload a style image.
- 2. Set stylization mode, only use style block by default.
- 2. Enter a text prompt, as done in normal text-to-image models.
- 3. Click the <b>Submit</b> button to begin customization.
- 4. Share your stylized photo with your friends and enjoy! 😊
-
-
- Advanced usage:<br>
- 1. Click advanced options.
- 2. Upload another source image for image-based stylization using ControlNet.
- 3. Enter negative content prompt to avoid content leakage.
- """
-
- article = r"""
- ---
- ```bibtex
- @article{wang2024instantstyle,
-   title={InstantStyle: Free Lunch towards Style-Preserving in Text-to-Image Generation},
-   author={Wang, Haofan and Wang, Qixun and Bai, Xu and Qin, Zekui and Chen, Anthony},
-   journal={arXiv preprint arXiv:2404.02733},
-   year={2024}
- }
- ```
- """
-
- block = gr.Blocks().queue(max_size=10, api_open=True)
- with block:
-
-     # description
-     gr.Markdown(title)
-     gr.Markdown(description)
-
-     with gr.Tabs():
-         with gr.Row():
-             with gr.Column():
-
-                 with gr.Row():
-                     with gr.Column():
-                         image_pil = gr.Image(label="Style Image", type="numpy")
-
-                     target = gr.Radio(["Load only style blocks", "Load style+layout block", "Load original IP-Adapter"],
-                                       value="Load only style blocks",
-                                       label="Style mode")
-
-                     prompt = gr.Textbox(label="Prompt",
-                                         value="a cat, masterpiece, best quality, high quality")
-
-                     scale = gr.Slider(minimum=0,maximum=2.0, step=0.01,value=1.0, label="Scale")
-
-                 with gr.Accordion(open=False, label="Advanced Options"):
-
-                     with gr.Column():
-                         src_image_pil = gr.Image(label="Source Image (optional)", type='pil')
-                         control_scale = gr.Slider(minimum=0,maximum=1.0, step=0.01,value=0.5, label="Controlnet conditioning scale")
-
-                     n_prompt = gr.Textbox(label="Neg Prompt", value="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry")
-
-                     neg_content_prompt = gr.Textbox(label="Neg Content Prompt", value="")
-                     neg_content_scale = gr.Slider(minimum=0, maximum=1.0, step=0.01,value=0.5, label="Neg Content Scale")
-
-                     guidance_scale = gr.Slider(minimum=1,maximum=15.0, step=0.01,value=5.0, label="guidance scale")
-                     num_samples= gr.Slider(minimum=1,maximum=4.0, step=1.0,value=1.0, label="num samples")
-                     num_inference_steps = gr.Slider(minimum=5,maximum=50.0, step=1.0,value=20, label="num inference steps")
-                     seed = gr.Slider(minimum=-1000000,maximum=1000000,value=1, step=1, label="Seed Value")
-                     randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-                 generate_button = gr.Button("Generate Image")
-
-             with gr.Column():
-                 generated_image = gr.Gallery(label="Generated Image")
-
-         generate_button.click(
-             fn=randomize_seed_fn,
-             inputs=[seed, randomize_seed],
-             outputs=seed,
-             queue=False,
-             api_name=False,
-         ).then(
-             fn=create_image,
-             inputs=[image_pil,
-                     src_image_pil,
-                     prompt,
-                     n_prompt,
-                     scale,
-                     control_scale,
-                     guidance_scale,
-                     num_samples,
-                     num_inference_steps,
-                     seed,
-                     target,
-                     neg_content_prompt,
-                     neg_content_scale],
-             outputs=[generated_image])
-
-     gr.Markdown(article)
-
  block.launch(show_error=True)
 
+ import sys
+ sys.path.append('./')
+
+
+ import os
+ import cv2
+ import torch
+ import random
+ import numpy as np
+ from PIL import Image
+ from diffusers import KandinskyV22PriorPipeline, KandinskyV22ControlnetPipeline
+
+ import spaces
+ import gradio as gr
+ from huggingface_hub import hf_hub_download
+
+ from ip_adapter import IPAdapterXL
+
+ import os
+ os.system("git lfs install")
+ os.system("git clone https://huggingface.co/h94/IP-Adapter")
+ os.system("mv IP-Adapter/sdxl_models sdxl_models")
+
+ # global variable
+ MAX_SEED = np.iinfo(np.int32).max
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
+
+ # initialization
+ base_model_path = "kandinsky-community/kandinsky-2-2-prior"
+ image_encoder_path = "sdxl_models/image_encoder"
+ ip_ckpt = "sdxl_models/ip-adapter_sdxl.bin"
+
+ controlnet_path = "kandinsky-community/kandinsky-2-2-controlnet-depth"
+ controlnet = KandinskyV22ControlnetPipeline.from_pretrained(controlnet_path, use_safetensors=False, torch_dtype=torch.float16).to(device)
+
+ # load SDXL pipeline
+ pipe = KandinskyV22PriorPipeline.from_pretrained(
+     base_model_path,
+     controlnet=controlnet,
+     torch_dtype=torch.float16,
+     add_watermarker=False,
+ )
+
+ # load ip-adapter
+ # target_blocks=["block"] for original IP-Adapter
+ # target_blocks=["up_blocks.0.attentions.1"] for style blocks only
+ # target_blocks = ["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] # for style+layout blocks
+ ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1"])
+
+ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+     return seed
+
+ def resize_img(
+     input_image,
+     max_side=1280,
+     min_side=1024,
+     size=None,
+     pad_to_max_side=False,
+     mode=Image.BILINEAR,
+     base_pixel_number=64,
+ ):
+     w, h = input_image.size
+     if size is not None:
+         w_resize_new, h_resize_new = size
+     else:
+         ratio = min_side / min(h, w)
+         w, h = round(ratio * w), round(ratio * h)
+         ratio = max_side / max(h, w)
+         input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
+         w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
+         h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
+     input_image = input_image.resize([w_resize_new, h_resize_new], mode)
+
+     if pad_to_max_side:
+         res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
+         offset_x = (max_side - w_resize_new) // 2
+         offset_y = (max_side - h_resize_new) // 2
+         res[
+             offset_y : offset_y + h_resize_new, offset_x : offset_x + w_resize_new
+         ] = np.array(input_image)
+         input_image = Image.fromarray(res)
+     return input_image
+
+ @spaces.GPU(enable_queue=True)
+ def create_image(image_pil,
+                  input_image,
+                  prompt,
+                  n_prompt,
+                  scale,
+                  control_scale,
+                  guidance_scale,
+                  num_samples,
+                  num_inference_steps,
+                  seed,
+                  target="Load only style blocks",
+                  neg_content_prompt=None,
+                  neg_content_scale=0):
+
+     if isinstance(image_pil, np.ndarray):
+         image_pil = Image.fromarray(image_pil)
+
+     if target =="Load original IP-Adapter":
+         # target_blocks=["blocks"] for original IP-Adapter
+         ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["blocks"])
+     elif target=="Load only style blocks":
+         # target_blocks=["up_blocks.0.attentions.1"] for style blocks only
+         ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1"])
+     elif target=="Load only layout blocks":
+         # target_blocks=["up_blocks.0.attentions.1"] for style blocks only
+         ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["down_blocks.2.attentions.1"])
+     elif target == "Load style+layout block":
+         # target_blocks = ["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] # for style+layout blocks
+         ip_model = IPAdapterXL(pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"])
+
+     if input_image is not None:
+         input_image = resize_img(input_image, max_side=1024)
+         cv_input_image = pil_to_cv2(input_image)
+         detected_map = cv2.Canny(cv_input_image, 50, 200)
+         canny_map = Image.fromarray(cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB))
+     else:
+         canny_map = Image.new('RGB', (1024, 1024), color=(255, 255, 255))
+         control_scale = 0
+
+     if float(control_scale) == 0:
+         canny_map = canny_map.resize((1024,1024))
+
+     if len(neg_content_prompt) > 0 and neg_content_scale != 0:
+         images = ip_model.generate(pil_image=image_pil,
+                                    prompt=prompt,
+                                    negative_prompt=n_prompt,
+                                    scale=scale,
+                                    guidance_scale=guidance_scale,
+                                    num_samples=num_samples,
+                                    num_inference_steps=num_inference_steps,
+                                    seed=seed,
+                                    image=canny_map,
+                                    controlnet_conditioning_scale=float(control_scale),
+                                    neg_content_prompt=neg_content_prompt,
+                                    neg_content_scale=neg_content_scale
+                                    )
+     else:
+         images = ip_model.generate(pil_image=image_pil,
+                                    prompt=prompt,
+                                    negative_prompt=n_prompt,
+                                    scale=scale,
+                                    guidance_scale=guidance_scale,
+                                    num_samples=num_samples,
+                                    num_inference_steps=num_inference_steps,
+                                    seed=seed,
+                                    image=canny_map,
+                                    controlnet_conditioning_scale=float(control_scale),
+                                    )
+     return images
+
+ def pil_to_cv2(image_pil):
+     image_np = np.array(image_pil)
+     image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
+     return image_cv2
+
+ # Description
+ title = r"""
+ <h1 align="center">InstantStyle</h1>
+ """
+
+ description = r"""
+ How to use:<br>
+ 1. Upload a style image.
+ 2. Set stylization mode, only use style block by default.
+ 2. Enter a text prompt, as done in normal text-to-image models.
+ 3. Click the <b>Submit</b> button to begin customization.
+ 4. Share your stylized photo with your friends and enjoy! 😊
+
+
+ Advanced usage:<br>
+ 1. Click advanced options.
+ 2. Upload another source image for image-based stylization using ControlNet.
+ 3. Enter negative content prompt to avoid content leakage.
+ """
+
+ article = r"""
+ ---
+ ```bibtex
+ @article{wang2024instantstyle,
+   title={InstantStyle: Free Lunch towards Style-Preserving in Text-to-Image Generation},
+   author={Wang, Haofan and Wang, Qixun and Bai, Xu and Qin, Zekui and Chen, Anthony},
+   journal={arXiv preprint arXiv:2404.02733},
+   year={2024}
+ }
+ ```
+ """
+
+ block = gr.Blocks().queue(max_size=10, api_open=True)
+ with block:
+
+     # description
+     gr.Markdown(title)
+     gr.Markdown(description)
+
+     with gr.Tabs():
+         with gr.Row():
+             with gr.Column():
+
+                 with gr.Row():
+                     with gr.Column():
+                         image_pil = gr.Image(label="Style Image", type="numpy")
+
+                     target = gr.Radio(["Load only style blocks", "Load style+layout block", "Load original IP-Adapter"],
+                                       value="Load only style blocks",
+                                       label="Style mode")
+
+                     prompt = gr.Textbox(label="Prompt",
+                                         value="a cat, masterpiece, best quality, high quality")
+
+                     scale = gr.Slider(minimum=0,maximum=2.0, step=0.01,value=1.0, label="Scale")
+
+                 with gr.Accordion(open=False, label="Advanced Options"):
+
+                     with gr.Column():
+                         src_image_pil = gr.Image(label="Source Image (optional)", type='pil')
+                         control_scale = gr.Slider(minimum=0,maximum=1.0, step=0.01,value=0.5, label="Controlnet conditioning scale")
+
+                     n_prompt = gr.Textbox(label="Neg Prompt", value="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry")
+
+                     neg_content_prompt = gr.Textbox(label="Neg Content Prompt", value="")
+                     neg_content_scale = gr.Slider(minimum=0, maximum=1.0, step=0.01,value=0.5, label="Neg Content Scale")
+
+                     guidance_scale = gr.Slider(minimum=1,maximum=15.0, step=0.01,value=5.0, label="guidance scale")
+                     num_samples= gr.Slider(minimum=1,maximum=4.0, step=1.0,value=1.0, label="num samples")
+                     num_inference_steps = gr.Slider(minimum=5,maximum=50.0, step=1.0,value=20, label="num inference steps")
+                     seed = gr.Slider(minimum=-1000000,maximum=1000000,value=1, step=1, label="Seed Value")
+                     randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+
+                 generate_button = gr.Button("Generate Image")
+
+             with gr.Column():
+                 generated_image = gr.Gallery(label="Generated Image")
+
+         generate_button.click(
+             fn=randomize_seed_fn,
+             inputs=[seed, randomize_seed],
+             outputs=seed,
+             queue=False,
+             api_name=False,
+         ).then(
+             fn=create_image,
+             inputs=[image_pil,
+                     src_image_pil,
+                     prompt,
+                     n_prompt,
+                     scale,
+                     control_scale,
+                     guidance_scale,
+                     num_samples,
+                     num_inference_steps,
+                     seed,
+                     target,
+                     neg_content_prompt,
+                     neg_content_scale],
+             outputs=[generated_image])
+
+     gr.Markdown(article)
+
  block.launch(show_error=True)