ktrndy committed
Commit 909eb62 · verified · 1 Parent(s): b3d4196

Update app.py

Files changed (1)
app.py +83 -10
app.py CHANGED
@@ -1,10 +1,11 @@
 import gradio as gr
 import numpy as np
 import random
-
-# import spaces #[uncomment to use ZeroGPU]
-from diffusers import DiffusionPipeline
+import os
 import torch
+from diffusers import StableDiffusionPipeline
+from peft import PeftModel, LoraConfig
+from diffusers import DiffusionPipeline
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_id_default = "CompVis/stable-diffusion-v1-4" # Replace to the model you would like to use
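
Note: the loader added in the next hunk defaults to base_model_name_or_path=model_id_default and dtype=torch_dtype. torch_dtype is not defined in any visible hunk, but the removed line pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch_dtype) already referenced it, so it presumably comes from the unchanged part of the template near device, along the lines of:

# Assumed context, not part of this diff: dtype selection as in the stock
# text-to-image Space template.
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32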
@@ -18,25 +19,89 @@ MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 
 
+def get_lora_sd_pipeline(
+    ckpt_dir='./output',
+    base_model_name_or_path=model_id_default,
+    dtype=torch_dtype,
+    device=device,
+    adapter_name="default"
+):
+    unet_sub_dir = os.path.join(ckpt_dir, "unet")
+    text_encoder_sub_dir = os.path.join(ckpt_dir, "text_encoder")
+    if os.path.exists(text_encoder_sub_dir) and base_model_name_or_path is None:
+        config = LoraConfig.from_pretrained(text_encoder_sub_dir)
+        base_model_name_or_path = config.base_model_name_or_path
+
+    if base_model_name_or_path is None:
+        raise ValueError("Please specify the base model name or path")
+
+    pipe = StableDiffusionPipeline.from_pretrained(base_model_name_or_path, torch_dtype=dtype).to(device)
+    pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
+
+    if os.path.exists(text_encoder_sub_dir):
+        pipe.text_encoder = PeftModel.from_pretrained(
+            pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
+        )
+
+    if dtype in (torch.float16, torch.bfloat16):
+        pipe.unet.half()
+        pipe.text_encoder.half()
+
+    pipe.to(device)
+    return pipe
+
+
+def encode_prompt(prompt, tokenizer, text_encoder):
+    text_inputs = tokenizer(
+        prompt,
+        padding="max_length",
+        max_length=tokenizer.model_max_length,
+        return_tensors="pt",
+    )
+    with torch.no_grad():
+        if len(text_inputs.input_ids[0]) < tokenizer.model_max_length:
+            prompt_embeds = text_encoder(text_inputs.input_ids.to(text_encoder.device))[0]
+        else:
+            embeds = []
+            start = 0
+            while start < tokenizer.model_max_length:
+                end = start + tokenizer.model_max_length
+                part_of_text_inputs = text_inputs.input_ids[0][start:end]
+                if len(part_of_text_inputs) < tokenizer.model_max_length:
+                    part_of_text_inputs = torch.cat([part_of_text_inputs, torch.tensor([tokenizer.pad_token_id] * (tokenizer.model_max_length - len(part_of_text_inputs)))])
+                embeds.append(text_encoder(part_of_text_inputs.to(text_encoder.device).unsqueeze(0))[0])
+                start += int((8/11)*tokenizer.model_max_length)
+            prompt_embeds = torch.mean(torch.stack(embeds, dim=0), dim=0)
+    return prompt_embeds
+
+
+pipe = get_lora_sd_pipeline(adapter_name="sticker_of_funny_cat_Pusheen")
+
+
 # @spaces.GPU #[uncomment to use ZeroGPU]
 def infer(
     prompt,
     negative_prompt,
-    width,
-    height,
+    width=512,
+    height=512,
     model_id=model_id_default,
     seed=42,
     guidance_scale=7.0,
+    lora_scale=0.5,
     num_inference_steps=20,
     progress=gr.Progress(track_tqdm=True),
 ):
-    generator = torch.Generator().manual_seed(seed)
-    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch_dtype)
+    generator = torch.Generator(device).manual_seed(seed)
+    pipe = get_lora_sd_pipeline(base_model_name_or_path=model_id,
+                                adapter_name="sticker_of_funny_cat_Pusheen")
     pipe = pipe.to(device)
+    prompt_embeds = encode_prompt(prompt, pipe.tokenizer, pipe.text_encoder)
+    negative_prompt_embeds = encode_prompt(negative_prompt, pipe.tokenizer, pipe.text_encoder)
+    pipe.fuse_lora(lora_scale=lora_scale)
 
     image = pipe(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
+        prompt_embeds=prompt_embeds,
+        negative_prompt_embeds=negative_prompt_embeds,
         guidance_scale=guidance_scale,
         num_inference_steps=num_inference_steps,
         width=width,
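
A note on encode_prompt above: prompts at or beyond the CLIP limit (tokenizer.model_max_length, 77 tokens) are encoded as overlapping windows whose embeddings are averaged, with stride int((8/11) * 77) = 56, i.e. a 21-token overlap between consecutive windows. As committed, though, the loop condition while start < tokenizer.model_max_length compares the window start against the window size rather than the prompt length, so only the windows starting at token 0 and token 56 are ever encoded and anything past the first 133 tokens is silently dropped. A corrected sketch of the apparent intent (hypothetical name encode_prompt_full; not the committed code) would walk the whole sequence:

import torch

def encode_prompt_full(prompt, tokenizer, text_encoder):
    # Tokenize without truncation so long prompts keep all their tokens.
    ids = tokenizer(prompt, return_tensors="pt").input_ids[0]
    window = tokenizer.model_max_length          # 77 for CLIP
    stride = int((8 / 11) * window)              # 56: windows overlap by 21 tokens
    embeds = []
    with torch.no_grad():
        for start in range(0, len(ids), stride):
            chunk = ids[start:start + window]
            if len(chunk) < window:              # right-pad the last window
                pad = torch.full((window - len(chunk),), tokenizer.pad_token_id,
                                 dtype=chunk.dtype)
                chunk = torch.cat([chunk, pad])
            embeds.append(text_encoder(chunk.unsqueeze(0).to(text_encoder.device))[0])
    # Average the per-window embeddings, as the committed code does.
    return torch.mean(torch.stack(embeds, dim=0), dim=0)

Averaging the overlapping windows keeps the result at the fixed (1, 77, hidden) shape the pipeline expects for prompt_embeds, at the cost of blending tokens from different windows.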
@@ -94,6 +159,14 @@ with gr.Blocks(css=css, fill_height=True) as demo:
             value=7.0, # Replace with defaults that work for your model
         )
 
+        lora_scale = gr.Slider(
+            label="LoRA scale",
+            minimum=0.0,
+            maximum=1.0,
+            step=0.1,
+            value=0.5,
+        )
+
         num_inference_steps = gr.Slider(
             label="Number of inference steps",
             minimum=1,
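
The new slider only changes the output if lora_scale is actually passed to infer by the event handler, which lives in an unchanged part of the file not shown in this diff. A hypothetical hookup, with component names (run_button, result, and the width/height/model_id controls) assumed from the surrounding template and the input order mirroring infer's signature:

# Hypothetical wiring (the real handler is outside this diff's hunks):
run_button.click(
    fn=infer,
    inputs=[prompt, negative_prompt, width, height, model_id,
            seed, guidance_scale, lora_scale, num_inference_steps],
    outputs=[result],
)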
@@ -141,4 +214,4 @@ with gr.Blocks(css=css, fill_height=True) as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
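
End-to-end, infer now rebuilds the LoRA pipeline on every request (the module-level pipe created at import time is shadowed by the local one), encodes both prompts to embeddings, fuses the LoRA weights at the requested scale, and generates. A standalone smoke test of the same flow, run in the context of this app.py (so get_lora_sd_pipeline, encode_prompt, and device are in scope), might look like the following; the checkpoint layout ./output/{unet,text_encoder} and the prompt strings are assumptions, the rest mirrors the diff:

import torch

pipe = get_lora_sd_pipeline(
    ckpt_dir="./output",                       # assumed PEFT checkpoint layout
    base_model_name_or_path="CompVis/stable-diffusion-v1-4",
    adapter_name="sticker_of_funny_cat_Pusheen",
)
pipe.fuse_lora(lora_scale=0.5)                 # same call the app makes

prompt_embeds = encode_prompt(
    "sticker of funny cat Pusheen drinking coffee",   # assumed prompt
    pipe.tokenizer, pipe.text_encoder,
)
negative_prompt_embeds = encode_prompt(
    "blurry, low quality", pipe.tokenizer, pipe.text_encoder,
)

image = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_prompt_embeds,
    guidance_scale=7.0,
    num_inference_steps=20,
    width=512,
    height=512,
    generator=torch.Generator(device).manual_seed(42),
).images[0]
image.save("pusheen.png")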
 