Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -6,87 +6,18 @@ import json
 import torch
 import spaces
 
-from
-from diffusers import (
-    AutoencoderKL,
-    SD3Transformer2DModel,
-    StableDiffusion3Pipeline,
-    FlowMatchEulerDiscreteScheduler
-)
-from diffusers.loaders.single_file_utils import (
-    convert_sd3_transformer_checkpoint_to_diffusers,
-)
-from transformers import (
-    CLIPTextModelWithProjection,
-    CLIPTokenizer,
-    T5EncoderModel,
-    T5Tokenizer
-)
-from accelerate import init_empty_weights
-from accelerate.utils import set_module_tensor_to_device
-from safetensors import safe_open
+from diffusers import Lumina2Text2ImgPipeline
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
-model_repo_id = "
-
-finetune_filename = "Absynth_SD3.5L_2.0.safetensors"
+model_repo_id = "Alpha-VLLM/Lumina-Image-2.0"
+
 
 if torch.cuda.is_available():
     torch_dtype = torch.bfloat16
 else:
     torch_dtype = torch.float32
 
-
-config_file = hf_hub_download(repo_id=model_repo_id, filename="transformer/config.json")
-with open(config_file, "r") as fp:
-    config = json.load(fp)
-with init_empty_weights():
-    transformer = SD3Transformer2DModel.from_config(config)
-
-# Get transformer state dict and load
-model_file = hf_hub_download(repo_id=finetune_repo_id, filename=finetune_filename)
-state_dict = {}
-with safe_open(model_file, framework="pt") as f:
-    for key in f.keys():
-        state_dict[key] = f.get_tensor(key)
-
-state_dict = convert_sd3_transformer_checkpoint_to_diffusers(state_dict)
-for key, value in state_dict.items():
-    set_module_tensor_to_device(
-        transformer,
-        key,
-        device,
-        value=value,
-        dtype=torch_dtype
-    )
-
-# Try to keep memory usage down
-del state_dict
-gc.collect()
-
-# Initialize models from base SD3.5
-vae = AutoencoderKL.from_pretrained(model_repo_id, subfolder="vae")
-text_encoder = CLIPTextModelWithProjection.from_pretrained(model_repo_id, subfolder="text_encoder")
-text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(model_repo_id, subfolder="text_encoder_2")
-text_encoder_3 = T5EncoderModel.from_pretrained(model_repo_id, subfolder="text_encoder_3")
-tokenizer = CLIPTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer")
-tokenizer_2 = CLIPTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer_2")
-tokenizer_3 = T5Tokenizer.from_pretrained(model_repo_id, subfolder="tokenizer_3")
-scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(model_repo_id, subfolder="scheduler")
-
-# Create pipeline from our models
-pipe = StableDiffusion3Pipeline(
-    vae=vae,
-    scheduler=scheduler,
-    text_encoder=text_encoder,
-    text_encoder_2=text_encoder_2,
-    text_encoder_3=text_encoder_3,
-    tokenizer=tokenizer,
-    tokenizer_2=tokenizer_2,
-    tokenizer_3=tokenizer_3,
-    transformer=transformer
-)
-pipe = pipe.to(device, dtype=torch_dtype)
+pipe = Lumina2Text2ImgPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1536
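Note: the removed block above assembled the SD3.5 pipeline by hand (empty-weight transformer, single-file checkpoint conversion, per-component encoders and tokenizers), while the new version loads everything with a single from_pretrained call. For reference, a minimal standalone sketch of the new loading path; the generate() helper, its prompt, and the explicit device move are illustrative assumptions, while the repo id, dtype choice, and call arguments come from this commit.

import torch
from diffusers import Lumina2Text2ImgPipeline  # newer diffusers releases also expose this as Lumina2Pipeline

model_repo_id = "Alpha-VLLM/Lumina-Image-2.0"
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

# One call replaces the former per-component SD3.5 assembly.
pipe = Lumina2Text2ImgPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
pipe = pipe.to(device)  # assumption: the Space itself may handle placement via ZeroGPU instead

def generate(prompt: str):  # hypothetical helper, not part of app.py
    # Defaults mirror the values this commit wires into the Gradio UI.
    return pipe(
        prompt=prompt,
        width=1024,
        height=1024,
        num_inference_steps=50,
        guidance_scale=4.0,
        cfg_normalization=True,
        cfg_trunc_ratio=0.25,
    ).images[0]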
@@ -99,8 +30,10 @@ def infer(
     randomize_seed=False,
     width=1024,
     height=1024,
-    guidance_scale=4.
-    num_inference_steps=
+    guidance_scale=4.0,
+    num_inference_steps=50,
+    cfg_normalization=True,
+    cfg_trunc_ratio=0.25,
     progress=gr.Progress(track_tqdm=True),
 ):
     if randomize_seed:
@@ -115,6 +48,8 @@ def infer(
         num_inference_steps=num_inference_steps,
         width=width,
         height=height,
+        cfg_normalization=cfg_normalization,
+        cfg_trunc_ratio=cfg_trunc_ratio,
         generator=generator,
     ).images[0]
 
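The two kwargs forwarded here are Lumina2-specific guidance controls exposed by the diffusers pipeline: cfg_normalization toggles normalized classifier-free guidance, and cfg_trunc_ratio sets the fraction of the denoising schedule over which guidance is applied before falling back to the conditional prediction alone (my reading of the pipeline; the diffusers docstring is authoritative). A hypothetical direct call with this commit's defaults, reusing the pipe object loaded above:

image = pipe(
    prompt="a lighthouse at dawn, volumetric light",  # illustrative prompt
    num_inference_steps=50,
    guidance_scale=4.0,
    cfg_normalization=True,   # normalized classifier-free guidance on
    cfg_trunc_ratio=0.25,     # guidance only over the early part of the schedule
).images[0]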
@@ -122,7 +57,7 @@ def infer(
 
 
 examples = [
-    "
+    "A serene photograph capturing the golden reflection of the sun on a vast expanse of water. The sun is positioned at the top center, casting a brilliant, shimmering trail of light across the rippling surface. The water is textured with gentle waves, creating a rhythmic pattern that leads the eye towards the horizon. The entire scene is bathed in warm, golden hues, enhancing the tranquil and meditative atmosphere. High contrast, natural lighting, golden hour, photorealistic, expansive composition, reflective surface, peaceful, visually harmonious.",
 ]
 
 css = """
@@ -134,8 +69,7 @@ css = """
 
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # [
-        gr.Markdown("Finetuned from [Stable Diffusion 3.5 Large (8B)](https://huggingface.co/stabilityai/stable-diffusion-3.5-large) by [Stability AI](https://stability.ai/news/introducing-stable-diffusion-3-5).")
+        gr.Markdown(" # [Lumina Image v2.0](https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0) by [Alpha-VLLM](https://huggingface.co/Alpha-VLLM)")
         with gr.Row():
             prompt = gr.Text(
                 label="Prompt",
@@ -163,7 +97,21 @@ with gr.Blocks(css=css) as demo:
                step=1,
                value=0,
            )
-
+
+            with gr.Row():
+                cfg_normalization = gr.Checkbox(
+                    label="CFG Normalization",
+                    value=True
+                )
+
+                cfg_trunc_ratio = gr.Slider(
+                    label="CFG Truncation Ratio",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.05,
+                    value=0.25,
+                )
+
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
 
            with gr.Row():
@@ -172,7 +120,7 @@ with gr.Blocks(css=css) as demo:
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
-                    value=
+                    value=1024,
                )
 
                height = gr.Slider(
@@ -180,7 +128,7 @@ with gr.Blocks(css=css) as demo:
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
-                    value=
+                    value=1024,
                )
 
            with gr.Row():
@@ -189,15 +137,15 @@ with gr.Blocks(css=css) as demo:
                    minimum=0.0,
                    maximum=7.5,
                    step=0.1,
-                    value=4.
+                    value=4.0,
                )
 
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
-                    maximum=
+                    maximum=100,
                    step=1,
-                    value=
+                    value=50,
                )
 
        gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=True, cache_mode="lazy")
@@ -214,6 +162,8 @@ with gr.Blocks(css=css) as demo:
            height,
            guidance_scale,
            num_inference_steps,
+            cfg_normalization,
+            cfg_trunc_ratio,
        ],
        outputs=[result, seed],
    )
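For the new controls to reach infer, the final hunk appends them to the positional inputs list in the same order as the new parameters in infer's signature. A sketch of the likely shape of that (mostly unchanged) event wiring; the run_button trigger name and the elided inputs are assumptions, since only the tail of the list appears in the diff:

run_button.click(  # assumed trigger; only the inputs/outputs tail is visible in the hunk
    fn=infer,
    inputs=[
        prompt,
        # ... other inputs unchanged and not shown in the hunk ...
        width,
        height,
        guidance_scale,
        num_inference_steps,
        cfg_normalization,  # new Checkbox
        cfg_trunc_ratio,    # new Slider
    ],
    outputs=[result, seed],
)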