Update app.py
app.py CHANGED
@@ -1,21 +1,79 @@
 import gradio as gr
 import numpy as np
 import random
-
-import spaces
-from diffusers import DiffusionPipeline
+import json
 import torch
+import spaces
+
+from diffusers import AutoencoderKL, SD3Transformer2DModel, StableDiffusion3Pipeline, FlowMatchEulerDiscreteScheduler
+from diffusers.loaders.single_file_utils import convert_sd3_transformer_checkpoint_to_diffusers
+from huggingface_hub import hf_hub_download
+from transformers import (
+    CLIPTextModelWithProjection,
+    CLIPTokenizer,
+    T5EncoderModel,
+    T5Tokenizer
+)
+from accelerate import (
+    init_empty_weights,
+    set_module_tensor_to_device,
+    infer_auto_device_map,
+    load_checkpoint_and_dispatch
+)
+from safetensors import safe_open
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_repo_id = "stabilityai/stable-diffusion-3.5-large"
+finetune_repo_id = "DoctorDiffusion/Absynth-2.0"
+finetune_filename = "Absynth_SD3.5L_2.0.safetensors"
 
 if torch.cuda.is_available():
     torch_dtype = torch.bfloat16
 else:
     torch_dtype = torch.float32
 
-pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
-pipe = pipe.to(device)
+# Initialize models from base SD3.5
+vae = AutoencoderKL.from_pretrained(model_repo_id, subfolder="vae")
+text_encoder = CLIPTextModelWithProjection.from_pretrained(model_repo_id, subfolder="text_encoder")
+text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(model_repo_id, subfolder="text_encoder_2")
+text_encoder_3 = T5EncoderModel.from_pretrained(model_repo_id, subfolder="text_encoder_3")
+tokenizer = CLIPTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer")
+tokenizer_2 = CLIPTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer_2")
+tokenizer_3 = T5Tokenizer.from_pretrained(model_repo_id, subfolder="tokenizer_3")
+scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(model_repo_id, subfolder="scheduler")
+
+# Initialize the transformer on the meta device (no weights allocated yet)
+config_file = hf_hub_download(repo_id=model_repo_id, filename="transformer/config.json")
+with open(config_file, "r") as fp:
+    config = json.load(fp)
+with init_empty_weights():
+    transformer = SD3Transformer2DModel.from_config(config)
+
+# Get the finetuned transformer state dict and load it
+model_file = hf_hub_download(repo_id=finetune_repo_id, filename=finetune_filename)
+state_dict = {}
+with safe_open(model_file, framework="pt") as f:
+    for key in f.keys():
+        state_dict[key] = f.get_tensor(key)
+
+state_dict = convert_sd3_transformer_checkpoint_to_diffusers(state_dict)
+transformer.load_state_dict(state_dict, assign=True)  # assign=True materializes the meta tensors
+
+# Create pipeline from our models
+pipe = StableDiffusion3Pipeline(
+    vae=vae,
+    text_encoder=text_encoder,
+    text_encoder_2=text_encoder_2,
+    text_encoder_3=text_encoder_3,
+    tokenizer=tokenizer,
+    tokenizer_2=tokenizer_2,
+    tokenizer_3=tokenizer_3,
+    transformer=transformer,
+    scheduler=scheduler
+)
+pipe = pipe.to(device, dtype=torch_dtype)
+
+# The rest of the code is from the official SD3.5 space
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
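A note on the loading pattern above: init_empty_weights from accelerate builds the SD3Transformer2DModel with its parameters on PyTorch's meta device, so no memory is allocated for the multi-billion-parameter transformer until the finetuned weights arrive. Meta tensors cannot be copied into, which is why load_state_dict needs assign=True (available in torch 2.1+). A minimal self-contained sketch of the pattern, with a toy module standing in for the transformer:

import torch
from accelerate import init_empty_weights

class ToyModel(torch.nn.Module):
    """Stand-in for SD3Transformer2DModel in this sketch."""
    def __init__(self):
        super().__init__()
        self.proj = torch.nn.Linear(64, 64)

# Under init_empty_weights, parameters land on the meta device:
# they carry shapes and dtypes but no storage, so construction is free.
with init_empty_weights():
    model = ToyModel()
assert model.proj.weight.is_meta

# A plain load_state_dict would try to copy into the meta tensors and fail;
# assign=True (torch >= 2.1) swaps the checkpoint tensors in instead.
checkpoint = ToyModel().state_dict()
model.load_state_dict(checkpoint, assign=True)
assert not model.proj.weight.is_meta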
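The convert_sd3_transformer_checkpoint_to_diffusers call bridges two naming schemes: community single-file checkpoints such as Absynth ship their transformer weights under the original reference key layout, while SD3Transformer2DModel expects diffusers-style parameter names. A hypothetical sanity check (not part of the commit) that could be appended after the conversion step, reusing the transformer and state_dict variables from app.py above, to confirm the remap covered everything before the strict load_state_dict raises on a mismatch:

# Hypothetical check: compare converted keys against the model's parameters.
expected = set(transformer.state_dict().keys())
converted = set(state_dict.keys())
print("missing from checkpoint:", sorted(expected - converted)[:5])
print("unexpected in checkpoint:", sorted(converted - expected)[:5])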
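The closing comment defers the remaining code to the official SD3.5 space. For orientation, a sketch of how that code would drive the assembled pipe, assuming the template's ZeroGPU-decorated inference function; the name infer and the defaults shown are illustrative, not taken from this commit:

# Illustrative sketch of the Space's inference entry point (names and
# defaults assumed from the official SD3.5 template, not this commit).
@spaces.GPU
def infer(prompt, seed=42, width=1024, height=1024,
          guidance_scale=4.5, num_inference_steps=40):
    generator = torch.Generator().manual_seed(seed)
    image = pipe(
        prompt=prompt,
        width=width,
        height=height,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        generator=generator,
    ).images[0]
    return image, seed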