import traceback

import gradio as gr
import torch
from diffusers import ControlNetModel, UniPCMultistepScheduler
from PIL import Image
from torchvision import transforms

from pipeline_controlnet_sd_xl_raw import StableDiffusionXLControlNetRAWPipeline

# ========== 1. Load Models ==========
# The whole pipeline is pulled from one repository id.  An earlier variant
# (kept here only as a note) assembled it from the
# "stabilityai/stable-diffusion-xl-base-1.0" base model plus a ControlNet
# loaded separately with ControlNetModel.from_pretrained.
_MODEL_ID = "wencheng256/DiffusionRAW"

pipe = StableDiffusionXLControlNetRAWPipeline.from_pretrained(
    _MODEL_ID,
    torch_dtype=torch.float16,
)
# Replace the pipeline's scheduler with UniPC, built from the existing config.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()


# ========== 2. Utility function: tensor -> PIL ==========
def tensor_to_pil(img_tensor: torch.Tensor) -> Image.Image:
    """Convert an image tensor to a PIL image.

    The tensor is moved to the CPU, cast to float32 and clamped to the
    range [0, 1] before being handed to torchvision's ``ToPILImage``.
    """
    # .cpu() and .float() return the tensor unchanged when it is already a
    # CPU / float32 tensor, so no explicit device or dtype checks are needed.
    prepared = img_tensor.cpu().float().clamp(0, 1)
    to_pil = transforms.ToPILImage()
    return to_pil(prepared)


# ========== 3. Load a .pth file ==========
def load_pth_data(pth_path):
    """Load one sample file and prepare preview images for the UI.

    The file is expected to deserialize to a mapping with the keys
    ``"rgb"``, ``"raw"``, ``"mask"`` and ``"condition"``.  Only the first
    entry (index 0) of the rgb/raw/mask values is used for the previews.

    Returns:
        A 6-tuple ``(rgb_preview, raw_preview, mask_preview, raw_tensor,
        mask_tensor, cond_tensor)``: three PIL images followed by the
        untouched ``"raw"``, ``"mask"`` and ``"condition"`` values.

    NOTE(review): ``torch.load`` unpickles the file, so ``pth_path`` must
    only ever point at trusted data.
    """
    payload = torch.load(pth_path)
    rgb, raw, mask, cond = (
        payload[key] for key in ("rgb", "raw", "mask", "condition")
    )

    # Keep only the first 448 entries along the second axis of each sample
    # (presumably image rows for C x H x W data -- TODO confirm).
    crop = 448
    raw_preview = tensor_to_pil(raw[0][:, :crop])
    # The first axis of the rgb sample is reversed before cropping
    # (presumably a channel-order swap -- TODO confirm).
    rgb_preview = tensor_to_pil(rgb[0].flip(dims=[0])[:, :crop])
    # The mask shown in the editor is the complement of the stored one.
    mask_preview = tensor_to_pil(1 - mask[0])

    return rgb_preview, raw_preview, mask_preview, raw, mask, cond
# ========== 4. Inference function ==========
def infer_fn(prompt, mask_edited, raw_tensor_state, mask_tensor_state, cond_tensor_state):
    """Run the pipeline on the loaded sample with the user-drawn mask.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        mask_edited: Value of the mask editor.  With ``tool='sketch'`` this
            is a dict containing ``{'image': PIL, 'mask': PIL}``; only the
            ``'mask'`` entry is used.  Any other value is passed to
            ``ToTensor`` as-is.
        raw_tensor_state: The ``"raw"`` value stored when a file was loaded.
        mask_tensor_state: The stored ``"mask"`` value.  Currently unused;
            kept so the signature matches the UI wiring.
        cond_tensor_state: The stored ``"condition"`` value, used as the
            control image.

    Returns:
        The first generated image as a PIL image.

    Raises:
        gr.Error: If no sample has been loaded, no mask is available, or the
            pipeline fails.  Gradio shows the message to the user; returning
            a plain string to an image output would not be displayed.
    """
    if raw_tensor_state is None or cond_tensor_state is None:
        raise gr.Error("Please load a PTH file before generating.")

    if isinstance(mask_edited, dict):
        # Usually we only need the drawn mask
        mask_edited = mask_edited.get("mask")
    if mask_edited is None:
        raise gr.Error("No mask available. Please load a PTH file and draw a mask first.")

    try:
        mask_edited_tensor = transforms.ToTensor()(mask_edited)
        # Keep only one channel as grayscale mask, then add a batch axis.
        mask_edited_tensor = mask_edited_tensor[:1].unsqueeze(0).half()

        raw_t = raw_tensor_state.half()
        cond_t = cond_tensor_state.half()

        # torch.manual_seed seeds and returns the global default generator,
        # so every request starts from the same seed.
        generator = torch.manual_seed(0)

        print("Mask shape:", mask_edited_tensor.shape)
        print("Raw shape:", raw_t.shape)
        print("Cond shape:", cond_t.shape)

        result = pipe(
            prompt=prompt,
            num_inference_steps=20,
            generator=generator,
            image=raw_t,
            mask_image=mask_edited_tensor,
            control_image=cond_t,
        ).images[0]

        # Pass an already-converted image straight through; otherwise the
        # result is treated as a tensor, as before.
        if isinstance(result, Image.Image):
            return result
        return tensor_to_pil(result)
    except Exception as e:
        # Top-level UI boundary: log the full traceback on the server and
        # surface a readable error in the browser.
        traceback.print_exc()
        raise gr.Error(
            "Error occurred during inference. Please check the terminal logs!"
        ) from e
def build_demo():
    """Assemble the Gradio Blocks UI and return it without launching.

    The layout has a file selector with a Load button, three image panels
    (raw preview, sRGB preview, sketchable mask), a prompt box, a Generate
    button and an output image.  Loaded tensors are kept in ``gr.State``
    holders between the Load and Generate callbacks.

    NOTE(review): the original indentation was lost, so which components sit
    inside each ``gr.Row`` is reconstructed here -- confirm against the
    intended layout.
    """
    with gr.Blocks() as ui:
        gr.Markdown("# DiffusionRAW ")

        # Sample files offered in the dropdown; the first one is preselected.
        pth_options = ["./data1.pth", "./data2.pth", "./data3.pth"]

        with gr.Row():
            pth_selector = gr.Dropdown(
                pth_options,
                value=pth_options[0],
                label="Select a PTH file",
            )
            load_button = gr.Button("Load")

        with gr.Row():
            # Read-only previews of the loaded sample.
            raw_display = gr.Image(label="Raw Image (Display Only)", interactive=False)
            rgb_display = gr.Image(label="sRGB Image (Display Only)", interactive=False)

            # Editable mask: the user paints on it with a white brush.
            mask_editor = gr.Image(
                label="Mask (Sketch)",
                tool="sketch",
                type="pil",
                brush_color="#FFFFFF",
                interactive=True,
                width=512,
                height=512,
            )

        # Holders for the tensors returned by load_pth_data.
        raw_tensor_state = gr.State()
        mask_tensor_state = gr.State()
        cond_tensor_state = gr.State()
        tensor_states = [raw_tensor_state, mask_tensor_state, cond_tensor_state]

        # Output order follows load_pth_data's return tuple:
        # rgb preview, raw preview, mask preview, then the three tensors.
        load_button.click(
            fn=load_pth_data,
            inputs=[pth_selector],
            outputs=[rgb_display, raw_display, mask_editor, *tensor_states],
        )

        prompt_input = gr.Textbox(label="Prompt", value="An RAW Image.", lines=1)
        generate_button = gr.Button("Generate")
        output_image = gr.Image(label="Output", show_download_button=False)

        generate_button.click(
            fn=infer_fn,
            inputs=[prompt_input, mask_editor, *tensor_states],
            outputs=[output_image],
        )

    return ui


if __name__ == "__main__":
    demo = build_demo()
    # Listens on all interfaces, port 9112, with Gradio debug mode on.
    demo.launch(server_name="0.0.0.0", server_port=9112, debug=True)