# Spaces: Sleeping (page-status text captured along with the file; not part of the program)
| import torch | |
| import gradio as gr | |
| from pipeline_controlnet_sd_xl_raw import StableDiffusionXLControlNetRAWPipeline | |
| from diffusers import ControlNetModel, UniPCMultistepScheduler | |
| from torchvision import transforms | |
| from PIL import Image | |
| import traceback | |
# ========== 1. Load Models ==========
# An earlier variant (previously kept here as commented-out code) assembled the
# pipeline from "stabilityai/stable-diffusion-xl-base-1.0" plus a separately
# loaded ControlNetModel checkpoint passed as `controlnet=...`, both in float16.
# The pipeline is now loaded in one call from a single repository id.
pipe = StableDiffusionXLControlNetRAWPipeline.from_pretrained(
    "wencheng256/DiffusionRAW",
    torch_dtype=torch.float16,
)

# Replace the pipeline's scheduler with UniPC, built from the existing
# scheduler's config.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Turn on model CPU offload for the pipeline.
pipe.enable_model_cpu_offload()
# ========== 2. Utility function: tensor -> PIL ==========
def tensor_to_pil(img_tensor: torch.Tensor) -> Image.Image:
    """Turn an image tensor into a PIL image.

    The tensor is copied to the CPU when it lives on a CUDA device, cast to
    float32 when it has any other dtype, clamped to the range [0, 1], and
    finally converted with torchvision's ``ToPILImage`` transform.
    """
    on_host = img_tensor.cpu() if img_tensor.is_cuda else img_tensor
    as_float32 = on_host if on_host.dtype == torch.float32 else on_host.float()
    converter = transforms.ToPILImage()
    return converter(as_float32.clamp(0, 1))
# ========== 3. Load a .pth file ==========
def load_pth_data(pth_path):
    """Load one sample file and build the previews and tensors the UI needs.

    Args:
        pth_path: Path to a ``.pth`` file containing a dict with the keys
            ``"rgb"``, ``"raw"``, ``"mask"`` and ``"condition"``.

    Returns:
        A 6-tuple ``(rgb_preview, raw_preview, mask_preview, raw_tensor,
        mask_tensor, cond_tensor)``. The first three items are PIL images for
        display; the last three are the tensors as loaded from the file.

    Raises:
        KeyError: If one of the expected keys is missing from the file.
    """
    # NOTE(security): torch.load unpickles the file, which can execute
    # arbitrary code. Only point this at trusted sample files.
    # map_location="cpu" lets files that were saved from a CUDA device load on
    # hosts without a GPU and keeps GPU memory out of the Gradio state.
    data = torch.load(pth_path, map_location="cpu")

    rgb_tensor = data["rgb"]
    raw_tensor = data["raw"]
    mask_tensor = data["mask"]
    cond_tensor = data["condition"]

    # Assuming each key can contain multiple images; using the first index only.
    # The previews keep at most the first 448 entries along the second axis
    # (presumably image rows for a CHW tensor -- TODO confirm).
    raw_image_pil = tensor_to_pil(raw_tensor[0][:, :448])

    # The RGB preview is flipped along its first axis before cropping
    # (presumably a channel-order swap -- TODO confirm).
    rgb_image_pil = tensor_to_pil(torch.flip(rgb_tensor[0], dims=[0])[:, :448])

    # The mask is shown inverted (1 - mask) in the editor.
    mask_image_pil = tensor_to_pil(1 - mask_tensor[0])

    return (
        rgb_image_pil,
        raw_image_pil,
        mask_image_pil,
        raw_tensor,
        mask_tensor,
        cond_tensor,
    )
# ========== 4. Inference function ==========
def infer_fn(prompt, mask_edited, raw_tensor_state, mask_tensor_state, cond_tensor_state):
    """Run the pipeline on the loaded sample using the user-edited mask.

    Args:
        prompt: Text prompt passed to the pipeline.
        mask_edited: Value of the mask editor. With ``tool='sketch'`` this is
            a dict containing ``{'image': PIL, 'mask': PIL}``; only the drawn
            ``'mask'`` is used. A plain PIL image is accepted as well.
        raw_tensor_state: Raw tensor stored by the load step.
        mask_tensor_state: Mask tensor stored by the load step. Currently
            unused; kept so the callback signature matches the wired inputs.
        cond_tensor_state: Condition tensor stored by the load step.

    Returns:
        The generated image as a PIL image.

    Raises:
        gr.Error: If no sample has been loaded yet, or if inference fails.
            The full traceback is printed to the terminal in the latter case.
    """
    # Fail early with an actionable message instead of an AttributeError on
    # None when "Generate" is pressed before "Load".
    if mask_edited is None or raw_tensor_state is None or cond_tensor_state is None:
        raise gr.Error("Please load a PTH file before generating.")

    try:
        if isinstance(mask_edited, dict):
            # Usually we only need the drawn mask
            mask_edited = mask_edited["mask"]

        mask_edited_tensor = transforms.ToTensor()(mask_edited)
        # Keep only one channel as grayscale mask, then add a batch dimension
        mask_edited_tensor = mask_edited_tensor[:1].unsqueeze(0).half()

        raw_t = raw_tensor_state.half()
        cond_t = cond_tensor_state.half()

        # Fixed seed so repeated runs with the same inputs are reproducible.
        generator = torch.manual_seed(0)

        print("Mask shape:", mask_edited_tensor.shape)
        print("Raw shape:", raw_t.shape)
        print("Cond shape:", cond_t.shape)

        result = pipe(
            prompt=prompt,
            num_inference_steps=20,
            generator=generator,
            image=raw_t,
            mask_image=mask_edited_tensor,
            control_image=cond_t,
        ).images[0]

        # If the pipeline already produced a PIL image, hand it back as-is;
        # tensor_to_pil only understands tensors.
        if isinstance(result, Image.Image):
            return result
        return tensor_to_pil(result)
    except Exception as e:
        traceback.print_exc()
        # The output component is an image, so returning a plain string cannot
        # be rendered. Raising gr.Error surfaces the message in the UI instead.
        raise gr.Error(
            "Error occurred during inference. Please check the terminal logs!"
        ) from e
def build_demo():
    """Assemble the Gradio Blocks interface and wire up its two callbacks.

    The "Load" button feeds the selected file to ``load_pth_data`` and fills
    the two preview images, the mask editor, and three state slots. The
    "Generate" button passes the prompt, the edited mask, and the stored
    tensors to ``infer_fn`` and shows the result.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    # Provide a dropdown to select pth file
    sample_files = ["./data1.pth", "./data2.pth", "./data3.pth"]

    # NOTE(review): row membership below is reconstructed; confirm the layout.
    with gr.Blocks() as app:
        gr.Markdown("# DiffusionRAW ")

        with gr.Row():
            pth_selector = gr.Dropdown(
                choices=sample_files,
                value=sample_files[0],
                label="Select a PTH file",
            )
            load_button = gr.Button("Load")

        with gr.Row():
            # Read-only preview of the raw image
            raw_display = gr.Image(label="Raw Image (Display Only)", interactive=False)
            # Read-only preview of the sRGB image
            rgb_display = gr.Image(label="sRGB Image (Display Only)", interactive=False)
            # Mask editor with sketch tool
            mask_editor = gr.Image(
                label="Mask (Sketch)",
                tool="sketch",
                type="pil",
                brush_color="#FFFFFF",
                interactive=True,
                width=512,
                height=512,
            )

        # States to store tensors between the load and generate steps
        raw_tensor_state = gr.State()
        mask_tensor_state = gr.State()
        cond_tensor_state = gr.State()

        # Order must match the tuple returned by load_pth_data.
        load_targets = [
            rgb_display,
            raw_display,
            mask_editor,
            raw_tensor_state,
            mask_tensor_state,
            cond_tensor_state,
        ]
        load_button.click(fn=load_pth_data, inputs=[pth_selector], outputs=load_targets)

        prompt_input = gr.Textbox(label="Prompt", value="An RAW Image.", lines=1)
        generate_button = gr.Button("Generate")
        output_image = gr.Image(label="Output", show_download_button=False)

        # Order must match the positional parameters of infer_fn.
        infer_sources = [
            prompt_input,
            mask_editor,
            raw_tensor_state,
            mask_tensor_state,
            cond_tensor_state,
        ]
        generate_button.click(fn=infer_fn, inputs=infer_sources, outputs=[output_image])

    return app
if __name__ == "__main__":
    # Build the UI, then serve it on all network interfaces at port 9112
    # with debug=True.
    demo = build_demo()
    demo.launch(
        server_name="0.0.0.0",
        server_port=9112,
        debug=True,
    )