Spaces:

jiuface
/

real-depth

Running on Zero

File size: 7,269 Bytes

d0852ef
1486d19
 
 
d731134
1486d19
d0852ef
 
 
 
 
 
 
 
 
 
 
 
d74c6d6
91d2029
 
dd41079
762ec4a
d74c6d6
d0852ef
42a5c12
d0852ef
 
 
1486d19
d74c6d6
 
 
ea8efbb
 
c46f10f
ea8efbb
55b3c41
ea8efbb
55b3c41
d74c6d6
 
 
eebdcc1
c46f10f
d74c6d6
55b3c41
d74c6d6
 
 
d0852ef
 
 
1486d19
 
 
 
 
d0852ef
 
 
1486d19
d0852ef
1486d19
 
d0852ef
 
 
3594837
d731134
3594837
d0852ef
 
 
 
 
d731134
d0852ef
 
 
 
 
 
 
 
 
3594837
1486d19
 
d0852ef
3594837
dd41079
0a8ba8d
 
91d2029
 
0a8ba8d
 
91d2029
 
 
dd41079
 
 
91d2029
 
 
0a8ba8d
 
 
91d2029
 
d0852ef
2e320bd
d792bfc
5f87b7d
d0852ef
5f87b7d
d0852ef
fc2d50f
d731134
fc2d50f
 
3594837
527d179
d0852ef
d792bfc
3594837
fc2d50f
d0852ef
 
fc2d50f
 
d0852ef
 
 
 
ea8efbb
d0852ef
3594837
91d2029
527d179
91d2029
 
 
 
3594837
1486d19
d0852ef
 
 
 
 
 
 
 
1486d19
d0852ef
1486d19
d0852ef
 
 
 
 
 
 
 
1486d19
91d2029
0a8ba8d
 
 
 
 
91d2029
 
d0852ef
3594837
d0852ef
1486d19
d0852ef
 
 
 
 
 
 
 
91d2029
 
0a8ba8d
91d2029
 
 
d0852ef
1486d19
d0852ef
 
 
 
 
 
 
 
3594837
 
d0852ef
1486d19

import spaces
import gradio as gr
import numpy as np
import random
from PIL import Image
import torch
from diffusers import (
    ControlNetModel,
    DiffusionPipeline,
    StableDiffusionControlNetPipeline,
    StableDiffusionXLControlNetPipeline,
    UniPCMultistepScheduler,
    EulerDiscreteScheduler,
    AutoencoderKL
)
from transformers import DPTFeatureExtractor, DPTForDepthEstimation, DPTImageProcessor
from transformers import CLIPImageProcessor
from diffusers.utils import load_image
from gradio_imageslider import ImageSlider
import boto3
from io import BytesIO
from datetime import datetime
import json

# Target device and Hugging Face model identifiers used by the app.
device = "cuda"
base_model_id = "SG161222/RealVisXL_V5.0"
controlnet_model_id = "diffusers/controlnet-depth-sdxl-1.0"
vae_model_id = "madebyollin/sdxl-vae-fp16-fix"


# The SDXL + ControlNet pipeline is only built when CUDA is reported as
# available; `pipe` is left undefined otherwise.
if torch.cuda.is_available():

    # Depth ControlNet: the "fp16" weight variant is requested and loaded with
    # torch_dtype=bfloat16.
    controlnet = ControlNetModel.from_pretrained(
        controlnet_model_id,
        variant="fp16",
        use_safetensors=True,
        torch_dtype=torch.bfloat16
    )
    vae = AutoencoderKL.from_pretrained(vae_model_id, torch_dtype=torch.bfloat16)
    pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
        base_model_id,
        controlnet=controlnet,
        vae=vae,
        variant="fp16",
        use_safetensors=True,
        torch_dtype=torch.bfloat16,
    )
    # Swap the pipeline's default scheduler for Euler, reusing its config.
    pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
    pipe.to(device)

# DPT depth model and its image processor, used by get_depth_map().
# Placed on the same `device` as the pipeline rather than a separate literal.
depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to(device)
feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")


# Upper bound for seeds (largest signed 32-bit integer).
MAX_SEED = np.iinfo(np.int32).max
# NOTE(review): not referenced in the visible part of this file.
MAX_IMAGE_SIZE = 1024

# NOTE(review): these two flags are not referenced in the visible part of
# this file — confirm whether they are still needed.
USE_TORCH_COMPILE = 0
ENABLE_CPU_OFFLOAD = 0


def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    """Return the seed to use for the next generation.

    Args:
        seed: Seed currently selected by the caller.
        randomize_seed: When truthy, ignore ``seed`` and draw a fresh one
            uniformly from ``[0, MAX_SEED]``.

    Returns:
        The freshly drawn seed, or ``seed`` unchanged when randomisation is off.
    """
    return random.randint(0, MAX_SEED) if randomize_seed else seed


def get_depth_map(image):
    """Estimate depth for ``image`` and return it as a 3-channel 8-bit image.

    The input is preprocessed with the module-level ``feature_extractor``,
    run through ``depth_estimator``, resized back to the input resolution with
    bicubic interpolation, min-max normalised to ``[0, 1]`` and replicated
    across three channels.

    Args:
        image: Input picture exposing a PIL-style ``size`` attribute
            ``(width, height)``.

    Returns:
        PIL.Image.Image: Depth visualisation with the same width and height
        as ``image``.
    """
    # PIL reports (width, height); interpolate() expects (height, width).
    original_size = (image.size[1], image.size[0])
    print("start generate depth", original_size)
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values.to("cuda")
    with torch.no_grad(), torch.autocast("cuda"):
        depth_map = depth_estimator(pixel_values).predicted_depth
    depth_map = torch.nn.functional.interpolate(
        depth_map.unsqueeze(1),
        size=original_size,
        mode="bicubic",
        align_corners=False,
    )
    # Normalise in float32: the autocast region above may have produced a
    # reduced-precision tensor.
    depth_map = depth_map.float()
    depth_min = torch.amin(depth_map, dim=[1, 2, 3], keepdim=True)
    depth_max = torch.amax(depth_map, dim=[1, 2, 3], keepdim=True)
    # A perfectly flat prediction has zero range; clamp the denominator so the
    # result is an all-zero map instead of NaNs.
    depth_range = (depth_max - depth_min).clamp_min(torch.finfo(depth_map.dtype).eps)
    depth_map = (depth_map - depth_min) / depth_range
    # Replicate the single depth channel to get an RGB-shaped tensor.
    depth_rgb = torch.cat([depth_map] * 3, dim=1)
    # (N, C, H, W) -> (H, W, C) for the first (only) item in the batch.
    depth_array = depth_rgb.permute(0, 2, 3, 1).cpu().numpy()[0]
    depth_image = Image.fromarray((depth_array * 255.0).clip(0, 255).astype(np.uint8))
    print("generate depth success")
    return depth_image


def upload_image_to_s3(image, account_id, access_key, secret_key, bucket_name):
    """Upload ``image`` as a PNG to a Cloudflare R2 bucket through the S3 API.

    Args:
        image: Object with a PIL-style ``save(fileobj, format)`` method.
        account_id: R2 account id; used to build the endpoint URL.
        access_key: Access key id passed to the S3 client.
        secret_key: Secret access key passed to the S3 client.
        bucket_name: Destination bucket.

    Returns:
        str: Object key of the uploaded file, of the form
        ``generated_images/<timestamp>_<random>.png``.
    """
    # Credentials are deliberately kept out of the log output.
    print("upload_image_to_s3", account_id, bucket_name)
    connectionUrl = f"https://{account_id}.r2.cloudflarestorage.com"

    s3 = boto3.client(
        's3',
        endpoint_url=connectionUrl,
        region_name='auto',
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key
    )

    # Timestamp plus a random suffix keeps object keys from colliding when
    # several images are produced within the same second.
    current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
    image_file = f"generated_images/{current_time}_{random.randint(0, MAX_SEED)}.png"
    # Encode in memory and rewind so the upload reads from the start.
    buffer = BytesIO()
    image.save(buffer, "PNG")
    buffer.seek(0)
    s3.upload_fileobj(buffer, bucket_name, image_file)
    print("upload finish", image_file)
    return image_file



@spaces.GPU(duration=120)
def process(image, image_url, prompt, n_prompt, num_steps, guidance_scale, control_strength, seed, upload_to_s3, account_id, access_key, secret_key, bucket, progress=gr.Progress(track_tqdm=True)):
    """Generate a depth-conditioned image and optionally upload it to R2.

    Args:
        image: Uploaded picture as an array accepted by ``Image.fromarray``,
            or ``None`` when nothing was uploaded.
        image_url: Optional URL; when non-empty it takes precedence over
            ``image``.
        prompt: Positive text prompt.
        n_prompt: Negative text prompt.
        num_steps: Number of inference steps.
        guidance_scale: Classifier-free guidance scale.
        control_strength: Weight of the depth ControlNet conditioning.
        seed: Seed for the random generator.
        upload_to_s3: When truthy, upload the result with
            ``upload_image_to_s3``.
        account_id: R2 account id (only used when uploading).
        access_key: R2 access key (only used when uploading).
        secret_key: R2 secret key (only used when uploading).
        bucket: R2 bucket name (only used when uploading).
        progress: Gradio progress tracker.

    Returns:
        tuple: ``(generated_image, status_json)`` where ``status_json`` is a
        JSON string with a ``status`` field and either ``url`` or ``message``.

    Raises:
        gr.Error: If neither ``image_url`` nor ``image`` is provided.
    """
    print("process start")
    if image_url:
        print(image_url)
        source_image = load_image(image_url)
    elif image is not None:
        source_image = Image.fromarray(image)
    else:
        # Fail with a readable message instead of crashing inside PIL.
        raise gr.Error("Please upload an image or provide an image URL.")

    # PIL size is (width, height); the output keeps the input resolution.
    size = (source_image.size[0], source_image.size[1])
    print("gorinal image size", size)
    depth_image = get_depth_map(source_image)
    # Slider/API values may arrive as floats; manual_seed needs an int.
    generator = torch.Generator().manual_seed(int(seed))
    print(prompt, n_prompt, guidance_scale, num_steps, control_strength)
    print("run pipe")
    generated_image = pipe(
        prompt=prompt,
        negative_prompt=n_prompt,
        width=size[0],
        height=size[1],
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_steps),
        # The ControlNet pipeline takes the conditioning weight under this
        # name; it has no `strength` parameter.
        controlnet_conditioning_scale=control_strength,
        generator=generator,
        image=depth_image
    ).images[0]
    print("geneate image success")
    if upload_to_s3:
        url = upload_image_to_s3(generated_image, account_id, access_key, secret_key, bucket)
        result = {"status": "success", "url": url}
    else:
        result = {"status": "success", "message": "Image generated but not uploaded"}

    return generated_image, json.dumps(result)

# Gradio UI: inputs in the left column, generated image and status on the right.
with gr.Blocks() as demo:

    with gr.Row():
        with gr.Column():
            image = gr.Image()
            image_url = gr.Textbox(label="Image Url", placeholder="Enter image URL here (optional)")
            prompt = gr.Textbox(label="Prompt")
            run_button = gr.Button("Run")

            with gr.Accordion("Advanced options", open=True):

                num_steps = gr.Slider(label="Number of steps", minimum=1, maximum=100, value=30, step=1)
                guidance_scale = gr.Slider(label="Guidance scale", minimum=0.1, maximum=30.0, value=7.5, step=0.1)
                control_strength = gr.Slider(label="Control Strength", minimum=0.1, maximum=4.0, value=0.8, step=0.1)
                seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                n_prompt = gr.Textbox(
                    label="Negative prompt",
                    value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
                )

                # Optional upload of the result to Cloudflare R2.
                upload_to_s3 = gr.Checkbox(label="Upload to R2", value=False)
                account_id = gr.Textbox(label="Account Id", placeholder="Enter R2 account id")
                # Credentials are masked so they are not shown in clear text.
                access_key = gr.Textbox(label="Access Key", placeholder="Enter R2 access key here", type="password")
                secret_key = gr.Textbox(label="Secret Key", placeholder="Enter R2 secret key here", type="password")
                bucket = gr.Textbox(label="Bucket Name", placeholder="Enter R2 bucket name here")


        with gr.Column():
            result = gr.Image(label="Generated Image")
            logs = gr.Textbox(label="logs")

    # Order must match the positional parameters of process().
    inputs = [
        image,
        image_url,
        prompt,
        n_prompt,
        num_steps,
        guidance_scale,
        control_strength,
        seed,
        upload_to_s3,
        account_id,
        access_key,
        secret_key,
        bucket
    ]
    # First resolve the seed (unqueued, not exposed via the API), then run the
    # generation with the updated seed value under the "predict" endpoint.
    run_button.click(
        fn=randomize_seed_fn,
        inputs=[seed, randomize_seed],
        outputs=seed,
        queue=False,
        api_name=False,
    ).then(
        fn=process,
        inputs=inputs,
        outputs=[result, logs],
        api_name="predict"
    )

demo.queue().launch()