Spaces:

alfredplpl
/

sd3-with-LLM

Running on Zero

File size: 5,235 Bytes

cd58335
42eccb2
c54d478
 
ec3c4e8
c54d478
ec3c4e8
 
718bcd6
c54d478
ec3c4e8
 
c54d478
ec3c4e8
4b94657
c54d478
c0c6c3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec3c4e8
 
c54d478
94d1cb1
 
c0c6c3f
 
 
 
 
22e8896
c0c6c3f
dc3aa6a
c379910
 
 
 
 
4b94657
718bcd6
ec3c4e8
 
 
 
 
 
 
 
 
c54d478
 
ec3c4e8
 
 
c54d478
 
ec3c4e8
 
 
 
 
 
 
c54d478
ec3c4e8
 
 
 
 
 
c54d478
ec3c4e8
c54d478
 
 
 
 
 
 
ec3c4e8
c54d478
ec3c4e8
 
 
 
 
c54d478
 
 
ec3c4e8
c54d478
ec3c4e8
 
 
 
 
 
 
c54d478
ec3c4e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c54d478
ec3c4e8
 
 
 
c54d478
 
ec3c4e8

# Thanks: https://huggingface.co/spaces/stabilityai/stable-diffusion-3-medium
import os
import gradio as gr
import numpy as np
import random
import torch
from diffusers import StableDiffusion3Pipeline, SD3Transformer2DModel, FlowMatchEulerDiscreteScheduler
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

device = "cuda"
dtype = torch.float16

repo = "stabilityai/stable-diffusion-3-medium"
t2i = StableDiffusion3Pipeline.from_pretrained(repo, torch_dtype=torch.float16, revision="refs/pr/26",token=os.environ["TOKEN"]).to(device)

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct", 
    device_map="cuda", 
    torch_dtype=torch.bfloat16, 
    trust_remote_code=True, 
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
upsampler = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

generation_args = {
    "max_new_tokens": 300,
    "return_full_text": False,
    "temperature": 0.0,
    "do_sample": False,
}

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1344

@spaces.GPU
def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
    messages = [
        {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
        {"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together: 1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits together with some milk and honey. 2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits together with some lemon juice and honey."},
        {"role": "user", "content": "What about solving an 2x + 3 = 7 equation?"},
    ]
    output = upsampler(messages, **generation_args)
    upsampled_prompt=output[0]['generated_text']

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
        
    generator = torch.Generator().manual_seed(seed)
    
    image = t2i(
        prompt = upsampled_prompt, 
        negative_prompt = negative_prompt,
        guidance_scale = guidance_scale, 
        num_inference_steps = num_inference_steps, 
        width = width, 
        height = height,
        generator = generator
    ).images[0] 
    
    return image, seed

examples = [
    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
    "An astronaut riding a green horse",
    "A delicious ceviche cheesecake slice",
]

css="""
#col-container {
    margin: 0 auto;
    max-width: 580px;
}
"""

with gr.Blocks(css=css) as demo:
    
    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""
        # 日本語が入力できる [SD3 Medium](https://huggingface.co/stabilityai/stable-diffusion-3-medium) 
        """)
        
        with gr.Row():
            
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            
            run_button = gr.Button("Run", scale=0)
        
        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
            )
            
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            
            with gr.Row():
                
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=64,
                    value=1024,
                )
                
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=64,
                    value=1024,
                )
            
            with gr.Row():
                
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=5.0,
                )
                
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=28,
                )
        
        gr.Examples(
            examples = examples,
            inputs = [prompt]
        )
    gr.on(
        triggers=[run_button.click, prompt.submit, negative_prompt.submit],
        fn = infer,
        inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
        outputs = [result, seed]
    )

demo.launch()