Spaces:

nostalgebraist
/

frank-diffusion-streamlit

Runtime error

File size: 4,681 Bytes

b7518ad
 
ba1e299
a54a213
 
 
 
 
 
 
 
 
 
 
 
ba1e299
a54a213
 
 
 
 
 
e4b5feb
f0e64d7
 
910bf72
 
 
f0e64d7
 
 
7943576
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0e64d7
a54a213
1b3b3ce
a54a213
7cfb5aa
 
7943576
 
 
 
 
 
 
 
 
 
a54a213
693beab
a54a213
ba1e299
a54a213
1b3b3ce
 
 
ba1e299
 
e5509f2
 
4ef7cbc
1b3b3ce
693beab
 
 
 
910bf72
 
 
4ef7cbc
789fb56
1c987b2
2d98384
4ef7cbc
 
b43ef12
 
4a15936
1c987b2
 
 
 
4ef7cbc
4a15936
4ef7cbc
 
4a15936
4ef7cbc
 
1c987b2
 
4ef7cbc
 
4a15936
4ef7cbc
 
1c987b2
 
 
 
4a15936
b43ef12
ab4b236
1c987b2
693beab

import streamlit as st

import time
import numpy as np
from PIL import Image

# --- configuration -----------------------------------------------------------

# Hugging Face repo the model archive is fetched from, and the local directory
# the checkpoints and configs are read from once the archive is unpacked.
HF_REPO_NAME_DIFFUSION = "nostalgebraist/nostalgebraist-autoresponder-diffusion"
model_path_diffusion = "nostalgebraist-autoresponder-diffusion"

# Timestep respacing the two models are constructed with.
# Earlier settings, kept for reference: '90,60,60,20,20' (sres1), '250' (sres2).
timestep_respacing_sres1 = "20"
timestep_respacing_sres2 = "20"

# Baseline keyword arguments for sampling; `handler` copies these and then
# overrides/extends them per request.
DIFFUSION_DEFAULTS = {
    "batch_size": 1,
    "n_samples": 1,
    "clf_free_guidance": True,
    "clf_free_guidance_sres": False,
    "guidance_scale": 1,
    "guidance_scale_sres": 0,
    "yield_intermediates": True,
}

@st.experimental_singleton
def setup():
    """Install the sampling code, fetch the model weights and build the pipeline.

    Steps, in order:

    1. Clone the ``improved-diffusion`` repo if it is not already present,
       switch it to the ``nbar-space`` branch and install it in editable mode.
    2. Install the extra Python packages the sampling code needs.
    3. Download and unpack ``model.tar`` unless ``model_path_diffusion``
       already exists.
    4. Load the two sampling models and wrap them in a ``SamplingPipeline``.

    All shell-outs are best-effort: a failing command is not fatal by itself,
    the subsequent import simply fails if something essential is missing.

    Returns:
        The ``improved_diffusion.pipeline.SamplingPipeline`` built from the
        ``sres1`` and ``sres2`` checkpoints.  The function is wrapped in
        ``st.experimental_singleton``, which is meant to build this object
        once and share it across script reruns.
    """
    import os
    import subprocess
    import sys

    repo_dir = "improved-diffusion"

    def _run(cmd, cwd=None):
        """Run *cmd* (an argument list, no shell); return True iff it exited 0."""
        try:
            return subprocess.run(cmd, cwd=cwd).returncode == 0
        except OSError as err:
            # Missing executable or working directory: report and carry on,
            # the same way a failing shell command would have.
            print(f"could not run {cmd}: {err}")
            return False

    # `git clone` creates "improved-diffusion" (hyphen).  The guard used to
    # test "improved_diffusion" (underscore), which never exists in the
    # working directory, so the clone was re-attempted on every call.
    if not os.path.exists(repo_dir):
        _run(["git", "clone", "https://github.com/nostalgebraist/improved-diffusion.git"])

    # Use the running interpreter's pip so packages land in the environment
    # that is about to import them.
    pip_install = [sys.executable, "-m", "pip", "install"]

    # all() short-circuits, so a failing step skips the remaining ones,
    # matching a shell `a && b && c` chain.
    all(
        _run(cmd, cwd=repo_dir)
        for cmd in (
            ["git", "fetch", "origin", "nbar-space"],
            ["git", "checkout", "nbar-space"],
            pip_install + ["-e", "."],
        )
    )
    _run(pip_install + ["tokenizers", "x-transformers==0.22.0", "axial-positional-embedding"])
    _run(pip_install + ["einops==0.3.2"])

    # Make the checkout importable in this already-running process.
    sys.path.append(repo_dir)

    import improved_diffusion.pipeline
    # NOTE(review): transformer_utils is not installed by this function;
    # presumably it comes from the environment's requirements - confirm.
    from transformer_utils.util.tfm_utils import get_local_path_from_huggingface_cdn

    if not os.path.exists(model_path_diffusion):
        model_tar_path = str(
            get_local_path_from_huggingface_cdn(HF_REPO_NAME_DIFFUSION, 'model.tar')
        )
        # Delete the archive only if extraction succeeded.  Passing the path
        # as a separate argument avoids shell quoting problems.
        if _run(["tar", "-xf", model_tar_path]):
            _run(["rm", model_tar_path])

    checkpoint_path_sres1 = os.path.join(model_path_diffusion, "sres1.pt")
    config_path_sres1 = os.path.join(model_path_diffusion, "config_sres1.json")

    checkpoint_path_sres2 = os.path.join(model_path_diffusion, "sres2.pt")
    config_path_sres2 = os.path.join(model_path_diffusion, "config_sres2.json")

    # load
    sampling_model_sres1 = improved_diffusion.pipeline.SamplingModel.from_config(
        checkpoint_path=checkpoint_path_sres1,
        config_path=config_path_sres1,
        timestep_respacing=timestep_respacing_sres1,
    )

    sampling_model_sres2 = improved_diffusion.pipeline.SamplingModel.from_config(
        checkpoint_path=checkpoint_path_sres2,
        config_path=config_path_sres2,
        timestep_respacing=timestep_respacing_sres2,
    )

    return improved_diffusion.pipeline.SamplingPipeline(
        sampling_model_sres1, sampling_model_sres2
    )


def handler(text, ts1, ts2, gs1):
    """Configure the shared pipeline for one request and start sampling.

    Args:
        text: Prompt text; only its first 380 characters are passed on.
        ts1: Step setting for the base model, applied via
            ``set_timestep_respacing`` after conversion to ``str``.
        ts2: Same, for the super-resolution model.
        gs1: Value sent as ``guidance_scale``, replacing the default.

    Returns:
        Whatever ``pipeline.sample`` returns for the assembled arguments.
    """
    sampler = setup()

    # Defaults first, then the per-request values: 'guidance_scale' replaces
    # the default entry in place and 'text' is added at the end.
    sample_kwargs = {
        **DIFFUSION_DEFAULTS,
        'text': text[:380],
        'guidance_scale': gs1,
    }

    print(f"running: {sample_kwargs}")

    for model, steps in (
        (sampler.base_model, ts1),
        (sampler.super_res_model, ts2),
    ):
        model.set_timestep_respacing(str(steps))

    return sampler.sample(**sample_kwargs)

# --- page layout: title, prompt box, sampling controls, action buttons -------

st.title('nostalgebraist-autoresponder image generation demo')

text = st.text_area(
    'Enter your text here (or leave blank for a textless image)',
    max_chars=380,
)

# Tooltip texts for the three sliders below.
help_ts1 = (
    "How long to run the base model. "
    "Larger values make the image more realistic / better. "
    "Smaller values are faster."
)
help_ts2 = (
    "How long to run the upsampling model. "
    "Larger values sometimes make the big image crisper and more detailed. "
    "Smaller values are faster."
)
help_gs1 = (
    "Guidance scale. "
    "Larger values make the image more likely to contain the text you wrote. "
    "If this is zero, the first part will be faster."
)

ts1 = st.slider(
    'Steps (base)',
    min_value=5,
    max_value=500,
    value=10,
    help=help_ts1,
)
ts2 = st.slider(
    'Steps (upsampling)',
    min_value=5,
    max_value=500,
    value=10,
    help=help_ts2,
)

# Guidance choices run from 0.0 to 4.0 in steps of 0.5.
gs1 = st.select_slider(
    'Guidance scale (base)',
    [half_steps / 2 for half_steps in range(9)],
    value=0.0,
    help=help_gs1,
)


button_go = st.button('Generate')
button_stop = st.button('Stop')

# A frame whose width equals this is counted as output of the upsampling
# stage; any other width is counted as a base-model frame.
HIGH_RES_WIDTH = 256

if button_go:
    # One placeholder per stage, so each new frame replaces the previous one
    # from the same stage instead of being appended below it.
    base_stage = {"slot": st.empty(), "total": ts1, "count": 0, "elapsed": 0.0}
    upsample_stage = {"slot": st.empty(), "total": ts2, "count": 0, "elapsed": 0.0}

    # perf_counter() is monotonic, so frame intervals cannot go negative or
    # jump if the wall clock is adjusted while sampling is running.
    last_tick = time.perf_counter()

    for s, xs in handler(text, ts1, ts2, gs1):
        # Only the first item of each yielded array is displayed
        # (assumes a leading batch axis, batch_size is 1 - TODO confirm).
        img_s = Image.fromarray(s[0])
        img_xs = Image.fromarray(xs[0])

        now = time.perf_counter()
        delta = now - last_tick
        last_tick = now

        # PIL's Image.size is (width, height).
        is_high_res = img_s.size[0] == HIGH_RES_WIDTH
        stage = upsample_stage if is_high_res else base_stage

        stage["count"] += 1
        stage["elapsed"] += delta

        count = stage["count"]
        total = stage["total"]
        # Running mean seconds per frame for this stage, kept incrementally
        # rather than re-summing every past interval on each frame.
        rate = stage["elapsed"] / count

        with stage["slot"].container():
            st.image([img_s, img_xs])
            st.write(f'{count} / {total} | {rate:.2f} seconds/frame')

        # NOTE(review): button_stop was read once before this loop and is
        # never reassigned, so this test cannot change within a run.
        # Pressing Stop presumably works by triggering a script rerun -
        # confirm against Streamlit's execution model.
        if button_stop:
            break