Spaces:
Build error
Build error
import streamlit as st | |
import torch | |
import random | |
import subprocess | |
import os | |
from PIL import Image | |
from diffusers import StableDiffusionPipeline, UNet2DConditionModel | |
# ... any other imports you need ... | |
# ------------------------------------------------------------------------------ | |
# 1. Load your models ONCE in global scope (so they don't reload on every run). | |
# ------------------------------------------------------------------------------ | |
def load_sd_pipeline(base_model_path: str, fine_tuned_path: str): | |
# Safety checker dummy function for demonstration: | |
def dummy_safety_checker(images, clip_input): | |
return images, False | |
pipe = StableDiffusionPipeline.from_pretrained( | |
base_model_path, | |
torch_dtype=torch.float16 | |
) | |
pipe.to("cuda") | |
# Load the fine-tuned UNet | |
unet = UNet2DConditionModel.from_pretrained( | |
fine_tuned_path, | |
subfolder="unet", | |
torch_dtype=torch.float16 | |
).to('cuda') | |
pipe.unet = unet | |
pipe.safety_checker = dummy_safety_checker | |
return pipe | |
# Similarly, if you want to load Zero123++ or other pipelines: | |
def load_zero123_pipeline(): | |
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler | |
pipeline = DiffusionPipeline.from_pretrained( | |
"sudo-ai/zero123plus-v1.2", | |
custom_pipeline="sudo-ai/zero123plus-pipeline", | |
torch_dtype=torch.float16 | |
) | |
pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config( | |
pipeline.scheduler.config, timestep_spacing='trailing' | |
) | |
pipeline.to("cuda") | |
return pipeline | |
# Example placeholders for the SyncDreamer command or internal functions: | |
def run_syncdreamer(input_path: str, output_dir: str = "syncdreamer_output"): | |
"""Runs SyncDreamer on input_path and places results into output_dir.""" | |
st.info("Running SyncDreamer... (placeholder)") | |
# This is where your actual command would go: | |
# subprocess.run([...], check=True) | |
os.makedirs(output_dir, exist_ok=True) | |
# (In a real scenario, you'd handle .jpg to .png conversion, etc.) | |
st.success(f"SyncDreamer completed. Results in: {output_dir}") | |
# Helper function for Zero123++ pipeline | |
def make_square_min_dim(image: Image.Image, min_side: int = 320) -> Image.Image: | |
w, h = image.size | |
scale = max(min_side / w, min_side / h, 1.0) | |
new_w, new_h = int(w * scale), int(h * scale) | |
image = image.resize((new_w, new_h), Image.LANCZOS) | |
side = max(new_w, new_h) | |
new_img = Image.new(mode="RGB", size=(side, side), color=(255, 255, 255)) | |
offset_x = (side - new_w) // 2 | |
offset_y = (side - new_h) // 2 | |
new_img.paste(image, (offset_x, offset_y)) | |
return new_img | |
# ------------------------------------------------------------------------------ | |
# 2. Streamlit application. | |
# ------------------------------------------------------------------------------ | |
def main(): | |
st.title("Funko Generator Demo") | |
# Let’s load pipelines in the background: | |
base_model_path = "runwayml/stable-diffusion-v1-5" | |
fine_tuned_path = "/content/drive/MyDrive/CC_Project/checkpoint-3000" # adapt if needed | |
sd_pipe = load_sd_pipeline(base_model_path, fine_tuned_path) | |
zero123_pipe = load_zero123_pipeline() # For multi-view generation | |
# Session state to hold: | |
if "latest_image" not in st.session_state: | |
st.session_state["latest_image"] = None | |
if "original_prompt" not in st.session_state: | |
st.session_state["original_prompt"] = "" | |
# -------------------------------------------------------------------------- | |
# A) Prompt input & initial generation | |
# -------------------------------------------------------------------------- | |
st.subheader("1. Enter your Funko prompt") | |
# Show examples in the UI | |
with st.expander("Examples of valid prompts"): | |
st.write(""" | |
- A standing plain human Funko in a blue shirt and blue pants with round black eyes with glasses with a belt. | |
- A sitting angry animal Funko with squint black eyes. | |
- A standing happy robot Funko in a brown shirt and grey pants with squint black eyes with cane and monocle. | |
- ... | |
""") | |
user_prompt = st.text_area("Type your Funko prompt here:", | |
value="A standing plain human Funko in a blue shirt and blue pants with round black eyes with glasses.") | |
generate_button = st.button("Generate Initial Funko") | |
if generate_button: | |
st.session_state["original_prompt"] = user_prompt | |
with st.spinner("Generating image..."): | |
with torch.autocast("cuda"): | |
image = sd_pipe(user_prompt, num_inference_steps=50).images[0] | |
st.session_state["latest_image"] = image | |
st.success("Image generated!") | |
if st.session_state["latest_image"] is not None: | |
st.image(st.session_state["latest_image"], caption="Latest Generated Image", use_column_width=True) | |
# -------------------------------------------------------------------------- | |
# B) Change the Funko (attributes) | |
# -------------------------------------------------------------------------- | |
st.subheader("2. Modify Funko Attributes") | |
st.write("Select new attributes below. If you choose 'none', that attribute will be ignored/omitted in the prompt.") | |
# Possible attributes (from your code) — including 'none' | |
characters = ['none', 'animal', 'human', 'robot'] | |
eyes_shape = ['none', 'anime', 'black', 'closed', 'round', 'square', 'squint'] | |
eyes_color = ['none', 'black', 'blue', 'brown', 'green', 'grey', 'orange', 'pink', 'purple', 'red', 'white', 'yellow'] | |
eyewear = ['none', 'eyepatch', 'glasses', 'goggles', 'helmet', 'mask', 'sunglasses'] | |
hair_color = ['none', 'black', 'blonde', 'blue', 'brown', 'green', 'grey', 'orange', 'pink', 'purple', 'red', 'white', 'yellow'] | |
emotion = ['none', 'angry', 'happy', 'plain', 'sad'] | |
shirt_color = ['none', 'black', 'blue', 'brown', 'green', 'grey', 'orange', 'pink', 'purple', 'red', 'white', 'yellow'] | |
pants_color = ['none', 'black', 'blue', 'brown', 'green', 'grey', 'orange', 'pink', 'purple', 'red', 'white', 'yellow'] | |
accessories = ['none', 'bag', 'ball', 'belt', 'bird', 'book', 'cape', 'guitar', 'hat', 'helmet', 'sword', 'wand', 'wings'] | |
pose = ['none', 'sitting', 'standing'] | |
# Create selection widgets: | |
chosen_char = st.selectbox("Character:", characters) | |
chosen_eyes_shape = st.selectbox("Eyes Shape:", eyes_shape) | |
chosen_eyes_color = st.selectbox("Eyes Color:", eyes_color) | |
chosen_eyewear = st.selectbox("Eyewear:", eyewear) | |
chosen_hair_color = st.selectbox("Hair Color:", hair_color) | |
chosen_emotion = st.selectbox("Emotion:", emotion) | |
chosen_shirt_color = st.selectbox("Shirt Color:", shirt_color) | |
chosen_pants_color = st.selectbox("Pants Color:", pants_color) | |
chosen_accessories = st.selectbox("Accessories:", accessories) | |
chosen_pose = st.selectbox("Pose:", pose) | |
# Now we form a modified prompt. For demonstration, | |
# let's do something simple: we take the original prompt, parse it, and | |
# replace only the attributes that are not 'none'. | |
def modify_prompt(base_prompt: str): | |
# A simple example: we can build a new prompt from scratch, ignoring the old text. | |
# In reality, you might parse the old text or do something more sophisticated. | |
new_prompt_segments = [] | |
# Pose | |
if chosen_pose != 'none': | |
new_prompt_segments.append(f"A {chosen_pose}") | |
else: | |
new_prompt_segments.append("A standing") # default fallback | |
# Emotion + Character | |
if chosen_emotion != 'none': | |
new_prompt_segments.append(chosen_emotion) | |
else: | |
new_prompt_segments.append("plain") # fallback | |
if chosen_char != 'none': | |
new_prompt_segments.append(chosen_char + " Funko") | |
else: | |
new_prompt_segments.append("human Funko") | |
# Shirt color | |
if chosen_shirt_color != 'none': | |
new_prompt_segments.append(f"in a {chosen_shirt_color} shirt") | |
else: | |
new_prompt_segments.append("in a blue shirt") | |
# Pants color | |
if chosen_pants_color != 'none': | |
new_prompt_segments.append(f"and {chosen_pants_color} pants") | |
else: | |
new_prompt_segments.append("and blue pants") | |
# Eyes | |
eye_text = [] | |
if chosen_eyes_shape != 'none': | |
eye_text.append(f"{chosen_eyes_shape}") | |
else: | |
eye_text.append("round") | |
if chosen_eyes_color != 'none': | |
eye_text.append(f"{chosen_eyes_color}") | |
else: | |
eye_text.append("black") | |
eye_text.append("eyes") | |
new_prompt_segments.append("with " + " ".join(eye_text)) | |
# Eyewear | |
if chosen_eyewear != 'none': | |
new_prompt_segments.append(f"with {chosen_eyewear}") | |
# Hair | |
if chosen_hair_color != 'none': | |
new_prompt_segments.append(f"with {chosen_hair_color} hair") | |
# Accessories | |
if chosen_accessories != 'none': | |
new_prompt_segments.append(f"with a {chosen_accessories}") | |
return " ".join(new_prompt_segments) + "." | |
if st.button("Generate Modified Funko"): | |
if not st.session_state["original_prompt"]: | |
st.warning("Please generate an initial Funko (step 1) before modifying it.") | |
else: | |
new_prompt = modify_prompt(st.session_state["original_prompt"]) | |
st.write(f"**New Prompt**: {new_prompt}") | |
with st.spinner("Generating modified image..."): | |
with torch.autocast("cuda"): | |
image = sd_pipe(new_prompt, num_inference_steps=50).images[0] | |
st.session_state["latest_image"] = image | |
st.image(st.session_state["latest_image"], caption="Modified Image", use_column_width=True) | |
# -------------------------------------------------------------------------- | |
# C) Animate the Funko with SyncDreamer | |
# -------------------------------------------------------------------------- | |
st.subheader("3. Animate the Funko (SyncDreamer)") | |
st.write("Click the button to run SyncDreamer on the last generated image. (Demo)") | |
if st.button("Animate with SyncDreamer"): | |
if st.session_state["latest_image"] is None: | |
st.warning("No image found. Please generate a Funko first.") | |
else: | |
# Save latest image locally so SyncDreamer can process it | |
input_path = "latest_funko.png" | |
st.session_state["latest_image"].save(input_path) | |
run_syncdreamer(input_path, output_dir="syncdreamer_output") | |
# Optionally display a placeholder or actual frames/GIF | |
# ... | |
st.success("SyncDreamer animation completed (placeholder).") | |
# -------------------------------------------------------------------------- | |
# D) Multi-View 3D Funko (Zero123++) | |
# -------------------------------------------------------------------------- | |
st.subheader("4. Generate Multi-View 3D Funko (Zero123++)") | |
if st.button("Generate Multi-View 3D"): | |
if st.session_state["latest_image"] is None: | |
st.warning("No image found. Please generate a Funko first.") | |
else: | |
# Save the last image as input for Zero123 | |
input_path = "funko_for_zero123.png" | |
st.session_state["latest_image"].save(input_path) | |
# Make sure image is at least 320x320 and square | |
original_img = Image.open(input_path).convert("RGB") | |
cond = make_square_min_dim(original_img, min_side=320) | |
# Inference | |
st.info("Running Zero123++ pipeline... Please wait.") | |
with torch.autocast("cuda"): | |
result_grid = zero123_pipe(cond, num_inference_steps=50).images[0] | |
result_grid.save("zero123_grid.png") | |
st.image(result_grid, caption="Zero123++ Multi-View Grid (640x960)") | |
# Optionally crop and display sub-views | |
# Here we crop 6 sub-images of 320x320 from the 640x960 grid: | |
coords = [ | |
(0, 0, 320, 320), | |
(320, 0, 640, 320), | |
(0, 320, 320, 640), | |
(320, 320, 640, 640), | |
(0, 640, 320, 960), | |
(320, 640, 640, 960), | |
] | |
st.write("### Generated Views:") | |
for i, (x1, y1, x2, y2) in enumerate(coords): | |
sub_img = result_grid.crop((x1, y1, x2, y2)) | |
sub_path = f"zero123_view_{i}.png" | |
sub_img.save(sub_path) | |
st.image(sub_path, width=256) | |
# -------------------------------------------------------------------------- | |
# E) Integrate a New Background | |
# -------------------------------------------------------------------------- | |
st.subheader("5. Apply a New Background to Each View") | |
st.write("Upload a background image, then apply it to each previously generated view.") | |
bg_file = st.file_uploader("Upload Background Image", type=["png", "jpg", "jpeg"]) | |
if bg_file is not None: | |
st.image(bg_file, caption="Selected Background", width=200) | |
if st.button("Apply Background to Multi-View"): | |
if bg_file is None: | |
st.warning("No background uploaded.") | |
else: | |
# Save background to disk: | |
bg_path = "background.png" | |
with open(bg_path, "wb") as f: | |
f.write(bg_file.read()) | |
# In a real implementation, you would do the compositing described | |
# in your original code with threshold-based masking, etc. | |
# For demonstration, let's just say "Applied!" | |
st.success("Background compositing placeholder done. Check your images in the output folder.") | |
st.write("End of the Demo. Adjust code as needed for your pipeline paths and logic.") | |
if __name__ == "__main__": | |
main() | |