Spaces:

ankitkupadhyay
/

funky_funkos

Build error

App Files Files

xet

Community

funky_funkos / app.py

ankitkupadhyay

initial app file

4512528 verified 6 months ago

raw

history blame

14.1 kB

	import streamlit as st
	import torch
	import random
	import subprocess
	import os
	from PIL import Image
	from diffusers import StableDiffusionPipeline, UNet2DConditionModel
	# ... any other imports you need ...

	# ------------------------------------------------------------------------------
	# 1. Load your models ONCE through st.cache_resource loaders (so they don't reload on every rerun).
	# ------------------------------------------------------------------------------

	@st.cache_resource
	def load_sd_pipeline(base_model_path: str, fine_tuned_path: str):
	    """Build the Stable Diffusion pipeline with the fine-tuned UNet swapped in.

	    The function is wrapped in ``st.cache_resource`` so the pipeline object is
	    reused instead of being rebuilt each time it is requested with the same
	    arguments.

	    Args:
	        base_model_path: Model id or path passed to
	            ``StableDiffusionPipeline.from_pretrained``.
	        fine_tuned_path: Checkpoint location whose ``unet`` subfolder holds
	            the fine-tuned ``UNet2DConditionModel`` weights.

	    Returns:
	        The float16 pipeline, moved to ``"cuda"``, with its UNet replaced by
	        the fine-tuned one and its safety checker replaced by a pass-through.
	    """

	    def dummy_safety_checker(images, clip_input):
	        # The pipeline iterates over the second return value (one NSFW flag
	        # per image), so it has to be a sequence: a bare ``False`` raises
	        # "TypeError: 'bool' object is not iterable" in current diffusers.
	        return images, [False] * len(images)

	    pipe = StableDiffusionPipeline.from_pretrained(
	        base_model_path,
	        torch_dtype=torch.float16,
	    )
	    pipe.to("cuda")

	    # Load the fine-tuned UNet with the same dtype/device as the pipeline.
	    unet = UNet2DConditionModel.from_pretrained(
	        fine_tuned_path,
	        subfolder="unet",
	        torch_dtype=torch.float16,
	    ).to("cuda")

	    pipe.unet = unet
	    pipe.safety_checker = dummy_safety_checker

	    return pipe

	# Similarly, if you want to load Zero123++ or other pipelines:
	@st.cache_resource
	def load_zero123_pipeline():
	    """Create the Zero123++ multi-view pipeline on the GPU.

	    Wrapped in ``st.cache_resource`` so the pipeline object is reused rather
	    than rebuilt on each call.

	    Returns:
	        The ``sudo-ai/zero123plus-v1.2`` diffusion pipeline in float16, using
	        an Euler-ancestral scheduler with ``timestep_spacing='trailing'``,
	        moved to ``"cuda"``.
	    """
	    # Imported here because only this loader needs these two names.
	    from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler

	    zero123 = DiffusionPipeline.from_pretrained(
	        "sudo-ai/zero123plus-v1.2",
	        custom_pipeline="sudo-ai/zero123plus-pipeline",
	        torch_dtype=torch.float16,
	    )

	    # Rebuild the scheduler from the pipeline's own config, only changing
	    # the timestep spacing.
	    scheduler_config = zero123.scheduler.config
	    zero123.scheduler = EulerAncestralDiscreteScheduler.from_config(
	        scheduler_config, timestep_spacing='trailing'
	    )

	    zero123.to("cuda")
	    return zero123


	# Example placeholders for the SyncDreamer command or internal functions:
	def run_syncdreamer(input_path: str, output_dir: str = "syncdreamer_output"):
	    """Placeholder for the SyncDreamer step.

	    No SyncDreamer command is executed yet: the function only reports
	    progress in the Streamlit UI and makes sure ``output_dir`` exists.

	    Args:
	        input_path: Path of the image SyncDreamer would process (currently
	            unused by the placeholder).
	        output_dir: Directory that is created if missing and reported as the
	            result location.
	    """
	    st.info("Running SyncDreamer... (placeholder)")

	    # The real invocation belongs here, e.g. subprocess.run([...], check=True),
	    # together with any .jpg -> .png conversion of the results.
	    os.makedirs(output_dir, exist_ok=True)

	    completion_message = f"SyncDreamer completed. Results in: {output_dir}"
	    st.success(completion_message)


	# Helper function for Zero123++ pipeline
	def make_square_min_dim(image: Image.Image, min_side: int = 320) -> Image.Image:
	    """Upscale ``image`` so both sides reach ``min_side``, then pad to a square.

	    The image is never shrunk: the scale factor is at least 1.0. After
	    scaling, the image is centred on a white RGB canvas whose side equals the
	    larger of the two scaled dimensions.

	    Args:
	        image: Source image. It is pasted without a mask, so any alpha
	            channel is not used for blending.
	        min_side: Minimum length, in pixels, of each side after scaling.

	    Returns:
	        A new square RGB image containing the (possibly upscaled) input.
	    """
	    w, h = image.size
	    scale = max(min_side / w, min_side / h, 1.0)

	    if scale > 1.0:
	        # round() + clamp: truncating with int() can land one pixel below
	        # min_side when w * (min_side / w) comes out as e.g. 319.99999.
	        new_w = max(min_side, round(w * scale))
	        new_h = max(min_side, round(h * scale))
	        image = image.resize((new_w, new_h), Image.LANCZOS)
	    else:
	        # Already large enough; resizing to the same size would only copy.
	        new_w, new_h = w, h

	    side = max(new_w, new_h)
	    new_img = Image.new(mode="RGB", size=(side, side), color=(255, 255, 255))

	    # Centre the image on the white canvas.
	    offset_x = (side - new_w) // 2
	    offset_y = (side - new_h) // 2
	    new_img.paste(image, (offset_x, offset_y))
	    return new_img


	# ------------------------------------------------------------------------------
	# 2. Streamlit application.
	# ------------------------------------------------------------------------------

	def _ensure_session_defaults():
	    """Create the session-state entries the app reads, if they are missing."""
	    for key, default in (("latest_image", None), ("original_prompt", "")):
	        if key not in st.session_state:
	            st.session_state[key] = default


	def _generate_image(pipe, prompt: str):
	    """Run ``pipe`` on ``prompt`` (50 steps, CUDA autocast) and return the first image."""
	    with torch.autocast("cuda"):
	        return pipe(prompt, num_inference_steps=50).images[0]


	def _select_funko_attributes() -> dict:
	    """Render the attribute select boxes, in display order, and return the choices.

	    Every option list starts with 'none', which is therefore the default
	    selection of each box.
	    """
	    base_colors = ['none', 'black', 'blue', 'brown', 'green', 'grey', 'orange',
	                   'pink', 'purple', 'red', 'white', 'yellow']
	    attribute_options = (
	        ("char", "Character:", ['none', 'animal', 'human', 'robot']),
	        ("eyes_shape", "Eyes Shape:",
	         ['none', 'anime', 'black', 'closed', 'round', 'square', 'squint']),
	        ("eyes_color", "Eyes Color:", list(base_colors)),
	        ("eyewear", "Eyewear:",
	         ['none', 'eyepatch', 'glasses', 'goggles', 'helmet', 'mask', 'sunglasses']),
	        ("hair_color", "Hair Color:",
	         ['none', 'black', 'blonde', 'blue', 'brown', 'green', 'grey', 'orange',
	          'pink', 'purple', 'red', 'white', 'yellow']),
	        ("emotion", "Emotion:", ['none', 'angry', 'happy', 'plain', 'sad']),
	        ("shirt_color", "Shirt Color:", list(base_colors)),
	        ("pants_color", "Pants Color:", list(base_colors)),
	        ("accessories", "Accessories:",
	         ['none', 'bag', 'ball', 'belt', 'bird', 'book', 'cape', 'guitar', 'hat',
	          'helmet', 'sword', 'wand', 'wings']),
	        ("pose", "Pose:", ['none', 'sitting', 'standing']),
	    )
	    # The comprehension evaluates in tuple order, so widgets appear in that order.
	    return {key: st.selectbox(label, options) for key, label, options in attribute_options}


	def _build_modified_prompt(choices: dict) -> str:
	    """Assemble a Funko prompt from the selected attributes.

	    The prompt is built from scratch; the text of the initial prompt is not
	    parsed or reused. Pose, emotion, character, shirt, pants and eyes fall
	    back to fixed defaults when 'none' is selected; eyewear, hair and
	    accessories are simply left out.
	    """
	    # NOTE(review): the UI text says 'none' omits an attribute, but six of the
	    # ten attributes get a default instead - confirm which one is intended.

	    def pick(key: str, fallback: str) -> str:
	        value = choices[key]
	        return fallback if value == 'none' else value

	    segments = [
	        f"A {pick('pose', 'standing')}",
	        pick('emotion', 'plain'),
	        f"{pick('char', 'human')} Funko",
	        f"in a {pick('shirt_color', 'blue')} shirt",
	        f"and {pick('pants_color', 'blue')} pants",
	        f"with {pick('eyes_shape', 'round')} {pick('eyes_color', 'black')} eyes",
	    ]

	    if choices["eyewear"] != 'none':
	        segments.append(f"with {choices['eyewear']}")
	    if choices["hair_color"] != 'none':
	        segments.append(f"with {choices['hair_color']} hair")
	    if choices["accessories"] != 'none':
	        # NOTE(review): yields "with a wings" for the plural option; left as is
	        # because the prompt format the model was tuned on is not visible here.
	        segments.append(f"with a {choices['accessories']}")

	    return " ".join(segments) + "."


	def _render_initial_generation(sd_pipe):
	    """Section 1: prompt input, initial generation, and display of the latest image."""
	    st.subheader("1. Enter your Funko prompt")

	    # Show examples in the UI
	    with st.expander("Examples of valid prompts"):
	        st.write("""
	            - A standing plain human Funko in a blue shirt and blue pants with round black eyes with glasses with a belt.
	            - A sitting angry animal Funko with squint black eyes.
	            - A standing happy robot Funko in a brown shirt and grey pants with squint black eyes with cane and monocle.
	            - ...
	            """)

	    user_prompt = st.text_area(
	        "Type your Funko prompt here:",
	        value="A standing plain human Funko in a blue shirt and blue pants with round black eyes with glasses.",
	    )

	    if st.button("Generate Initial Funko"):
	        # Remembered so that section 2 can tell an initial Funko was requested.
	        st.session_state["original_prompt"] = user_prompt
	        with st.spinner("Generating image..."):
	            st.session_state["latest_image"] = _generate_image(sd_pipe, user_prompt)
	        st.success("Image generated!")

	    if st.session_state["latest_image"] is not None:
	        st.image(st.session_state["latest_image"], caption="Latest Generated Image", use_column_width=True)


	def _render_attribute_editor(sd_pipe):
	    """Section 2: pick attributes and regenerate the Funko from them."""
	    st.subheader("2. Modify Funko Attributes")
	    st.write("Select new attributes below. If you choose 'none', that attribute will be ignored/omitted in the prompt.")

	    choices = _select_funko_attributes()

	    if not st.button("Generate Modified Funko"):
	        return
	    if not st.session_state["original_prompt"]:
	        st.warning("Please generate an initial Funko (step 1) before modifying it.")
	        return

	    new_prompt = _build_modified_prompt(choices)
	    st.write(f"New Prompt: {new_prompt}")

	    with st.spinner("Generating modified image..."):
	        st.session_state["latest_image"] = _generate_image(sd_pipe, new_prompt)

	    st.image(st.session_state["latest_image"], caption="Modified Image", use_column_width=True)


	def _render_syncdreamer_section():
	    """Section 3: hand the latest image to the SyncDreamer placeholder."""
	    st.subheader("3. Animate the Funko (SyncDreamer)")
	    st.write("Click the button to run SyncDreamer on the last generated image. (Demo)")

	    if not st.button("Animate with SyncDreamer"):
	        return
	    if st.session_state["latest_image"] is None:
	        st.warning("No image found. Please generate a Funko first.")
	        return

	    # Save latest image locally so SyncDreamer can process it
	    input_path = "latest_funko.png"
	    st.session_state["latest_image"].save(input_path)
	    run_syncdreamer(input_path, output_dir="syncdreamer_output")

	    # Optionally display a placeholder or actual frames/GIF here.
	    st.success("SyncDreamer animation completed (placeholder).")


	def _render_multiview_section(zero123_pipe):
	    """Section 4: run Zero123++ on the latest image and show the six views."""
	    st.subheader("4. Generate Multi-View 3D Funko (Zero123++)")

	    if not st.button("Generate Multi-View 3D"):
	        return
	    latest_image = st.session_state["latest_image"]
	    if latest_image is None:
	        st.warning("No image found. Please generate a Funko first.")
	        return

	    # Keep a copy of the conditioning input on disk, as before.
	    input_path = "funko_for_zero123.png"
	    latest_image.save(input_path)

	    # Make sure image is at least 320x320 and square. The in-memory image is
	    # converted directly; re-reading the PNG just written adds nothing.
	    cond = make_square_min_dim(latest_image.convert("RGB"), min_side=320)

	    # Inference
	    st.info("Running Zero123++ pipeline... Please wait.")
	    with torch.autocast("cuda"):
	        result_grid = zero123_pipe(cond, num_inference_steps=50).images[0]

	    result_grid.save("zero123_grid.png")
	    st.image(result_grid, caption="Zero123++ Multi-View Grid (640x960)")

	    # Split the grid into 2 columns x 3 rows, row by row. Tile size comes from
	    # the actual grid size; for a 640x960 grid this gives 320x320 tiles.
	    grid_w, grid_h = result_grid.size
	    tile_w, tile_h = grid_w // 2, grid_h // 3
	    coords = [
	        (col * tile_w, row * tile_h, (col + 1) * tile_w, (row + 1) * tile_h)
	        for row in range(3)
	        for col in range(2)
	    ]

	    st.write("### Generated Views:")
	    for i, box in enumerate(coords):
	        sub_img = result_grid.crop(box)
	        sub_path = f"zero123_view_{i}.png"
	        sub_img.save(sub_path)
	        st.image(sub_path, width=256)


	def _render_background_section():
	    """Section 5: upload a background image and store it (compositing is a placeholder)."""
	    st.subheader("5. Apply a New Background to Each View")

	    st.write("Upload a background image, then apply it to each previously generated view.")
	    bg_file = st.file_uploader("Upload Background Image", type=["png", "jpg", "jpeg"])
	    if bg_file is not None:
	        st.image(bg_file, caption="Selected Background", width=200)

	    if not st.button("Apply Background to Multi-View"):
	        return
	    if bg_file is None:
	        st.warning("No background uploaded.")
	        return

	    # Save background to disk. getvalue() returns the whole upload regardless
	    # of the current read position, unlike read().
	    bg_path = "background.png"
	    with open(bg_path, "wb") as f:
	        f.write(bg_file.getvalue())

	    # The real threshold-based compositing is not implemented yet.
	    st.success("Background compositing placeholder done. Check your images in the output folder.")


	def main():
	    """Render the Funko generator demo page.

	    Loads both pipelines through their cached loaders, initialises session
	    state, then renders the five sections in order: initial generation,
	    attribute-based regeneration, SyncDreamer placeholder, Zero123++
	    multi-view generation, and background upload.
	    """
	    st.title("Funko Generator Demo")

	    # Both loaders are decorated with st.cache_resource.
	    base_model_path = "runwayml/stable-diffusion-v1-5"
	    fine_tuned_path = "/content/drive/MyDrive/CC_Project/checkpoint-3000"  # adapt if needed
	    sd_pipe = load_sd_pipeline(base_model_path, fine_tuned_path)

	    zero123_pipe = load_zero123_pipeline()  # For multi-view generation

	    _ensure_session_defaults()

	    _render_initial_generation(sd_pipe)
	    _render_attribute_editor(sd_pipe)
	    _render_syncdreamer_section()
	    _render_multiview_section(zero123_pipe)
	    _render_background_section()

	    st.write("End of the Demo. Adjust code as needed for your pipeline paths and logic.")


	# Script entry point: build the page when this file is executed as the main module.
	if __name__ == "__main__":
	    main()