Spaces:

andresampa
/

CtB-AI-img-gen

Sleeping

CtB-AI-img-gen / live-preview-example /app_live_example4.py

Andre

update 1.1

4f48282 9 months ago

3 kB

	import gradio as gr
	import numpy as np
	import random
	import torch
	from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL
	from transformers import CLIPTextModel, CLIPTokenizer,T5EncoderModel, T5TokenizerFast
	from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images
	import modal
	import random
	import io
	from config.config import prompts, models # Indirect import
	import os
	import sentencepiece
	from huggingface_hub import login
	from transformers import AutoTokenizer
	from datetime import datetime
	from PIL import Image



	# Directory handed to the Hugging Face hub as its cache location (see the
	# HF_HUB_CACHE entry in the image environment below).
	CACHE_DIR = "/model_cache"

	# Container image for the Modal app: a CUDA 12.2 devel base with Python 3.9
	# added, plus everything listed in requirements.txt.
	# (An earlier variant, previously left commented out here, started from
	# modal.Image.debian_slim(python_version="3.9"), apt-installed git and
	# pip-installed diffusers plus transformers from its GitHub repository.)
	image = modal.Image.from_registry(
	    "nvidia/cuda:12.2.0-devel-ubuntu22.04", add_python="3.9"
	)
	image = image.pip_install_from_requirements("requirements.txt")

	# Environment variables baked into the image.
	# NOTE(review): HF_HOME is set to the literal string "HF_HOME", i.e. a
	# relative directory name -- confirm this is intended and not a placeholder.
	image = image.env(
	    {
	        "HF_HUB_ENABLE_HF_TRANSFER": "1",
	        "HF_HOME": "HF_HOME",
	        "HF_HUB_CACHE": CACHE_DIR,
	    }
	)

	# Named Modal volume that is mounted into the GPU function; it is created
	# on first use if it does not exist yet.
	flux_model_vol = modal.Volume.from_name(
	    "flux-model-vol",
	    create_if_missing=True,
	)

	# The Modal app; every function registered on it uses `image` by default.
	app = modal.App(
	    "img-gen-modal-live",
	    image=image,
	)

	# Imports placed in this context are tied to the container image.
	with image.imports():
	    import os


	# GPU FUNCTION
	@app.function(
	    volumes={"/data": flux_model_vol},
	    secrets=[modal.Secret.from_name("huggingface-token")],
	    gpu="L40S",
	    timeout=300,
	)
	def main():
	    """Run one FLUX.1-dev generation and dump a preview PNG per callback.

	    The pipeline is loaded from ``/data/FLUX.1-dev`` (the mounted volume),
	    moved to CUDA, and invoked once with a fixed prompt. ``decode_tensors``
	    is passed as ``callback_on_step_end`` and writes ``<step>.png`` into the
	    current working directory each time it is called.

	    The function returns nothing; the final image is computed but neither
	    saved nor returned.
	    """

	    def latents_to_rgb(latents):
	        """Map a latent tensor to an RGB ``PIL.Image`` with a fixed linear map.

	        The weight table has four columns, so the latent-channel axis (the
	        third-from-last dimension) must have size 4, and the final
	        ``transpose(1, 2, 0)`` requires the result to be exactly 3-D, i.e.
	        the input is (4, height, width).

	        NOTE(review): confirm that the latents the FLUX pipeline hands to
	        the callback really have this 4-channel (C, H, W) layout per sample;
	        if they do not, the einsum below will raise.
	        """
	        # One row per output colour channel (R, G, B), one column per
	        # latent channel.
	        channel_weights = (
	            (60, -60, 25, -70),
	            (60, -5, 15, -50),
	            (60, 10, -5, -35),
	        )
	        channel_biases = (150, 140, 130)

	        # Transposed to (latent_channels, rgb) to match the "lr" operand.
	        projection = (
	            torch.tensor(channel_weights, dtype=latents.dtype)
	            .to(latents.device)
	            .t()
	        )
	        offsets = torch.tensor(channel_biases, dtype=latents.dtype).to(latents.device)

	        rgb = torch.einsum("...lxy,lr -> ...rxy", latents, projection)
	        # Broadcast the per-colour bias over both spatial axes.
	        rgb = rgb + offsets[:, None, None]

	        # Clamp to the 8-bit range, then reorder to height x width x channel.
	        pixels = rgb.clamp(0, 255).byte().cpu().numpy()
	        return Image.fromarray(pixels.transpose(1, 2, 0))

	    def decode_tensors(pipe, step, timestep, callback_kwargs):
	        """Save a preview of the first sample's latents as ``<step>.png``.

	        Returns ``callback_kwargs`` unchanged so the pipeline can continue.
	        """
	        first_sample = callback_kwargs["latents"][0]
	        latents_to_rgb(first_sample).save(f"{step}.png")
	        return callback_kwargs

	    model_name = "FLUX.1-dev"
	    model_path = f"/data/{model_name}"

	    pipeline = DiffusionPipeline.from_pretrained(
	        model_path,
	        torch_dtype=torch.bfloat16,
	        use_safetensors=True,
	    )
	    pipeline = pipeline.to("cuda")

	    generation = pipeline(
	        prompt="A croissant shaped like a cute bear.",
	        negative_prompt="Deformed, ugly, bad anatomy",
	        width=300,
	        height=200,
	        callback_on_step_end=decode_tensors,
	        callback_on_step_end_tensor_inputs=["latents"],
	    )
	    # Only the first generated image is kept; it is not used afterwards.
	    image = generation.images[0]