Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| import random | |
| import torch | |
| from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL | |
| from transformers import CLIPTextModel, CLIPTokenizer,T5EncoderModel, T5TokenizerFast | |
| from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images | |
| import modal | |
| import random | |
| import io | |
| from config.config import prompts, models # Indirect import | |
| import os | |
| import sentencepiece | |
| from huggingface_hub import login | |
| from transformers import AutoTokenizer | |
| from datetime import datetime | |
| from PIL import Image | |
# Directory exported to the container as the Hugging Face hub cache location.
CACHE_DIR = "/model_cache"

# Assemble the Modal container image one layer at a time: CUDA development
# base image with Python 3.9 added, then the project's pinned requirements,
# then the Hugging Face environment variables.
# (A debian_slim-based recipe that installed git, diffusers and transformers
# from GitHub used to be sketched here as commented-out code; it is disabled.)
image = modal.Image.from_registry(
    "nvidia/cuda:12.2.0-devel-ubuntu22.04",
    add_python="3.9",
)
image = image.pip_install_from_requirements("requirements.txt")
image = image.env(
    {
        "HF_HUB_ENABLE_HF_TRANSFER": "1",
        # NOTE(review): this value is the literal string "HF_HOME" (a relative
        # directory name), not an absolute path -- confirm that is intended.
        "HF_HOME": "HF_HOME",
        "HF_HUB_CACHE": CACHE_DIR,
    }
)

# The Modal application; its functions run on the image built above.
app = modal.App("img-gen-modal-live", image=image)

# Imports placed under the image's `imports()` context. `os` is also imported
# unconditionally at the top of this file.
with image.imports():
    import os

# Named volume handle for the model weights; created on first use if absent.
flux_model_vol = modal.Volume.from_name(
    "flux-model-vol",
    create_if_missing=True,
)
| # GPU FUNCTION | |
def main():
    """Run one FLUX.1-dev generation, saving a rough preview at every step.

    Loads the pipeline from ``/data/FLUX.1-dev`` in bfloat16, moves it to
    CUDA, and generates a single image for a fixed prompt. A step-end
    callback writes ``<step>.png`` (relative to the working directory) each
    time it is invoked.
    """

    def latents_to_rgb(latents):
        """Project a latent tensor onto RGB with a fixed linear map.

        Returns a ``PIL.Image`` built from the clamped 8-bit result.

        NOTE(review): the table below has four coefficients per colour and
        the result is transposed as a 3-D array, so this presumably expects
        a (4, height, width) latent -- confirm that this matches what the
        FLUX.1-dev callback actually provides.
        """
        dtype, device = latents.dtype, latents.device

        # One row per output colour, one column per latent channel.
        channel_mix = torch.tensor(
            (
                (60, -60, 25, -70),
                (60, -5, 15, -50),
                (60, 10, -5, -35),
            ),
            dtype=dtype,
        ).to(device)
        offsets = torch.tensor((150, 140, 130), dtype=dtype).to(device)

        # The table is transposed so the einsum contracts over the
        # latent-channel axis and leaves the colour axis in its place.
        projected = torch.einsum("...lxy,lr -> ...rxy", latents, channel_mix.t())
        shifted = projected + offsets[:, None, None]

        pixels = shifted.clamp(0, 255).to(torch.uint8).cpu().numpy()
        # Channels-first -> channels-last, the layout Image.fromarray reads.
        return Image.fromarray(np.transpose(pixels, (1, 2, 0)))

    def decode_tensors(pipe, step, timestep, callback_kwargs):
        """Step-end callback: save a preview of the first latent as ``<step>.png``.

        ``pipe`` and ``timestep`` are accepted but unused. The keyword
        dictionary is handed back unmodified.
        """
        first_latent = callback_kwargs["latents"][0]
        preview = latents_to_rgb(first_latent)
        preview.save(f"{step}.png")
        return callback_kwargs

    model_name = "FLUX.1-dev"
    # presumably the mount point of the model volume -- verify against the
    # function's Modal configuration, which is not visible here.
    model_path = f"/data/{model_name}"

    pipeline = DiffusionPipeline.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        use_safetensors=True,
    )
    pipeline = pipeline.to("cuda")

    generation_kwargs = {
        "prompt": "A croissant shaped like a cute bear.",
        "negative_prompt": "Deformed, ugly, bad anatomy",
        "width": 300,
        "height": 200,
        "callback_on_step_end": decode_tensors,
        "callback_on_step_end_tensor_inputs": ["latents"],
    }
    result = pipeline(**generation_kwargs)

    # Local name only: inside this function it shadows the module-level Modal
    # `image`. The value is not used further in the code visible here.
    image = result.images[0]