import time
from io import BytesIO
from pathlib import Path
import modal
cuda_version = "12.4.0"  # should be no greater than host CUDA version
flavor = "devel"  # includes full CUDA toolkit
operating_sys = "ubuntu22.04"
tag = f"{cuda_version}-{flavor}-{operating_sys}"

cuda_dev_image = modal.Image.from_registry(
    f"nvidia/cuda:{tag}", add_python="3.11"
).entrypoint([])

diffusers_commit_sha = "81cf3b2f155f1de322079af28f625349ee21ec6b"

flux_image = (
    cuda_dev_image.apt_install(
        "git",
        "libglib2.0-0",
        "libsm6",
        "libxrender1",
        "libxext6",
        "ffmpeg",
        "libgl1",
    )
    .pip_install(
        "invisible_watermark==0.2.0",
        "transformers==4.44.0",
        "huggingface_hub[hf_transfer]==0.26.2",
        "accelerate==0.33.0",
        "safetensors==0.4.4",
        "sentencepiece==0.2.0",
        "torch==2.5.0",
        f"git+https://github.com/huggingface/diffusers.git@{diffusers_commit_sha}",
        "numpy<2",
    )
    # enable fast downloads via hf_transfer and point the HF cache at the mounted Volume
    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1", "HF_HUB_CACHE_DIR": "/cache"})
)
# persist torch.compile (inductor) artifacts in the Volume mounted at /root/.inductor-cache
flux_image = flux_image.env(
    {
        "TORCHINDUCTOR_CACHE_DIR": "/root/.inductor-cache",
        "TORCHINDUCTOR_FX_GRAPH_CACHE": "1",
    }
)
with flux_image.imports():
    import torch
    from diffusers import FluxPipeline
MINUTES = 60 # seconds
VARIANT = "schnell" # or "dev", but note [dev] requires you to accept terms and conditions on HF
NUM_INFERENCE_STEPS = 4  # use ~50 for [dev], smaller for [schnell]
app = modal.App("example-flux", image=flux_image)
@app.local_entrypoint()
def main(
    # the default prompt below is an illustrative placeholder
    prompt: str = "a watercolor painting of a lighthouse at dawn",
    compile: bool = False,
):
    # call the remote inference method on Modal and save the resulting JPEG locally
    t0 = time.time()
    image_bytes = Model(compile=int(compile)).inference.remote(prompt)
    print(f"🎨 inference latency: {time.time() - t0:.2f} seconds")

    output_path = Path("/tmp") / "flux" / "output.jpg"
    output_path.parent.mkdir(exist_ok=True, parents=True)
    print(f"🎨 saving output to {output_path}")
    output_path.write_bytes(image_bytes)
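# Example invocation (the filename and prompt are illustrative; assumes this script
# is saved locally as flux.py):
#   modal run flux.py --prompt "a cyberpunk cityscape at night"
# Modal exposes each local_entrypoint parameter as a command-line option.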
@app.cls(
gpu="H100", # fastest GPU on Modal
container_idle_timeout=20 * MINUTES,
timeout=60 * MINUTES, # leave plenty of time for compilation
volumes={ # add Volumes to store serializable compilation artifacts, see section on torch.compile below
"/cache": modal.Volume.from_name(
"hf-hub-cache", create_if_missing=True
),
"/root/.nv": modal.Volume.from_name("nv-cache", create_if_missing=True),
"/root/.triton": modal.Volume.from_name(
"triton-cache", create_if_missing=True
),
"/root/.inductor-cache": modal.Volume.from_name(
"inductor-cache", create_if_missing=True
),
},
)
class Model:
    compile: int = (  # see section on torch.compile below for details
        modal.parameter(default=0)
    )

    @modal.enter()
    def enter(self):
        pipe = FluxPipeline.from_pretrained(
            f"black-forest-labs/FLUX.1-{VARIANT}", torch_dtype=torch.bfloat16
        ).to("cuda")  # move model to GPU
        self.pipe = optimize(pipe, compile=bool(self.compile))

    @modal.method()
    def inference(self, prompt: str) -> bytes:
        print("🎨 generating image...")
        out = self.pipe(
            prompt,
            output_type="pil",
            num_inference_steps=NUM_INFERENCE_STEPS,
        ).images[0]

        byte_stream = BytesIO()
        out.save(byte_stream, format="JPEG")
        return byte_stream.getvalue()