LPX committed on
Commit 15c3cb0 · 1 Parent(s): 0749e05

temp: attempt to upload half-precision .safetensors file

Files changed (2)
  1. README.md +1 -1
  2. float16.py +330 -0
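In outline, float16.py (added below) reloads the Kontext Lightning pipeline in half precision, serializes it as .safetensors, and pushes the result to the Hub. A minimal sketch of those same steps, using the repo ID and local path from this commit (the Gradio demo portion of the file is omitted here):

import torch
from diffusers import FluxKontextPipeline
from huggingface_hub import upload_folder

# Load the published Lightning pipeline in float16 (half precision).
pipe = FluxKontextPipeline.from_pretrained(
    "LPX55/FLUX.1_Kontext-Lightning", torch_dtype=torch.float16
)

# Serialize every component as .safetensors; a large max_shard_size keeps
# each component's weights in a single file rather than split shards.
pipe.save_pretrained("./flux_16bit", safe_serialization=True, max_shard_size="100GB")

# Upload the exported folder into a "float16" path of the same model repo.
upload_folder(
    folder_path="./flux_16bit",
    path_in_repo="float16",
    repo_id="LPX55/FLUX.1_Kontext-Lightning",
    repo_type="model",
    commit_message="Upload half-precision FLUX.1 Kontext Lightning model",
)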
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: red
  colorTo: yellow
  sdk: gradio
  sdk_version: 5.35.0
- app_file: app_kontext.py
+ app_file: float16.py
  pinned: true
  short_description: Inspired by our 8-Step FLUX Merged/Fusion Models
  ---
float16.py ADDED
@@ -0,0 +1,330 @@
import gradio as gr
import numpy as np
import spaces
import torch
import random
import os
import subprocess
import logging
import safetensors
#####################################################
# Forced Diffusers upgrade when cache was being stubborn; probably not needed now
# force = subprocess.run("pip install -U diffusers", shell=True)
# force = subprocess.run("pip install git+https://github.com/huggingface/diffusers.git", shell=True)
# force = subprocess.run("pip install git+https://github.com/huggingface/transformers.git", shell=True)
force = subprocess.run("git lfs install", shell=True)

#####################################################
import transformers
import diffusers
from diffusers import DiffusionPipeline
import bitsandbytes
from diffusers.quantizers import PipelineQuantizationConfig
from diffusers.utils import load_image
from diffusers import FluxKontextPipeline
from PIL import Image
from huggingface_hub import hf_hub_download
from huggingface_hub import create_repo, upload_folder
from huggingface_hub.utils._runtime import dump_environment_info
from safetensors import safe_open

#####################################################

MAX_SEED = np.iinfo(np.int32).max
API_TOKEN = os.environ['HF_TOKEN']
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

os.environ.setdefault('GRADIO_ANALYTICS_ENABLED', 'False')
os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')

dump_environment_info()
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

#####################################################

# TESTING TWO QUANTIZATION METHODS
# 1) If FP8 is supported: `torchao` for quantization
# quant_config = PipelineQuantizationConfig(
#     quant_backend="torchao",
#     quant_kwargs={"quant_type": "float8dq_e4m3_row"},
#     components_to_quantize=["transformer"]
# )
# 2) Otherwise, standard 4-bit quantization with bitsandbytes
# quant_config = PipelineQuantizationConfig(
#     quant_backend="bitsandbytes_4bit",
#     quant_kwargs={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.bfloat16, "bnb_4bit_quant_type": "nf4"},
#     components_to_quantize=["transformer"]
# )

try:
    # Set max memory usage for ZeroGPU
    torch.cuda.set_per_process_memory_fraction(1.0)
    torch.set_float32_matmul_precision("high")
except Exception as e:
    print(f"Error setting memory usage: {e}")

#####################################################
# Load the pipeline; the quantization configs above are currently unused.
# float16 is used as the dtype for this half-precision export.
# HF Spaces VRAM (50 GB) is sufficient to hold the entire pipeline (31.424 GB),
# so leave the entire pipeline on the GPU for the best performance.

# FLUX.1 Dev Kontext Lightning model (8 steps)
kontext_model = "LPX55/FLUX.1_Kontext-Lightning"
pipe = FluxKontextPipeline.from_pretrained(
    "LPX55/FLUX.1_Kontext-Lightning",
    torch_dtype=torch.float16
).to("cuda")
# Save as a single `.safetensors` file
pipe.save_pretrained(
    "./flux_16bit",
    safe_serialization=True,
    max_shard_size="100GB"  # Forces all shards into one file (no split files)
)

local_folder = "./flux_16bit"
hub_repo_name = "LPX55/FLUX.1_Kontext-Lightning"

# create_repo(hub_repo_name, exist_ok=True, private=False)

with safe_open("./flux_16bit/model.safetensors", framework="pt", device="cuda") as f:
    for k in f.keys():
        print(k, f.get_slice(k).get_shape())

upload_folder(
    folder_path=local_folder,
    path_in_repo="float16",
    repo_id=hub_repo_name,
    repo_type="model",
    commit_message="Upload half-precision FLUX.1 Kontext Lightning model",
    token=API_TOKEN
)
###################################################
# SECTION FOR LORA(S); SKIP FOR NOW

# try:
#     repo_name = ""
#     ckpt_name = ""
#     pipe.load_lora_weights(hf_hub_download(repo_name, ckpt_name), adapter_name="A1")
#     pipe.set_adapters(["A1"], adapter_weights=[0.5])
#     pipe.fuse_lora(adapter_names=["A1"], lora_scale=1.0)
#     pipe.unload_lora_weights()

# except Exception as e:
#     print(f"Error while loading LoRA: {e}")

#####################################################
def concatenate_images(images, direction="horizontal"):
    """
    Concatenate multiple PIL images either horizontally or vertically.

    Args:
        images: List of PIL Images
        direction: "horizontal" or "vertical"

    Returns:
        PIL Image: Concatenated image
    """
    if not images:
        return None

    # Filter out None images
    valid_images = [img for img in images if img is not None]

    if not valid_images:
        return None

    if len(valid_images) == 1:
        return valid_images[0].convert("RGB")

    # Convert all images to RGB
    valid_images = [img.convert("RGB") for img in valid_images]

    if direction == "horizontal":
        # Calculate total width and max height
        total_width = sum(img.width for img in valid_images)
        max_height = max(img.height for img in valid_images)

        # Create new image
        concatenated = Image.new('RGB', (total_width, max_height), (255, 255, 255))

        # Paste images
        x_offset = 0
        for img in valid_images:
            # Center image vertically if heights differ
            y_offset = (max_height - img.height) // 2
            concatenated.paste(img, (x_offset, y_offset))
            x_offset += img.width

    else:  # vertical
        # Calculate max width and total height
        max_width = max(img.width for img in valid_images)
        total_height = sum(img.height for img in valid_images)

        # Create new image
        concatenated = Image.new('RGB', (max_width, total_height), (255, 255, 255))

        # Paste images
        y_offset = 0
        for img in valid_images:
            # Center image horizontally if widths differ
            x_offset = (max_width - img.width) // 2
            concatenated.paste(img, (x_offset, y_offset))
            y_offset += img.height

    return concatenated

@spaces.GPU
@torch.no_grad()
def infer(input_images, prompt, seed=42, randomize_seed=False, guidance_scale=2.5, steps=8, width=1024, height=1024, progress=gr.Progress(track_tqdm=True)):

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Handle input_images - it could be a single image or a list of images
    if input_images is None:
        raise gr.Error("Please upload at least one image.")

    # If it's a single image (not a list), convert to list
    if not isinstance(input_images, list):
        input_images = [input_images]

    # Filter out None images; each gallery item is an (image, caption) pair
    valid_images = [img[0] for img in input_images if img is not None]

    if not valid_images:
        raise gr.Error("Please upload at least one valid image.")

    # Concatenate images horizontally
    concatenated_image = concatenate_images(valid_images, "horizontal")

    if concatenated_image is None:
        raise gr.Error("Failed to process the input images.")

    # original_width, original_height = concatenated_image.size

    # if original_width >= original_height:
    #     new_width = 1024
    #     new_height = int(original_height * (new_width / original_width))
    #     new_height = round(new_height / 64) * 64
    # else:
    #     new_height = 1024
    #     new_width = int(original_width * (new_height / original_height))
    #     new_width = round(new_width / 64) * 64

    # concatenated_image_resized = concatenated_image.resize((new_width, new_height), Image.LANCZOS)

    final_prompt = f"From the provided reference images, create a unified, cohesive image such that {prompt}. Maintain the identity and characteristics of each subject while adjusting their proportions, scale, and positioning to create a harmonious, naturally balanced composition. Blend and integrate all elements seamlessly with consistent lighting, perspective, and style. The final result should look like a single naturally captured scene where all subjects are properly sized and positioned relative to each other, not assembled from multiple sources."

    image = pipe(
        image=concatenated_image,
        prompt=final_prompt,
        guidance_scale=guidance_scale,
        width=width,
        height=height,
        max_area=width * height,
        num_inference_steps=steps,
        generator=torch.Generator().manual_seed(seed),
    ).images[0]

    return image, seed, gr.update(visible=True)

css = """
#col-container {
    margin: 0 auto;
    max-width: 86vw;
}
"""

with gr.Blocks(css=css) as demo:

    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""# FLUX.1 Kontext | Lightning 8-Step Model ⚡
        """)
        with gr.Row():
            with gr.Column():
                input_images = gr.Gallery(
                    label="Upload image(s) for editing",
                    show_label=True,
                    elem_id="gallery_input",
                    columns=3,
                    rows=2,
                    object_fit="contain",
                    height="auto",
                    file_types=['image'],
                    type='pil'
                )

                with gr.Row():
                    prompt = gr.Text(
                        label="Prompt",
                        show_label=False,
                        max_lines=1,
                        placeholder="Enter your prompt for editing (e.g., 'Remove glasses', 'Add a hat')",
                        container=False,
                    )
                    run_button = gr.Button("Run", scale=0)

                with gr.Accordion("Advanced Settings", open=True):

                    with gr.Group():
                        width = gr.Slider(
                            label="W",
                            minimum=512,
                            maximum=2560,
                            step=64,
                            value=1024,
                        )

                        height = gr.Slider(
                            label="H",
                            minimum=512,
                            maximum=2560,
                            step=64,
                            value=1024,
                        )

                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                    )

                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

                    guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=1,
                        maximum=10,
                        step=0.1,
                        value=2.5,
                    )
                    input_steps = gr.Slider(
                        label="Steps",
                        minimum=1,
                        maximum=30,
                        step=1,
                        value=16,
                    )

            with gr.Column():
                result = gr.Image(label="Result", show_label=False, interactive=False)
                reuse_button = gr.Button("Reuse this image", visible=False)

    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[input_images, prompt, seed, randomize_seed, guidance_scale, input_steps, width, height],
        outputs=[result, seed, reuse_button]
    )

    reuse_button.click(
        fn=lambda image: [image] if image is not None else [],  # Convert single image to list for gallery
        inputs=[result],
        outputs=[input_images]
    )

demo.queue().launch()