Spaces:

gokaygokay
/

FLUX.1-dev-with-Captioner

Running on Zero

App Files Files Community

gokaygokay committed on Oct 27, 2024

Commit

fa09fe7

verified ·

1 Parent(s): 3d535fa

Update app.py

Browse files

Files changed (1) hide show

app.py +130 -130

app.py CHANGED Viewed

@@ -1,131 +1,131 @@
-import os
-import tempfile
-import time
-import gradio as gr
-import torch
-from PIL import Image
-from diffusers import FluxPipeline
-from huggingface_hub import hf_hub_download
-from sf3d.system import SF3D
-import sf3d.utils as sf3d_utils
-from gradio_litmodel3d import LitModel3D
-device = "cuda" if torch.cuda.is_available() else "cpu"
-dtype = torch.bfloat16
-torch.backends.cuda.matmul.allow_tf32 = True
-huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
-# Set up environment and cache
-cache_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
-os.environ["TRANSFORMERS_CACHE"] = cache_path
-os.environ["HF_HUB_CACHE"] = cache_path
-os.environ["HF_HOME"] = cache_path
-if not os.path.exists(cache_path):
-    os.makedirs(cache_path, exist_ok=True)
-# Initialize Flux pipeline
-pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, token=huggingface_token)
-pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"))
-pipe.fuse_lora(lora_scale=0.125)
-pipe.to(device="cuda", dtype=torch.bfloat16)
-# Initialize SF3D model
-sf3d_model = SF3D.from_pretrained(
-    "stabilityai/stable-fast-3d",
-    config_name="config.yaml",
-    weight_name="model.safetensors",
-    token=huggingface_token
-)
-sf3d_model.eval().cuda()
-# Constants for SF3D
-COND_WIDTH, COND_HEIGHT = 512, 512
-COND_DISTANCE, COND_FOVY_DEG = 1.6, 40
-BACKGROUND_COLOR = [0.5, 0.5, 0.5]
-c2w_cond = sf3d_utils.default_cond_c2w(COND_DISTANCE)
-intrinsic, intrinsic_normed_cond = sf3d_utils.create_intrinsic_from_fov_deg(
-    COND_FOVY_DEG, COND_HEIGHT, COND_WIDTH
-)
-def generate_image(prompt, height, width, steps, scales, seed):
-    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
-        return pipe(
-            prompt=[prompt],
-            generator=torch.Generator().manual_seed(int(seed)),
-            num_inference_steps=int(steps),
-            guidance_scale=float(scales),
-            height=int(height),
-            width=int(width),
-            max_sequence_length=256
-        ).images[0]
-def create_batch(input_image: Image.Image) -> dict:
-    img_cond = torch.from_numpy(
-        np.asarray(input_image.resize((COND_WIDTH, COND_HEIGHT))).astype(np.float32) / 255.0
-    ).float().clip(0, 1)
-    mask_cond = img_cond[:, :, -1:]
-    rgb_cond = torch.lerp(
-        torch.tensor(BACKGROUND_COLOR)[None, None, :], img_cond[:, :, :3], mask_cond
-    )
-    batch_elem = {
-        "rgb_cond": rgb_cond,
-        "mask_cond": mask_cond,
-        "c2w_cond": c2w_cond.unsqueeze(0),
-        "intrinsic_cond": intrinsic.unsqueeze(0),
-        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
-    }
-    return {k: v.unsqueeze(0) for k, v in batch_elem.items()}
-def generate_3d_model(input_image):
-    with torch.no_grad():
-        with torch.autocast(device_type="cuda", dtype=torch.float16):
-            model_batch = create_batch(input_image)
-            model_batch = {k: v.cuda() for k, v in model_batch.items()}
-            trimesh_mesh, _ = sf3d_model.generate_mesh(model_batch, 1024)
-            trimesh_mesh = trimesh_mesh[0]
-    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".glb")
-    trimesh_mesh.export(tmp_file.name, file_type="glb", include_normals=True)
-    return tmp_file.name
-def process_and_generate(prompt, height, width, steps, scales, seed):
-    # Generate image from prompt
-    generated_image = generate_image(prompt, height, width, steps, scales, seed)
-    # Generate 3D model from the image
-    glb_file = generate_3d_model(generated_image)
-    return generated_image, glb_file
-# Gradio interface
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# Text-to-3D Model Generator")
-    with gr.Row():
-        with gr.Column(scale=3):
-            prompt = gr.Textbox(label="Your Image Description", lines=3)
-            with gr.Accordion("Advanced Settings", open=False):
-                height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=1024)
-                width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=1024)
-                steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
-                scales = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=5.0, step=0.1, value=3.5)
-                seed = gr.Number(label="Seed", value=3413, precision=0)
-            generate_btn = gr.Button("Generate 3D Model", variant="primary")
-        with gr.Column(scale=4):
-            output_image = gr.Image(label="Generated Image")
-            output_3d = LitModel3D(label="3D Model", clear_color=[0.0, 0.0, 0.0, 0.0])
-    generate_btn.click(
-        process_and_generate,
-        inputs=[prompt, height, width, steps, scales, seed],
-        outputs=[output_image, output_3d]
-    )
-if __name__ == "__main__":
     demo.launch()

+import os
+import tempfile
+import time
+
+import gradio as gr
+import numpy as np
+import torch
+from PIL import Image
+from diffusers import DiffusionPipeline
+from huggingface_hub import hf_hub_download
+from sf3d.system import SF3D
+import sf3d.utils as sf3d_utils
+from gradio_litmodel3d import LitModel3D
+# Prefer CUDA when it is available.
+# NOTE(review): further down the pipeline is moved with pipe.to(device="cuda", ...)
+# and the SF3D model with .cuda(), so the "cpu" fallback is not actually usable.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Dtype used when loading the FLUX pipeline.
+dtype = torch.bfloat16
+torch.backends.cuda.matmul.allow_tf32 = True
+# Access token read from the environment; passed to both from_pretrained calls.
+huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
+# Set up environment and cache: point the Hugging Face caches at ./models
+# next to this file.
+# NOTE(review): these variables are assigned after diffusers / huggingface_hub
+# have already been imported; presumably those libraries resolve their cache
+# locations at import time, in which case this has no effect - confirm.
+cache_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
+os.environ["TRANSFORMERS_CACHE"] = cache_path
+os.environ["HF_HUB_CACHE"] = cache_path
+os.environ["HF_HOME"] = cache_path
+# exist_ok=True already tolerates an existing directory, so the exists() check
+# is redundant (harmless).
+if not os.path.exists(cache_path):
+    os.makedirs(cache_path, exist_ok=True)
+# Initialize Flux pipeline: load FLUX.1-dev and move it to `device`.
+pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=dtype, token = huggingface_token).to(device)
+# Download the Hyper-SD 8-step LoRA file, load it and fuse it at scale 0.125.
+pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"))
+pipe.fuse_lora(lora_scale=0.125)
+# Second device/dtype move, hard-coded to "cuda" (see the note on `device`).
+pipe.to(device="cuda", dtype=torch.bfloat16)
+# Initialize SF3D model
+sf3d_model = SF3D.from_pretrained(
+    "stabilityai/stable-fast-3d",
+    config_name="config.yaml",
+    weight_name="model.safetensors",
+    token=huggingface_token
+)
+# Switch to eval mode and move to the GPU (unconditionally CUDA).
+sf3d_model.eval().cuda()
+# Constants for SF3D
+# Size the conditioning image is resized to in create_batch.
+COND_WIDTH, COND_HEIGHT = 512, 512
+# Arguments for the sf3d_utils camera helpers below
+# (presumably camera distance and vertical field of view in degrees - confirm).
+COND_DISTANCE, COND_FOVY_DEG = 1.6, 40
+# Colour blended behind the image where the mask is 0 (see create_batch).
+BACKGROUND_COLOR = [0.5, 0.5, 0.5]
+# Camera tensors computed once here and reused by every create_batch call.
+c2w_cond = sf3d_utils.default_cond_c2w(COND_DISTANCE)
+intrinsic, intrinsic_normed_cond = sf3d_utils.create_intrinsic_from_fov_deg(
+    COND_FOVY_DEG, COND_HEIGHT, COND_WIDTH
+)
+def generate_image(prompt, height, width, steps, scales, seed):
+    """Render a single image for *prompt* with the module-level ``pipe``.
+
+    The numeric arguments arrive from Gradio widgets, so they are coerced
+    explicitly: ``height``, ``width``, ``steps`` and ``seed`` to ``int``,
+    ``scales`` (the guidance scale) to ``float``.
+
+    Returns the first entry of the pipeline output's ``images``.
+    """
+    # A seeded generator makes the result reproducible for a given seed.
+    rng = torch.Generator().manual_seed(int(seed))
+    call_kwargs = dict(
+        prompt=[prompt],
+        generator=rng,
+        num_inference_steps=int(steps),
+        guidance_scale=float(scales),
+        height=int(height),
+        width=int(width),
+        max_sequence_length=256,
+    )
+    with torch.inference_mode():
+        with torch.autocast("cuda", dtype=torch.bfloat16):
+            result = pipe(**call_kwargs)
+    return result.images[0]
+def create_batch(input_image: Image.Image) -> dict:
+    """Build the one-image conditioning batch handed to ``sf3d_model.generate_mesh``.
+
+    The image is converted to RGBA, resized to ``COND_WIDTH`` x ``COND_HEIGHT``
+    and scaled to floats in ``[0, 1]``. Its alpha channel becomes the mask, and
+    the RGB channels are blended over ``BACKGROUND_COLOR`` using that mask.
+
+    Args:
+        input_image: Source image. An image without an alpha channel (such as
+            the RGB output of the FLUX pipeline) is treated as fully opaque.
+
+    Returns:
+        A dict with keys ``rgb_cond``, ``mask_cond``, ``c2w_cond``,
+        ``intrinsic_cond`` and ``intrinsic_normed_cond``; every tensor has a
+        leading batch dimension of size 1.
+    """
+    # Force an alpha channel. Without it the "last channel" sliced below is
+    # the blue channel of an RGB image, which is not a mask, and the image
+    # would be blended with the background according to its blueness.
+    rgba_image = input_image.convert("RGBA").resize((COND_WIDTH, COND_HEIGHT))
+    img_cond = torch.from_numpy(
+        np.asarray(rgba_image).astype(np.float32) / 255.0
+    ).float().clip(0, 1)
+    # Alpha channel, kept as a trailing dimension of size 1.
+    mask_cond = img_cond[:, :, -1:]
+    # mask == 0 -> background colour, mask == 1 -> original pixel.
+    rgb_cond = torch.lerp(
+        torch.tensor(BACKGROUND_COLOR)[None, None, :], img_cond[:, :, :3], mask_cond
+    )
+    batch_elem = {
+        "rgb_cond": rgb_cond,
+        "mask_cond": mask_cond,
+        "c2w_cond": c2w_cond.unsqueeze(0),
+        "intrinsic_cond": intrinsic.unsqueeze(0),
+        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
+    }
+    # Add the batch dimension to every entry.
+    return {k: v.unsqueeze(0) for k, v in batch_elem.items()}
+def generate_3d_model(input_image):
+    """Reconstruct a mesh from *input_image* and export it as a GLB file.
+
+    The image is turned into a conditioning batch with ``create_batch``, moved
+    to the GPU and passed to ``sf3d_model.generate_mesh`` under ``no_grad``
+    and float16 autocast. The first returned mesh is exported.
+
+    Returns:
+        Path of the written ``.glb`` file. The file is created with
+        ``delete=False`` and is not removed by this function.
+    """
+    with torch.no_grad(), torch.autocast(device_type="cuda", dtype=torch.float16):
+        model_batch = create_batch(input_image)
+        model_batch = {k: v.cuda() for k, v in model_batch.items()}
+        # 1024 is presumably the texture bake resolution - confirm against SF3D.
+        trimesh_mesh, _ = sf3d_model.generate_mesh(model_batch, 1024)
+        trimesh_mesh = trimesh_mesh[0]
+    # Reserve a unique path and close the handle right away: the open handle
+    # was previously leaked on every call and kept the file open while the
+    # exporter wrote to the same path.
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".glb") as tmp_file:
+        glb_path = tmp_file.name
+    trimesh_mesh.export(glb_path, file_type="glb", include_normals=True)
+    return glb_path
+def process_and_generate(prompt, height, width, steps, scales, seed):
+    """Run the full text -> image -> 3D chain for one request.
+
+    Returns a ``(generated_image, glb_file)`` pair: the image produced by
+    ``generate_image`` and the GLB path produced from it by
+    ``generate_3d_model``.
+    """
+    image = generate_image(prompt, height, width, steps, scales, seed)
+    return image, generate_3d_model(image)
+# Gradio interface: a prompt and settings column next to an output column,
+# with one button that runs process_and_generate.
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# Text-to-3D Model Generator")
+    with gr.Row():
+        # First column (scale 3): prompt, advanced settings, trigger button.
+        with gr.Column(scale=3):
+            prompt = gr.Textbox(label="Your Image Description", lines=3)
+            # Collapsed by default; values are forwarded to generate_image.
+            with gr.Accordion("Advanced Settings", open=False):
+                height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=1024)
+                width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=1024)
+                steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
+                scales = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=5.0, step=0.1, value=3.5)
+                seed = gr.Number(label="Seed", value=3413, precision=0)
+            generate_btn = gr.Button("Generate 3D Model", variant="primary")
+        # Second column (scale 4): the generated image and the 3D viewer.
+        with gr.Column(scale=4):
+            output_image = gr.Image(label="Generated Image")
+            output_3d = LitModel3D(label="3D Model", clear_color=[0.0, 0.0, 0.0, 0.0])
+    # Input order matches the process_and_generate signature; its two return
+    # values fill the image and the 3D viewer in that order.
+    generate_btn.click(
+        process_and_generate,
+        inputs=[prompt, height, width, steps, scales, seed],
+        outputs=[output_image, output_3d]
+    )
+# Start the app only when the file is executed as a script.
+if __name__ == "__main__":
     demo.launch()