Anton Obukhov committed
Commit a20d271 · 1 Parent(s): 9c30a60

update to use the new template
CONTRIBUTING.md DELETED
@@ -1,15 +0,0 @@
1
- ## Contributing instructions
2
-
3
- We appreciate your interest in contributing. Please follow these guidelines:
4
-
5
- 1. **Discuss Changes:** Start a GitHub issue to talk about your proposed change before proceeding.
6
-
7
- 2. **Pull Requests:** Avoid unsolicited PRs. Discussion helps align with project goals.
8
-
9
- 3. **License Agreement:** By submitting a PR, you accept our LICENSE terms.
10
-
11
- 4. **Legal Compatibility:** Ensure your change complies with our project's objectives and licensing.
12
-
13
- 5. **Attribution:** Credit third-party code in your PR if used.
14
-
15
- Please, feel free to reach out for questions or assistance. Your contributions are valued, and we're excited to work together to enhance this project!
README.md CHANGED
@@ -4,12 +4,12 @@ emoji: 🏵️
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: gradio
7
- sdk_version: 4.21.0
8
  app_file: app.py
9
  pinned: true
10
  license: cc-by-sa-4.0
11
  models:
12
- - prs-eth/marigold-v1-0
13
  ---
14
 
15
  This is a demo of the monocular depth estimation pipeline, described in the CVPR 2024 paper titled ["Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation"](https://arxiv.org/abs/2312.02145)
 
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: gradio
7
+ sdk_version: 4.44.1
8
  app_file: app.py
9
  pinned: true
10
  license: cc-by-sa-4.0
11
  models:
12
+ - prs-eth/marigold-depth-v1-0
13
  ---
14
 
15
  This is a demo of the monocular depth estimation pipeline, described in the CVPR 2024 paper titled ["Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation"](https://arxiv.org/abs/2312.02145)
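
The updated front matter pins Gradio SDK 4.44.1 and points at the renamed checkpoint prs-eth/marigold-depth-v1-0, which the rewritten app.py below loads through diffusers' MarigoldDepthPipeline. As a reference, here is a minimal stand-alone sketch of that usage, mirroring the calls made in app.py; the input path is a placeholder, and anything beyond the calls shown in this commit (default precision, Hub access) is an assumption.

```python
# Minimal sketch of the diffusers-based usage the updated Space relies on.
# Assumes diffusers and torch are installed; "input.jpg" is a placeholder path.
import torch
from diffusers import MarigoldDepthPipeline, DDIMScheduler
from diffusers.utils import load_image

device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = MarigoldDepthPipeline.from_pretrained("prs-eth/marigold-depth-v1-0")
# Same scheduler tweak app.py applies for few-step inference.
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
pipe = pipe.to(device)

image = load_image("input.jpg")
result = pipe(image, num_inference_steps=4, ensemble_size=1, processing_resolution=768)

# Export the same two modalities the demo shows.
pipe.image_processor.visualize_depth(result.prediction)[0].save("depth_vis.png")
pipe.image_processor.export_depth_to_16bit_png(result.prediction)[0].save("depth_16bit.png")
```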
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Anton Obukhov, ETH Zurich. All rights reserved.
2
  #
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
  # you may not use this file except in compliance with the License.
@@ -12,613 +12,162 @@
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
  # --------------------------------------------------------------------------
15
- # If you find this code useful, we kindly ask you to cite our paper in your work.
16
- # Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
17
- # More information about the method can be found at https://marigoldmonodepth.github.io
 
 
 
 
 
 
 
 
 
 
 
 
18
  # --------------------------------------------------------------------------
19
 
20
-
21
- import functools
22
  import os
23
-
24
- import spaces
25
  import gradio as gr
26
- import numpy as np
27
  import torch as torch
28
- from PIL import Image
29
-
30
- from gradio_imageslider import ImageSlider
31
  from huggingface_hub import login
 
32
 
33
- from extrude import extrude_depth_3d
34
- from marigold_depth_estimation import MarigoldPipeline
35
-
36
-
37
- def process(
38
- pipe,
39
- path_input,
40
- ensemble_size,
41
- denoise_steps,
42
- processing_res,
43
- path_out_16bit=None,
44
- path_out_fp32=None,
45
- path_out_vis=None,
46
- _input_3d_plane_near=None,
47
- _input_3d_plane_far=None,
48
- _input_3d_embossing=None,
49
- _input_3d_filter_size=None,
50
- _input_3d_frame_near=None,
51
- ):
52
- if path_out_vis is not None:
53
- return (
54
- [path_out_16bit, path_out_vis],
55
- [path_out_16bit, path_out_fp32, path_out_vis],
56
- )
57
-
58
- input_image = Image.open(path_input)
59
-
60
- pipe_out = pipe(
61
- input_image,
62
- ensemble_size=ensemble_size,
63
- denoising_steps=denoise_steps,
64
- processing_res=processing_res,
65
- batch_size=1 if processing_res == 0 else 0,
66
- show_progress_bar=True,
67
- )
68
-
69
- depth_pred = pipe_out.depth_np
70
- depth_colored = pipe_out.depth_colored
71
- depth_16bit = (depth_pred * 65535.0).astype(np.uint16)
72
-
73
- path_output_dir = os.path.splitext(path_input)[0] + "_output"
74
- os.makedirs(path_output_dir, exist_ok=True)
75
-
76
- name_base = os.path.splitext(os.path.basename(path_input))[0]
77
- path_out_fp32 = os.path.join(path_output_dir, f"{name_base}_depth_fp32.npy")
78
- path_out_16bit = os.path.join(path_output_dir, f"{name_base}_depth_16bit.png")
79
- path_out_vis = os.path.join(path_output_dir, f"{name_base}_depth_colored.png")
80
-
81
- np.save(path_out_fp32, depth_pred)
82
- Image.fromarray(depth_16bit).save(path_out_16bit, mode="I;16")
83
- depth_colored.save(path_out_vis)
84
-
85
- return (
86
- [path_out_16bit, path_out_vis],
87
- [path_out_16bit, path_out_fp32, path_out_vis],
88
- )
89
-
90
-
91
- def process_3d(
92
- input_image,
93
- files,
94
- size_longest_px,
95
- size_longest_cm,
96
- filter_size,
97
- plane_near,
98
- plane_far,
99
- embossing,
100
- frame_thickness,
101
- frame_near,
102
- frame_far,
103
- ):
104
- if input_image is None or len(files) < 1:
105
- raise gr.Error(
106
- "Please upload an image (or use examples) and compute depth first"
107
- )
108
-
109
- if plane_near >= plane_far:
110
- raise gr.Error("NEAR plane must have a value smaller than the FAR plane")
111
-
112
- def _process_3d(
113
- size_longest_px,
114
- filter_size,
115
- vertex_colors,
116
- scene_lights,
117
- output_model_scale=None,
118
- prepare_for_3d_printing=False,
119
- ):
120
- image_rgb = input_image
121
- image_depth = files[0]
122
-
123
- image_rgb_basename, image_rgb_ext = os.path.splitext(image_rgb)
124
- image_depth_basename, image_depth_ext = os.path.splitext(image_depth)
125
-
126
- image_rgb_content = Image.open(image_rgb)
127
- image_rgb_w, image_rgb_h = image_rgb_content.width, image_rgb_content.height
128
- image_rgb_d = max(image_rgb_w, image_rgb_h)
129
- image_new_w = size_longest_px * image_rgb_w // image_rgb_d
130
- image_new_h = size_longest_px * image_rgb_h // image_rgb_d
131
 
132
- image_rgb_new = image_rgb_basename + f"_{size_longest_px}" + image_rgb_ext
133
- image_depth_new = image_depth_basename + f"_{size_longest_px}" + image_depth_ext
134
- image_rgb_content.resize((image_new_w, image_new_h), Image.LANCZOS).save(
135
- image_rgb_new
136
- )
137
- Image.open(image_depth).resize((image_new_w, image_new_h), Image.BILINEAR).save(
138
- image_depth_new
139
- )
140
 
141
- path_glb, path_stl = extrude_depth_3d(
142
- image_rgb_new,
143
- image_depth_new,
144
- output_model_scale=(
145
- size_longest_cm * 10
146
- if output_model_scale is None
147
- else output_model_scale
148
- ),
149
- filter_size=filter_size,
150
- coef_near=plane_near,
151
- coef_far=plane_far,
152
- emboss=embossing / 100,
153
- f_thic=frame_thickness / 100,
154
- f_near=frame_near / 100,
155
- f_back=frame_far / 100,
156
- vertex_colors=vertex_colors,
157
- scene_lights=scene_lights,
158
- prepare_for_3d_printing=prepare_for_3d_printing,
159
- )
160
 
161
- return path_glb, path_stl
 
 
 
 
 
 
 
162
 
163
- path_viewer_glb, _ = _process_3d(
164
- 256, filter_size, vertex_colors=False, scene_lights=True, output_model_scale=1
165
- )
166
- path_files_glb, path_files_stl = _process_3d(
167
- size_longest_px,
168
- filter_size,
169
- vertex_colors=True,
170
- scene_lights=False,
171
- prepare_for_3d_printing=True,
172
- )
173
 
174
- return path_viewer_glb, [path_files_glb, path_files_stl]
 
 
 
 
175
 
176
-
177
- def run_demo_server(pipe):
178
- process_pipe = spaces.GPU(functools.partial(process, pipe), duration=120)
179
- os.environ["GRADIO_ALLOW_FLAGGING"] = "never"
180
-
181
- with gr.Blocks(
182
- analytics_enabled=False,
183
- title="Marigold Depth Estimation",
184
- css="""
185
- #download {
186
- height: 118px;
187
- }
188
- .slider .inner {
189
- width: 5px;
190
- background: #FFF;
191
- }
192
- .viewport {
193
- aspect-ratio: 4/3;
194
- }
195
- h1 {
196
- text-align: center;
197
- display: block;
198
- }
199
- h2 {
200
- text-align: center;
201
- display: block;
202
- }
203
- h3 {
204
- text-align: center;
205
- display: block;
206
- }
207
- """,
208
- ) as demo:
209
  gr.Markdown(
210
  """
211
- # Marigold Depth Estimation
212
-
213
  <p align="center">
214
  <a title="Website" href="https://marigoldmonodepth.github.io/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
215
- <img src="https://www.obukhov.ai/img/badges/badge-website.svg">
216
  </a>
217
  <a title="arXiv" href="https://arxiv.org/abs/2312.02145" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
218
- <img src="https://www.obukhov.ai/img/badges/badge-pdf.svg">
219
  </a>
220
  <a title="Github" href="https://github.com/prs-eth/marigold" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
221
  <img src="https://img.shields.io/github/stars/prs-eth/marigold?label=GitHub%20%E2%98%85&logo=github&color=C8C" alt="badge-github-stars">
222
  </a>
 
 
 
 
 
 
223
  <a title="Social" href="https://twitter.com/antonobukhov1" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
224
- <img src="https://www.obukhov.ai/img/badges/badge-social.svg" alt="social">
225
  </a>
 
 
 
 
 
 
226
  </p>
227
-
228
- Marigold is the state-of-the-art depth estimator for images in the wild.
229
- Upload your image into the <b>first</b> pane, or click any of the <b>examples</b> below.
230
- The result will be computed and appear in the <b>second</b> pane.
231
- Scroll down to use the computed depth map for creating a 3D printable asset.
232
-
233
- <a href="https://huggingface.co/spaces/prs-eth/marigold-lcm" style="color: crimson;">
234
- <h3 style="color: crimson;">Check out Marigold-LCM — a FAST version of this demo!<h3>
235
- </a>
236
  """
237
  )
238
 
239
- with gr.Row():
240
- with gr.Column():
241
- input_image = gr.Image(
242
- label="Input Image",
243
- type="filepath",
244
- )
245
- with gr.Accordion("Advanced options", open=False):
246
- ensemble_size = gr.Slider(
247
- label="Ensemble size",
248
- minimum=1,
249
- maximum=20,
250
- step=1,
251
- value=10,
252
- )
253
- denoise_steps = gr.Slider(
254
- label="Number of denoising steps",
255
- minimum=1,
256
- maximum=20,
257
- step=1,
258
- value=10,
259
- )
260
- processing_res = gr.Radio(
261
- [
262
- ("Native", 0),
263
- ("Recommended", 768),
264
- ],
265
- label="Processing resolution",
266
- value=768,
267
- )
268
- input_output_16bit = gr.File(
269
- label="Predicted depth (16-bit)",
270
- visible=False,
271
- )
272
- input_output_fp32 = gr.File(
273
- label="Predicted depth (32-bit)",
274
- visible=False,
275
- )
276
- input_output_vis = gr.File(
277
- label="Predicted depth (red-near, blue-far)",
278
- visible=False,
279
- )
280
- with gr.Row():
281
- submit_btn = gr.Button(value="Compute Depth", variant="primary")
282
- clear_btn = gr.Button(value="Clear")
283
- with gr.Column():
284
- output_slider = ImageSlider(
285
- label="Predicted depth (red-near, blue-far)",
286
- type="filepath",
287
- show_download_button=True,
288
- show_share_button=True,
289
- interactive=False,
290
- elem_classes="slider",
291
- position=0.25,
292
- )
293
- files = gr.Files(
294
- label="Depth outputs",
295
- elem_id="download",
296
- interactive=False,
297
- )
298
-
299
- demo_3d_header = gr.Markdown(
300
- """
301
- <h3 align="center">3D Printing Depth Maps</h3>
302
- <p align="justify">
303
- This part of the demo uses Marigold depth maps estimated in the previous step to create a
304
- 3D-printable model. The models are watertight, with correct normals, and exported in the STL format.
305
- We recommended creating the first model with the default parameters and iterating on it until the best
306
- result (see Pro Tips below).
307
- </p>
308
- """,
309
- render=False,
310
- )
311
-
312
- demo_3d = gr.Row(render=False)
313
- with demo_3d:
314
- with gr.Column():
315
- with gr.Accordion("3D printing demo: Main options", open=True):
316
- plane_near = gr.Slider(
317
- label="Relative position of the near plane (between 0 and 1)",
318
- minimum=0.0,
319
- maximum=1.0,
320
- step=0.001,
321
- value=0.0,
322
- )
323
- plane_far = gr.Slider(
324
- label="Relative position of the far plane (between near and 1)",
325
- minimum=0.0,
326
- maximum=1.0,
327
- step=0.001,
328
- value=1.0,
329
- )
330
- embossing = gr.Slider(
331
- label="Embossing level",
332
- minimum=0,
333
- maximum=100,
334
- step=1,
335
- value=20,
336
- )
337
- with gr.Accordion("3D printing demo: Advanced options", open=False):
338
- size_longest_px = gr.Slider(
339
- label="Size (px) of the longest side",
340
- minimum=256,
341
- maximum=1024,
342
- step=256,
343
- value=512,
344
- )
345
- size_longest_cm = gr.Slider(
346
- label="Size (cm) of the longest side",
347
- minimum=1,
348
- maximum=100,
349
- step=1,
350
- value=10,
351
- )
352
- filter_size = gr.Slider(
353
- label="Size (px) of the smoothing filter",
354
- minimum=1,
355
- maximum=5,
356
- step=2,
357
- value=3,
358
- )
359
- frame_thickness = gr.Slider(
360
- label="Frame thickness",
361
- minimum=0,
362
- maximum=100,
363
- step=1,
364
- value=5,
365
- )
366
- frame_near = gr.Slider(
367
- label="Frame's near plane offset",
368
- minimum=-100,
369
- maximum=100,
370
- step=1,
371
- value=1,
372
- )
373
- frame_far = gr.Slider(
374
- label="Frame's far plane offset",
375
- minimum=1,
376
- maximum=10,
377
- step=1,
378
- value=1,
379
- )
380
- with gr.Row():
381
- submit_3d = gr.Button(value="Create 3D", variant="primary")
382
- clear_3d = gr.Button(value="Clear 3D")
383
- gr.Markdown(
384
- """
385
- <h5 align="center">Pro Tips</h5>
386
- <ol>
387
- <li><b>Re-render with new parameters</b>: Click "Clear 3D" and then "Create 3D".</li>
388
- <li><b>Adjust 3D scale and cut-off focus</b>: Set the frame's near plane offset to the
389
- minimum and use 3D preview to evaluate depth scaling. Repeat until the scale is correct and
390
- everything important is in the focus. Set the optimal value for frame's near
391
- plane offset as a last step.</li>
392
- <li><b>Increase details</b>: Decrease size of the smoothing filter (also increases noise).</li>
393
- </ol>
394
- """
395
- )
396
-
397
- with gr.Column():
398
- viewer_3d = gr.Model3D(
399
- camera_position=(75.0, 90.0, 1.25),
400
- elem_classes="viewport",
401
- label="3D preview (low-res, relief highlight)",
402
- interactive=False,
403
- )
404
- files_3d = gr.Files(
405
- label="3D model outputs (high-res)",
406
- elem_id="download",
407
- interactive=False,
408
- )
409
-
410
- blocks_settings_depth = [ensemble_size, denoise_steps, processing_res]
411
- blocks_settings_3d = [
412
- plane_near,
413
- plane_far,
414
- embossing,
415
- size_longest_px,
416
- size_longest_cm,
417
- filter_size,
418
- frame_thickness,
419
- frame_near,
420
- frame_far,
421
- ]
422
- blocks_settings = blocks_settings_depth + blocks_settings_3d
423
- map_id_to_default = {b._id: b.value for b in blocks_settings}
424
-
425
- inputs = [
426
- input_image,
427
- ensemble_size,
428
- denoise_steps,
429
- processing_res,
430
- input_output_16bit,
431
- input_output_fp32,
432
- input_output_vis,
433
- plane_near,
434
- plane_far,
435
- embossing,
436
- filter_size,
437
- frame_near,
438
- ]
439
- outputs = [
440
- submit_btn,
441
- input_image,
442
- output_slider,
443
- files,
444
- ]
445
-
446
- def submit_depth_fn(*args):
447
- out = list(process_pipe(*args))
448
- out = [gr.Button(interactive=False), gr.Image(interactive=False)] + out
449
- return out
450
-
451
- submit_btn.click(
452
- fn=submit_depth_fn,
453
- inputs=inputs,
454
- outputs=outputs,
455
- concurrency_limit=1,
456
- )
457
-
458
- gr.Examples(
459
- fn=submit_depth_fn,
460
- examples=[
461
- [
462
- "files/bee.jpg",
463
- 10, # ensemble_size
464
- 10, # denoise_steps
465
- 768, # processing_res
466
- "files/bee_depth_16bit.png",
467
- "files/bee_depth_fp32.npy",
468
- "files/bee_depth_colored.png",
469
- 0.0, # plane_near
470
- 0.5, # plane_far
471
- 20, # embossing
472
- 3, # filter_size
473
- 0, # frame_near
474
- ],
475
- [
476
- "files/cat.jpg",
477
- 10, # ensemble_size
478
- 10, # denoise_steps
479
- 768, # processing_res
480
- "files/cat_depth_16bit.png",
481
- "files/cat_depth_fp32.npy",
482
- "files/cat_depth_colored.png",
483
- 0.0, # plane_near
484
- 0.3, # plane_far
485
- 20, # embossing
486
- 3, # filter_size
487
- 0, # frame_near
488
- ],
489
  [
490
- "files/swings.jpg",
491
- 10, # ensemble_size
492
- 10, # denoise_steps
493
- 768, # processing_res
494
- "files/swings_depth_16bit.png",
495
- "files/swings_depth_fp32.npy",
496
- "files/swings_depth_colored.png",
497
- 0.05, # plane_near
498
- 0.25, # plane_far
499
- 10, # embossing
500
- 1, # filter_size
501
- 0, # frame_near
502
  ],
503
- [
504
- "files/einstein.jpg",
505
- 10, # ensemble_size
506
- 10, # denoise_steps
507
- 768, # processing_res
508
- "files/einstein_depth_16bit.png",
509
- "files/einstein_depth_fp32.npy",
510
- "files/einstein_depth_colored.png",
511
- 0.0, # plane_near
512
- 0.5, # plane_far
513
- 50, # embossing
514
- 3, # filter_size
515
- -15, # frame_near
516
- ],
517
- ],
518
- inputs=inputs,
519
- outputs=outputs,
520
- cache_examples=True,
521
- )
522
-
523
- demo_3d_header.render()
524
- demo_3d.render()
525
-
526
- def clear_fn():
527
- out = []
528
- for b in blocks_settings:
529
- out.append(map_id_to_default[b._id])
530
- out += [
531
- gr.Button(interactive=True),
532
- gr.Button(interactive=True),
533
- gr.Image(value=None, interactive=True),
534
- None,
535
- None,
536
- None,
537
- None,
538
- None,
539
- None,
540
- None,
541
- ]
542
- return out
543
-
544
- clear_btn.click(
545
- fn=clear_fn,
546
- inputs=[],
547
- outputs=blocks_settings
548
- + [
549
- submit_btn,
550
- submit_3d,
551
- input_image,
552
- input_output_16bit,
553
- input_output_fp32,
554
- input_output_vis,
555
- output_slider,
556
- files,
557
- viewer_3d,
558
- files_3d,
559
- ],
560
- )
561
-
562
- def submit_3d_fn(*args):
563
- out = list(process_3d(*args))
564
- out = [gr.Button(interactive=False)] + out
565
- return out
566
-
567
- submit_3d.click(
568
- fn=submit_3d_fn,
569
- inputs=[
570
- input_image,
571
- files,
572
- size_longest_px,
573
- size_longest_cm,
574
- filter_size,
575
- plane_near,
576
- plane_far,
577
- embossing,
578
- frame_thickness,
579
- frame_near,
580
- frame_far,
581
- ],
582
- outputs=[submit_3d, viewer_3d, files_3d],
583
- concurrency_limit=1,
584
- )
585
-
586
- def clear_3d_fn():
587
- return [gr.Button(interactive=True), None, None]
588
-
589
- clear_3d.click(
590
- fn=clear_3d_fn,
591
- inputs=[],
592
- outputs=[submit_3d, viewer_3d, files_3d],
593
- )
594
-
595
- demo.queue(
596
- api_open=False,
597
- ).launch(
598
- server_name="0.0.0.0",
599
- server_port=7860,
600
  )
601
 
602
-
603
- def main():
604
- CHECKPOINT = "prs-eth/marigold-v1-0"
605
-
606
- if "HF_TOKEN_LOGIN" in os.environ:
607
- login(token=os.environ["HF_TOKEN_LOGIN"])
608
-
609
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
610
-
611
- pipe = MarigoldPipeline.from_pretrained(CHECKPOINT)
612
- try:
613
- import xformers
614
-
615
- pipe.enable_xformers_memory_efficient_attention()
616
- except:
617
- pass # run without xformers
618
-
619
- pipe = pipe.to(device)
620
- run_demo_server(pipe)
621
-
622
-
623
- if __name__ == "__main__":
624
- main()
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2023-2025 Marigold Team, ETH Zürich. All rights reserved.
2
  #
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
  # you may not use this file except in compliance with the License.
 
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
  # --------------------------------------------------------------------------
15
+ # More information about Marigold:
16
+ # https://marigoldmonodepth.github.io
17
+ # https://marigoldcomputervision.github.io
18
+ # Efficient inference pipelines are now part of diffusers:
19
+ # https://huggingface.co/docs/diffusers/using-diffusers/marigold_usage
20
+ # https://huggingface.co/docs/diffusers/api/pipelines/marigold
21
+ # Examples of trained models and live demos:
22
+ # https://huggingface.co/prs-eth
23
+ # Related projects:
24
+ # https://marigolddepthcompletion.github.io/
25
+ # https://rollingdepth.github.io/
26
+ # Citation (BibTeX):
27
+ # https://github.com/prs-eth/Marigold#-citation
28
+ # https://github.com/prs-eth/Marigold-DC#-citation
29
+ # https://github.com/prs-eth/rollingdepth#-citation
30
  # --------------------------------------------------------------------------
31
 
 
 
32
  import os
 
 
33
  import gradio as gr
 
34
  import torch as torch
35
+ from diffusers import MarigoldDepthPipeline, DDIMScheduler
36
+ from gradio_dualvision import DualVisionApp
 
37
  from huggingface_hub import login
38
+ from PIL import Image
39
 
40
+ CHECKPOINT = "prs-eth/marigold-depth-v1-0"
41
 
42
+ if "HF_TOKEN_LOGIN" in os.environ:
43
+ login(token=os.environ["HF_TOKEN_LOGIN"])
 
 
 
 
 
 
44
 
45
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
46
+ dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
47
 
48
+ pipe = MarigoldDepthPipeline.from_pretrained(CHECKPOINT)
49
+ pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
50
+ pipe = pipe.to(device=device, dtype=dtype)
51
+ try:
52
+ import xformers
53
+ pipe.enable_xformers_memory_efficient_attention()
54
+ except:
55
+ pass
56
 
 
 
 
 
 
 
 
 
 
 
57
 
58
+ class MarigoldDepthApp(DualVisionApp):
59
+ DEFAULT_SEED = 2024
60
+ DEFAULT_ENSEMBLE_SIZE = 1
61
+ DEFAULT_DENOISE_STEPS = 4
62
+ DEFAULT_PROCESSING_RES = 768
63
 
64
+ def make_header(self):
65
  gr.Markdown(
66
  """
67
+ ## Marigold Depth Estimation
 
68
  <p align="center">
69
  <a title="Website" href="https://marigoldmonodepth.github.io/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
70
+ <img src="https://img.shields.io/badge/%E2%99%A5%20Project%20-Website-blue">
71
  </a>
72
  <a title="arXiv" href="https://arxiv.org/abs/2312.02145" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
73
+ <img src="https://img.shields.io/badge/%F0%9F%93%84%20Read%20-Paper-AF3436">
74
  </a>
75
  <a title="Github" href="https://github.com/prs-eth/marigold" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
76
  <img src="https://img.shields.io/github/stars/prs-eth/marigold?label=GitHub%20%E2%98%85&logo=github&color=C8C" alt="badge-github-stars">
77
  </a>
78
+ <a title="Video Depth" href="https://huggingface.co/spaces/prs-eth/rollingdepth" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
79
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Video%20Depth%20-Demo-yellow" alt="videodepth">
80
+ </a>
81
+ <a title="Depth-to-3D" href="https://huggingface.co/spaces/prs-eth/depth-to-3d-print" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
82
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Depth--to--3D%20-Demo-yellow" alt="depthto3d">
83
+ </a>
84
  <a title="Social" href="https://twitter.com/antonobukhov1" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
85
+ <img src="https://shields.io/twitter/follow/:?label=Subscribe%20for%20updates!" alt="social">
86
  </a>
87
+ </p>
88
+ <p align="center" style="margin-top: 0px;">
89
+ Upload a photo or select an example below to compute depth maps in real time.
90
+ Use the slider to reveal areas of interest.
91
+ Use the radio-buttons to switch between modalities.
92
+ Check our other demo badges above for new or relocated functionality.
93
  </p>
 
 
 
 
 
 
 
 
 
94
  """
95
  )
96
 
97
+ def build_user_components(self):
98
+ with gr.Column():
99
+ ensemble_size = gr.Slider(
100
+ label="Ensemble size",
101
+ minimum=1,
102
+ maximum=20,
103
+ step=1,
104
+ value=10,
105
+ )
106
+ denoise_steps = gr.Slider(
107
+ label="Number of denoising steps",
108
+ minimum=1,
109
+ maximum=20,
110
+ step=1,
111
+ value=10,
112
+ )
113
+ processing_res = gr.Radio(
114
  [
115
+ ("Native", 0),
116
+ ("Recommended", 768),
 
 
 
 
 
 
 
 
 
 
117
  ],
118
+ label="Processing resolution",
119
+ value=768,
120
+ )
121
+ return {
122
+ "ensemble_size": ensemble_size,
123
+ "denoise_steps": denoise_steps,
124
+ "processing_res": processing_res,
125
+ }
126
+
127
+ def process(self, image_in: Image.Image, **kwargs):
128
+ ensemble_size = kwargs.get("ensemble_size", self.DEFAULT_ENSEMBLE_SIZE)
129
+ denoise_steps = kwargs.get("denoise_steps", self.DEFAULT_DENOISE_STEPS)
130
+ processing_res = kwargs.get("processing_res", self.DEFAULT_PROCESSING_RES)
131
+ generator = torch.Generator(device=device).manual_seed(self.DEFAULT_SEED)
132
+
133
+ pipe_out = pipe(
134
+ image_in,
135
+ ensemble_size=ensemble_size,
136
+ num_inference_steps=denoise_steps,
137
+ processing_resolution=processing_res,
138
+ batch_size=1 if processing_res == 0 else 2,
139
+ output_uncertainty=ensemble_size >= 3,
140
+ generator=generator,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  )
142
 
143
+ depth_vis = pipe.image_processor.visualize_depth(pipe_out.prediction)[0]
144
+ depth_16bit = pipe.image_processor.export_depth_to_16bit_png(pipe_out.prediction)[0]
145
+
146
+ out_modalities = {
147
+ "Depth Visualization": depth_vis,
148
+ "Depth 16-bit": depth_16bit,
149
+ }
150
+ if ensemble_size >= 3:
151
+ uncertainty = pipe.image_processor.visualize_uncertainty(pipe_out.uncertainty)[0]
152
+ out_modalities["Uncertainty"] = uncertainty
153
+
154
+ out_settings = {
155
+ "ensemble_size": ensemble_size,
156
+ "denoise_steps": denoise_steps,
157
+ "processing_res": processing_res,
158
+ }
159
+ return out_modalities, out_settings
160
+
161
+
162
+ with MarigoldDepthApp(
163
+ title="Marigold Depth",
164
+ examples_path="files",
165
+ examples_per_page=5,
166
+ squeeze_canvas=True,
167
+ ) as demo:
168
+ demo.queue(
169
+ api_open=False,
170
+ ).launch(
171
+ server_name="0.0.0.0",
172
+ server_port=7860,
173
+ )
extrude.py DELETED
@@ -1,354 +0,0 @@
1
- # Copyright 2024 Anton Obukhov, ETH Zurich. All rights reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # --------------------------------------------------------------------------
15
- # If you find this code useful, we kindly ask you to cite our paper in your work.
16
- # Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
17
- # More information about the method can be found at https://marigoldmonodepth.github.io
18
- # --------------------------------------------------------------------------
19
-
20
-
21
- import math
22
- import os
23
-
24
- import numpy as np
25
- import pygltflib
26
- import trimesh
27
- from PIL import Image, ImageFilter
28
-
29
-
30
- def quaternion_multiply(q1, q2):
31
- x1, y1, z1, w1 = q1
32
- x2, y2, z2, w2 = q2
33
- return [
34
- w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2,
35
- w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2,
36
- w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2,
37
- w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2,
38
- ]
39
-
40
-
41
- def glb_add_lights(path_input, path_output):
42
- """
43
- Adds directional lights in the horizontal plane to the glb file.
44
- :param path_input: path to input glb
45
- :param path_output: path to output glb
46
- :return: None
47
- """
48
- glb = pygltflib.GLTF2().load(path_input)
49
-
50
- N = 3 # default max num lights in Babylon.js is 4
51
- angle_step = 2 * math.pi / N
52
- elevation_angle = math.radians(75)
53
-
54
- light_colors = [
55
- [1.0, 0.0, 0.0],
56
- [0.0, 1.0, 0.0],
57
- [0.0, 0.0, 1.0],
58
- ]
59
-
60
- lights_extension = {
61
- "lights": [
62
- {"type": "directional", "color": light_colors[i], "intensity": 2.0}
63
- for i in range(N)
64
- ]
65
- }
66
-
67
- if "KHR_lights_punctual" not in glb.extensionsUsed:
68
- glb.extensionsUsed.append("KHR_lights_punctual")
69
- glb.extensions["KHR_lights_punctual"] = lights_extension
70
-
71
- light_nodes = []
72
- for i in range(N):
73
- angle = i * angle_step
74
-
75
- pos_rot = [0.0, 0.0, math.sin(angle / 2), math.cos(angle / 2)]
76
- elev_rot = [
77
- math.sin(elevation_angle / 2),
78
- 0.0,
79
- 0.0,
80
- math.cos(elevation_angle / 2),
81
- ]
82
- rotation = quaternion_multiply(pos_rot, elev_rot)
83
-
84
- node = {
85
- "rotation": rotation,
86
- "extensions": {"KHR_lights_punctual": {"light": i}},
87
- }
88
- light_nodes.append(node)
89
-
90
- light_node_indices = list(range(len(glb.nodes), len(glb.nodes) + N))
91
- glb.nodes.extend(light_nodes)
92
-
93
- root_node_index = glb.scenes[glb.scene].nodes[0]
94
- root_node = glb.nodes[root_node_index]
95
- if hasattr(root_node, "children"):
96
- root_node.children.extend(light_node_indices)
97
- else:
98
- root_node.children = light_node_indices
99
-
100
- glb.save(path_output)
101
-
102
-
103
- def extrude_depth_3d(
104
- path_rgb,
105
- path_depth,
106
- output_model_scale=100,
107
- filter_size=3,
108
- coef_near=0.0,
109
- coef_far=1.0,
110
- emboss=0.3,
111
- f_thic=0.05,
112
- f_near=-0.15,
113
- f_back=0.01,
114
- vertex_colors=True,
115
- scene_lights=True,
116
- prepare_for_3d_printing=False,
117
- ):
118
- f_far_inner = -emboss
119
- f_far_outer = f_far_inner - f_back
120
-
121
- f_near = max(f_near, f_far_inner)
122
-
123
- depth_image = Image.open(path_depth)
124
- assert depth_image.mode == "I", depth_image.mode
125
- depth_image = depth_image.filter(ImageFilter.MedianFilter(size=filter_size))
126
-
127
- w, h = depth_image.size
128
- d_max = max(w, h)
129
- depth_image = np.array(depth_image).astype(np.double)
130
- z_min, z_max = np.min(depth_image), np.max(depth_image)
131
- depth_image = (depth_image.astype(np.double) - z_min) / (z_max - z_min)
132
- depth_image[depth_image < coef_near] = coef_near
133
- depth_image[depth_image > coef_far] = coef_far
134
- depth_image = emboss * (depth_image - coef_near) / (coef_far - coef_near)
135
- rgb_image = np.array(
136
- Image.open(path_rgb).convert("RGB").resize((w, h), Image.Resampling.LANCZOS)
137
- )
138
-
139
- w_norm = w / float(d_max - 1)
140
- h_norm = h / float(d_max - 1)
141
- w_half = w_norm / 2
142
- h_half = h_norm / 2
143
-
144
- x, y = np.meshgrid(np.arange(w), np.arange(h))
145
- x = x / float(d_max - 1) - w_half # [-w_half, w_half]
146
- y = -y / float(d_max - 1) + h_half # [-h_half, h_half]
147
- z = -depth_image # -depth_emboss (far) - 0 (near)
148
- vertices_2d = np.stack((x, y, z), axis=-1)
149
- vertices = vertices_2d.reshape(-1, 3)
150
- colors = rgb_image[:, :, :3].reshape(-1, 3) / 255.0
151
-
152
- faces = []
153
- for y in range(h - 1):
154
- for x in range(w - 1):
155
- idx = y * w + x
156
- faces.append([idx, idx + w, idx + 1])
157
- faces.append([idx + 1, idx + w, idx + 1 + w])
158
-
159
- # OUTER frame
160
-
161
- nv = len(vertices)
162
- vertices = np.append(
163
- vertices,
164
- [
165
- [-w_half - f_thic, -h_half - f_thic, f_near], # 00
166
- [-w_half - f_thic, -h_half - f_thic, f_far_outer], # 01
167
- [w_half + f_thic, -h_half - f_thic, f_near], # 02
168
- [w_half + f_thic, -h_half - f_thic, f_far_outer], # 03
169
- [w_half + f_thic, h_half + f_thic, f_near], # 04
170
- [w_half + f_thic, h_half + f_thic, f_far_outer], # 05
171
- [-w_half - f_thic, h_half + f_thic, f_near], # 06
172
- [-w_half - f_thic, h_half + f_thic, f_far_outer], # 07
173
- ],
174
- axis=0,
175
- )
176
- faces.extend(
177
- [
178
- [nv + 0, nv + 1, nv + 2],
179
- [nv + 2, nv + 1, nv + 3],
180
- [nv + 2, nv + 3, nv + 4],
181
- [nv + 4, nv + 3, nv + 5],
182
- [nv + 4, nv + 5, nv + 6],
183
- [nv + 6, nv + 5, nv + 7],
184
- [nv + 6, nv + 7, nv + 0],
185
- [nv + 0, nv + 7, nv + 1],
186
- ]
187
- )
188
- colors = np.append(colors, [[0.5, 0.5, 0.5]] * 8, axis=0)
189
-
190
- # INNER frame
191
-
192
- nv = len(vertices)
193
- vertices_left_data = vertices_2d[:, 0] # H x 3
194
- vertices_left_frame = vertices_2d[:, 0].copy() # H x 3
195
- vertices_left_frame[:, 2] = f_near
196
- vertices = np.append(vertices, vertices_left_data, axis=0)
197
- vertices = np.append(vertices, vertices_left_frame, axis=0)
198
- colors = np.append(colors, [[0.5, 0.5, 0.5]] * (2 * h), axis=0)
199
- for i in range(h - 1):
200
- nvi_d = nv + i
201
- nvi_f = nvi_d + h
202
- faces.append([nvi_d, nvi_f, nvi_d + 1])
203
- faces.append([nvi_d + 1, nvi_f, nvi_f + 1])
204
-
205
- nv = len(vertices)
206
- vertices_right_data = vertices_2d[:, -1] # H x 3
207
- vertices_right_frame = vertices_2d[:, -1].copy() # H x 3
208
- vertices_right_frame[:, 2] = f_near
209
- vertices = np.append(vertices, vertices_right_data, axis=0)
210
- vertices = np.append(vertices, vertices_right_frame, axis=0)
211
- colors = np.append(colors, [[0.5, 0.5, 0.5]] * (2 * h), axis=0)
212
- for i in range(h - 1):
213
- nvi_d = nv + i
214
- nvi_f = nvi_d + h
215
- faces.append([nvi_d, nvi_d + 1, nvi_f])
216
- faces.append([nvi_d + 1, nvi_f + 1, nvi_f])
217
-
218
- nv = len(vertices)
219
- vertices_top_data = vertices_2d[0, :] # H x 3
220
- vertices_top_frame = vertices_2d[0, :].copy() # H x 3
221
- vertices_top_frame[:, 2] = f_near
222
- vertices = np.append(vertices, vertices_top_data, axis=0)
223
- vertices = np.append(vertices, vertices_top_frame, axis=0)
224
- colors = np.append(colors, [[0.5, 0.5, 0.5]] * (2 * w), axis=0)
225
- for i in range(w - 1):
226
- nvi_d = nv + i
227
- nvi_f = nvi_d + w
228
- faces.append([nvi_d, nvi_d + 1, nvi_f])
229
- faces.append([nvi_d + 1, nvi_f + 1, nvi_f])
230
-
231
- nv = len(vertices)
232
- vertices_bottom_data = vertices_2d[-1, :] # H x 3
233
- vertices_bottom_frame = vertices_2d[-1, :].copy() # H x 3
234
- vertices_bottom_frame[:, 2] = f_near
235
- vertices = np.append(vertices, vertices_bottom_data, axis=0)
236
- vertices = np.append(vertices, vertices_bottom_frame, axis=0)
237
- colors = np.append(colors, [[0.5, 0.5, 0.5]] * (2 * w), axis=0)
238
- for i in range(w - 1):
239
- nvi_d = nv + i
240
- nvi_f = nvi_d + w
241
- faces.append([nvi_d, nvi_f, nvi_d + 1])
242
- faces.append([nvi_d + 1, nvi_f, nvi_f + 1])
243
-
244
- # FRONT frame
245
-
246
- nv = len(vertices)
247
- vertices = np.append(
248
- vertices,
249
- [
250
- [-w_half - f_thic, -h_half - f_thic, f_near],
251
- [-w_half - f_thic, h_half + f_thic, f_near],
252
- ],
253
- axis=0,
254
- )
255
- vertices = np.append(vertices, vertices_left_frame, axis=0)
256
- colors = np.append(colors, [[0.5, 0.5, 0.5]] * (2 + h), axis=0)
257
- for i in range(h - 1):
258
- faces.append([nv, nv + 2 + i + 1, nv + 2 + i])
259
- faces.append([nv, nv + 2, nv + 1])
260
-
261
- nv = len(vertices)
262
- vertices = np.append(
263
- vertices,
264
- [
265
- [w_half + f_thic, h_half + f_thic, f_near],
266
- [w_half + f_thic, -h_half - f_thic, f_near],
267
- ],
268
- axis=0,
269
- )
270
- vertices = np.append(vertices, vertices_right_frame, axis=0)
271
- colors = np.append(colors, [[0.5, 0.5, 0.5]] * (2 + h), axis=0)
272
- for i in range(h - 1):
273
- faces.append([nv, nv + 2 + i, nv + 2 + i + 1])
274
- faces.append([nv, nv + h + 1, nv + 1])
275
-
276
- nv = len(vertices)
277
- vertices = np.append(
278
- vertices,
279
- [
280
- [w_half + f_thic, h_half + f_thic, f_near],
281
- [-w_half - f_thic, h_half + f_thic, f_near],
282
- ],
283
- axis=0,
284
- )
285
- vertices = np.append(vertices, vertices_top_frame, axis=0)
286
- colors = np.append(colors, [[0.5, 0.5, 0.5]] * (2 + w), axis=0)
287
- for i in range(w - 1):
288
- faces.append([nv, nv + 2 + i, nv + 2 + i + 1])
289
- faces.append([nv, nv + 1, nv + 2])
290
-
291
- nv = len(vertices)
292
- vertices = np.append(
293
- vertices,
294
- [
295
- [-w_half - f_thic, -h_half - f_thic, f_near],
296
- [w_half + f_thic, -h_half - f_thic, f_near],
297
- ],
298
- axis=0,
299
- )
300
- vertices = np.append(vertices, vertices_bottom_frame, axis=0)
301
- colors = np.append(colors, [[0.5, 0.5, 0.5]] * (2 + w), axis=0)
302
- for i in range(w - 1):
303
- faces.append([nv, nv + 2 + i + 1, nv + 2 + i])
304
- faces.append([nv, nv + 1, nv + w + 1])
305
-
306
- # BACK frame
307
-
308
- nv = len(vertices)
309
- vertices = np.append(
310
- vertices,
311
- [
312
- [-w_half - f_thic, -h_half - f_thic, f_far_outer], # 00
313
- [w_half + f_thic, -h_half - f_thic, f_far_outer], # 01
314
- [w_half + f_thic, h_half + f_thic, f_far_outer], # 02
315
- [-w_half - f_thic, h_half + f_thic, f_far_outer], # 03
316
- ],
317
- axis=0,
318
- )
319
- faces.extend(
320
- [
321
- [nv + 0, nv + 2, nv + 1],
322
- [nv + 2, nv + 0, nv + 3],
323
- ]
324
- )
325
- colors = np.append(colors, [[0.5, 0.5, 0.5]] * 4, axis=0)
326
-
327
- trimesh_kwargs = {}
328
- if vertex_colors:
329
- trimesh_kwargs["vertex_colors"] = colors
330
- mesh = trimesh.Trimesh(vertices=vertices, faces=faces, **trimesh_kwargs)
331
-
332
- mesh.merge_vertices()
333
-
334
- current_max_dimension = max(mesh.extents)
335
- scaling_factor = output_model_scale / current_max_dimension
336
- mesh.apply_scale(scaling_factor)
337
-
338
- if prepare_for_3d_printing:
339
- rotation_mat = trimesh.transformations.rotation_matrix(
340
- np.radians(90), [-1, 0, 0]
341
- )
342
- mesh.apply_transform(rotation_mat)
343
-
344
- path_out_base = os.path.splitext(path_depth)[0].replace("_16bit", "")
345
- path_out_glb = path_out_base + ".glb"
346
- path_out_stl = path_out_base + ".stl"
347
-
348
- mesh.export(path_out_glb, file_type="glb")
349
- if scene_lights:
350
- glb_add_lights(path_out_glb, path_out_glb)
351
-
352
- mesh.export(path_out_stl, file_type="stl")
353
-
354
- return path_out_glb, path_out_stl
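
The deleted extrude.py built a watertight, optionally lit relief mesh from the 16-bit depth PNG; that functionality now lives in the separate Depth-to-3D demo linked from the new header badges. For reference, a minimal sketch of the core height-field triangulation it used, assuming trimesh and a single-channel 16-bit depth image on disk (file name is a placeholder); unlike the original, it omits the frame and back faces that made the mesh watertight.

```python
# Minimal sketch (not the deleted extrude.py itself): triangulate a depth map into a
# relief mesh with trimesh, following the same per-pixel quad split used above.
import numpy as np
import trimesh
from PIL import Image

def depth_to_relief_mesh(path_depth_16bit: str, emboss: float = 0.2) -> trimesh.Trimesh:
    depth = np.asarray(Image.open(path_depth_16bit), dtype=np.float64)
    depth = (depth - depth.min()) / (depth.max() - depth.min())  # normalize to [0, 1]
    h, w = depth.shape
    d_max = max(w, h)

    # One vertex per pixel: x/y normalized by the longest side, z pushed back with depth.
    x, y = np.meshgrid(np.arange(w), np.arange(h))
    vertices = np.stack(
        (x / (d_max - 1), -y / (d_max - 1), -emboss * depth), axis=-1
    ).reshape(-1, 3)

    # Two triangles per pixel quad, same traversal as the deleted code.
    faces = []
    for row in range(h - 1):
        for col in range(w - 1):
            idx = row * w + col
            faces.append([idx, idx + w, idx + 1])
            faces.append([idx + 1, idx + w, idx + 1 + w])

    mesh = trimesh.Trimesh(vertices=vertices, faces=np.asarray(faces))
    mesh.merge_vertices()
    return mesh

# Usage (hypothetical file): depth_to_relief_mesh("bee_depth_16bit.png").export("relief.stl")
```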
files/{bee_depth_fp32.npy → arc.jpeg} RENAMED
File without changes
files/bee.jpg CHANGED

Git LFS Details

  • SHA256: 863fccd5ac347c831520ecbb1331e19bc5cfc3caf51acac8dd9a838262a612df
  • Pointer size: 130 Bytes
  • Size of remote file: 77.9 kB

Git LFS Details

  • SHA256: 7643ccdbc9550e2bf6ebdd5c768db5bc829ef719b0d1a91b4f6f9184b52f4751
  • Pointer size: 131 Bytes
  • Size of remote file: 146 kB
files/bee_depth_16bit.png DELETED
Binary file (504 kB)
 
files/bee_depth_colored.png DELETED
Binary file (221 kB)
 
files/{cat_depth_fp32.npy → berries.jpeg} RENAMED
File without changes
files/{einstein_depth_16bit.png → butterfly.jpeg} RENAMED
File without changes
files/cat.jpg CHANGED

Git LFS Details

  • SHA256: 7da86be40e88f33249ce3d7e31b8e725cdc7c8a7daaf45f2c9349860bb6e5deb
  • Pointer size: 131 Bytes
  • Size of remote file: 131 kB

Git LFS Details

  • SHA256: 794796a86e56a4b372287661dc934daa2d15e988d01afe88afc50b32644c007a
  • Pointer size: 131 Bytes
  • Size of remote file: 236 kB
files/cat_depth_16bit.png DELETED
Binary file (557 kB)
 
files/cat_depth_colored.png DELETED
Binary file (221 kB)
 
files/{einstein_depth_fp32.npy → concert.jpeg} RENAMED
File without changes
files/dog.jpeg ADDED

Git LFS Details

  • SHA256: c932a965dfe63c8c6dbc1bb48f7ea245a6a6dd2fb40fd243545e908b3aa7aa62
  • Pointer size: 131 Bytes
  • Size of remote file: 672 kB
files/doughnuts.jpeg ADDED

Git LFS Details

  • SHA256: 2ede4170b4a17f0c076c1a336eb4d3c03d64688997a986e3a8101972016b799a
  • Pointer size: 131 Bytes
  • Size of remote file: 607 kB
files/einstein.jpg CHANGED

Git LFS Details

  • SHA256: d4a4543c0fffb2ca5ea3c17e23e88fcfcf66eae8b487173fbc5c25d0d614bdb6
  • Pointer size: 131 Bytes
  • Size of remote file: 367 kB
files/einstein_depth_colored.png DELETED
Binary file (746 kB)
 
files/food.jpeg ADDED

Git LFS Details

  • SHA256: a26151050a574b0dc0014e9c4806da3d6f6bc1297ee1035a16b9ace007a179af
  • Pointer size: 132 Bytes
  • Size of remote file: 1.04 MB
files/glasses.jpeg ADDED

Git LFS Details

  • SHA256: de8c0c20adb7c187357c21e467d3f178888574962027cdd366c390b63913ffec
  • Pointer size: 131 Bytes
  • Size of remote file: 677 kB
files/house.jpg ADDED

Git LFS Details

  • SHA256: 4087027e84a6323099fc839fd0b6816fd614814e92d12df21051cff3ed472819
  • Pointer size: 133 Bytes
  • Size of remote file: 14.9 MB
files/lake.jpeg ADDED

Git LFS Details

  • SHA256: 181dc0f684f0f3b94bc4bec829becd3dec817f69032731edf55ee8370c6898f0
  • Pointer size: 132 Bytes
  • Size of remote file: 1.03 MB
files/marigold.jpeg ADDED

Git LFS Details

  • SHA256: 575c1a7bc1199d86b5ec305b4efc12286842dee4a189e8699dcf8a6d0276807c
  • Pointer size: 131 Bytes
  • Size of remote file: 416 kB
files/portrait_1.jpeg ADDED

Git LFS Details

  • SHA256: 76e3ad74311975f0db43cdebd4202d1464e19b6950cc3e7c5aa0a160f95493c3
  • Pointer size: 131 Bytes
  • Size of remote file: 506 kB
files/portrait_2.jpeg ADDED

Git LFS Details

  • SHA256: 805ad1127b0d9d09068df70e3ab7aa7450ff802fa5464db8430787dfee1ec6a0
  • Pointer size: 131 Bytes
  • Size of remote file: 525 kB
files/pumpkins.jpg ADDED

Git LFS Details

  • SHA256: 92f03bc05dc882231bce735f2afb8c27eb9d0616166abe3794b39ff24314fd0a
  • Pointer size: 133 Bytes
  • Size of remote file: 11.3 MB
files/puzzle.jpeg ADDED

Git LFS Details

  • SHA256: 60b66432124a0936c6143301a9f9b793af4184bc9340c567d11fdd5a22cc98cc
  • Pointer size: 131 Bytes
  • Size of remote file: 374 kB
files/road.jpg ADDED

Git LFS Details

  • SHA256: 58bb01aea37f6e1206260eddb6d003589d779e8b3fb3ef0a0f1e2e38a8fa3925
  • Pointer size: 133 Bytes
  • Size of remote file: 13.1 MB
files/scientists.jpg ADDED

Git LFS Details

  • SHA256: 7b164dfbc4ab6e491ce81972b8c0e076fdc4af622289d0aa3cb43ee3c2be4030
  • Pointer size: 131 Bytes
  • Size of remote file: 444 kB
files/surfboards.jpeg ADDED

Git LFS Details

  • SHA256: 326f9ffd3b85b29b971205eb87c2d0c9b5e4409b496be1eb961b46d5f7c5d6c6
  • Pointer size: 132 Bytes
  • Size of remote file: 1.16 MB
files/surfer.jpeg ADDED

Git LFS Details

  • SHA256: 52827abf2c3951b752d4e58c88fff7ab907672c58fda70b813df3922650c7495
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
files/swings.jpg CHANGED

Git LFS Details

  • SHA256: cae2ac669c948313eae8aca53017f10b64b42f87c53b9c34639962b218fdf1f1
  • Pointer size: 131 Bytes
  • Size of remote file: 353 kB

Git LFS Details

  • SHA256: cae2ac669c948313eae8aca53017f10b64b42f87c53b9c34639962b218fdf1f1
  • Pointer size: 131 Bytes
  • Size of remote file: 353 kB
files/swings_depth_16bit.png DELETED
Binary file (523 kB)
 
files/swings_depth_colored.png DELETED
Binary file (268 kB)
 
files/swings_depth_fp32.npy DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5728d846cd554d4a5e1d0e5f71d622135bca36164026b8e49668acdfa20e070
3
- size 1398912
 
 
 
 
files/switzerland.jpeg ADDED

Git LFS Details

  • SHA256: 81e35ba90f7736167ea3e8a0a58f932ecded07b00b012a5bd7df5dabbe0eb3ce
  • Pointer size: 131 Bytes
  • Size of remote file: 847 kB
files/teamwork.jpeg ADDED

Git LFS Details

  • SHA256: 3cd48af8f3db4d89760cd6f40f2716570e697ae74a9bd88ed1ba36c0e68326b3
  • Pointer size: 131 Bytes
  • Size of remote file: 700 kB
files/wave.jpeg ADDED

Git LFS Details

  • SHA256: 7f14e77f7990d75104d6e3447077eb176d6437c58f5fb0fffcdb6015193b2d03
  • Pointer size: 132 Bytes
  • Size of remote file: 1.07 MB
marigold_depth_estimation.py DELETED
@@ -1,632 +0,0 @@
1
- # Copyright 2024 Bingxin Ke, ETH Zurich and The HuggingFace Team. All rights reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # --------------------------------------------------------------------------
15
- # If you find this code useful, we kindly ask you to cite our paper in your work.
16
- # Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
17
- # More information about the method can be found at https://marigoldmonodepth.github.io
18
- # --------------------------------------------------------------------------
19
-
20
-
21
- import math
22
- from typing import Dict, Union
23
-
24
- import matplotlib
25
- import numpy as np
26
- import torch
27
- from PIL import Image
28
- from scipy.optimize import minimize
29
- from torch.utils.data import DataLoader, TensorDataset
30
- from tqdm.auto import tqdm
31
- from transformers import CLIPTextModel, CLIPTokenizer
32
-
33
- from diffusers import (
34
- AutoencoderKL,
35
- DDIMScheduler,
36
- DiffusionPipeline,
37
- UNet2DConditionModel,
38
- )
39
- from diffusers.utils import BaseOutput, check_min_version
40
-
41
-
42
- # Will error if the minimal version of diffusers is not installed. Remove at your own risks.
43
- check_min_version("0.27.0.dev0")
44
-
45
-
46
- class MarigoldDepthOutput(BaseOutput):
47
- """
48
- Output class for Marigold monocular depth prediction pipeline.
49
-
50
- Args:
51
- depth_np (`np.ndarray`):
52
- Predicted depth map, with depth values in the range of [0, 1].
53
- depth_colored (`None` or `PIL.Image.Image`):
54
- Colorized depth map, with the shape of [3, H, W] and values in [0, 1].
55
- uncertainty (`None` or `np.ndarray`):
56
- Uncalibrated uncertainty(MAD, median absolute deviation) coming from ensembling.
57
- """
58
-
59
- depth_np: np.ndarray
60
- depth_colored: Union[None, Image.Image]
61
- uncertainty: Union[None, np.ndarray]
62
-
63
-
64
- class MarigoldPipeline(DiffusionPipeline):
65
- """
66
- Pipeline for monocular depth estimation using Marigold: https://marigoldmonodepth.github.io.
67
-
68
- This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
69
- library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
70
-
71
- Args:
72
- unet (`UNet2DConditionModel`):
73
- Conditional U-Net to denoise the depth latent, conditioned on image latent.
74
- vae (`AutoencoderKL`):
75
- Variational Auto-Encoder (VAE) Model to encode and decode images and depth maps
76
- to and from latent representations.
77
- scheduler (`DDIMScheduler`):
78
- A scheduler to be used in combination with `unet` to denoise the encoded image latents.
79
- text_encoder (`CLIPTextModel`):
80
- Text-encoder, for empty text embedding.
81
- tokenizer (`CLIPTokenizer`):
82
- CLIP tokenizer.
83
- """
84
-
85
- rgb_latent_scale_factor = 0.18215
86
- depth_latent_scale_factor = 0.18215
87
-
88
- def __init__(
89
- self,
90
- unet: UNet2DConditionModel,
91
- vae: AutoencoderKL,
92
- scheduler: DDIMScheduler,
93
- text_encoder: CLIPTextModel,
94
- tokenizer: CLIPTokenizer,
95
- ):
96
- super().__init__()
97
-
98
- self.register_modules(
99
- unet=unet,
100
- vae=vae,
101
- scheduler=scheduler,
102
- text_encoder=text_encoder,
103
- tokenizer=tokenizer,
104
- )
105
-
106
- self.empty_text_embed = None
107
-
108
- @torch.no_grad()
109
- def __call__(
110
- self,
111
- input_image: Image,
112
- denoising_steps: int = 10,
113
- ensemble_size: int = 10,
114
- processing_res: int = 768,
115
- match_input_res: bool = True,
116
- batch_size: int = 0,
117
- color_map: str = "Spectral",
118
- show_progress_bar: bool = True,
119
- ensemble_kwargs: Dict = None,
120
- ) -> MarigoldDepthOutput:
121
- """
122
- Function invoked when calling the pipeline.
123
-
124
- Args:
125
- input_image (`Image`):
126
- Input RGB (or gray-scale) image.
127
- processing_res (`int`, *optional*, defaults to `768`):
128
- Maximum resolution of processing.
129
- If set to 0: will not resize at all.
130
- match_input_res (`bool`, *optional*, defaults to `True`):
131
- Resize depth prediction to match input resolution.
132
- Only valid if `limit_input_res` is not None.
133
- denoising_steps (`int`, *optional*, defaults to `10`):
134
- Number of diffusion denoising steps (DDIM) during inference.
135
- ensemble_size (`int`, *optional*, defaults to `10`):
136
- Number of predictions to be ensembled.
137
- batch_size (`int`, *optional*, defaults to `0`):
138
- Inference batch size, no bigger than `num_ensemble`.
139
- If set to 0, the script will automatically decide the proper batch size.
140
- show_progress_bar (`bool`, *optional*, defaults to `True`):
141
- Display a progress bar of diffusion denoising.
142
- color_map (`str`, *optional*, defaults to `"Spectral"`, pass `None` to skip colorized depth map generation):
143
- Colormap used to colorize the depth map.
144
- ensemble_kwargs (`dict`, *optional*, defaults to `None`):
145
- Arguments for detailed ensembling settings.
146
- Returns:
147
- `MarigoldDepthOutput`: Output class for Marigold monocular depth prediction pipeline, including:
148
- - **depth_np** (`np.ndarray`) Predicted depth map, with depth values in the range of [0, 1]
149
- - **depth_colored** (`None` or `PIL.Image.Image`) Colorized depth map, with the shape of [3, H, W] and
150
- values in [0, 1]. None if `color_map` is `None`
151
- - **uncertainty** (`None` or `np.ndarray`) Uncalibrated uncertainty(MAD, median absolute deviation)
152
- coming from ensembling. None if `ensemble_size = 1`
153
- """
154
-
155
- device = self.device
156
- input_size = input_image.size
157
-
158
- if not match_input_res:
159
- assert (
160
- processing_res is not None
161
- ), "Value error: `resize_output_back` is only valid with "
162
- assert processing_res >= 0
163
- assert denoising_steps >= 1
164
- assert ensemble_size >= 1
165
-
166
- # ----------------- Image Preprocess -----------------
167
- # Resize image
168
- if processing_res > 0:
169
- input_image = self.resize_max_res(
170
- input_image, max_edge_resolution=processing_res
171
- )
172
- # Convert the image to RGB, to 1.remove the alpha channel 2.convert B&W to 3-channel
173
- input_image = input_image.convert("RGB")
174
- image = np.asarray(input_image)
175
-
176
- # Normalize rgb values
177
- rgb = np.transpose(image, (2, 0, 1)) # [H, W, rgb] -> [rgb, H, W]
178
- rgb_norm = rgb / 255.0 * 2.0 - 1.0 # [0, 255] -> [-1, 1]
179
- rgb_norm = torch.from_numpy(rgb_norm).to(self.dtype)
180
- rgb_norm = rgb_norm.to(device)
181
- assert rgb_norm.min() >= -1.0 and rgb_norm.max() <= 1.0
182
-
183
- # ----------------- Predicting depth -----------------
184
- # Batch repeated input image
185
- duplicated_rgb = torch.stack([rgb_norm] * ensemble_size)
186
- single_rgb_dataset = TensorDataset(duplicated_rgb)
187
- if batch_size > 0:
188
- _bs = batch_size
189
- else:
190
- _bs = self._find_batch_size(
191
- ensemble_size=ensemble_size,
192
- input_res=max(rgb_norm.shape[1:]),
193
- dtype=self.dtype,
194
- )
195
-
196
- single_rgb_loader = DataLoader(
197
- single_rgb_dataset, batch_size=_bs, shuffle=False
198
- )
199
-
200
- # Predict depth maps (batched)
201
- depth_pred_ls = []
202
- if show_progress_bar:
203
- iterable = tqdm(
204
- single_rgb_loader, desc=" " * 2 + "Inference batches", leave=False
205
- )
206
- else:
207
- iterable = single_rgb_loader
208
- for batch in iterable:
209
- (batched_img,) = batch
210
- depth_pred_raw = self.single_infer(
211
- rgb_in=batched_img,
212
- num_inference_steps=denoising_steps,
213
- show_pbar=show_progress_bar,
214
- )
215
- depth_pred_ls.append(depth_pred_raw.detach().clone())
216
- depth_preds = torch.concat(depth_pred_ls, axis=0).squeeze()
217
- torch.cuda.empty_cache() # clear vram cache for ensembling
218
-
219
- # ----------------- Test-time ensembling -----------------
220
- if ensemble_size > 1:
221
- depth_pred, pred_uncert = self.ensemble_depths(
222
- depth_preds, **(ensemble_kwargs or {})
223
- )
224
- else:
225
- depth_pred = depth_preds
226
- pred_uncert = None
227
-
228
- # ----------------- Post processing -----------------
229
- # Scale prediction to [0, 1]
230
- min_d = torch.min(depth_pred)
231
- max_d = torch.max(depth_pred)
232
- depth_pred = (depth_pred - min_d) / (max_d - min_d)
233
-
234
- # Convert to numpy
235
- depth_pred = depth_pred.cpu().numpy().astype(np.float32)
236
-
237
- # Resize back to original resolution
238
- if match_input_res:
239
- pred_img = Image.fromarray(depth_pred)
240
- pred_img = pred_img.resize(input_size)
241
- depth_pred = np.asarray(pred_img)
242
-
243
- # Clip output range
244
- depth_pred = depth_pred.clip(0, 1)
245
-
246
- # Colorize
247
- if color_map is not None:
248
- depth_colored = self.colorize_depth_maps(
249
- depth_pred, 0, 1, cmap=color_map
250
- ).squeeze() # [3, H, W], value in (0, 1)
251
- depth_colored = (depth_colored * 255).astype(np.uint8)
252
- depth_colored_hwc = self.chw2hwc(depth_colored)
253
- depth_colored_img = Image.fromarray(depth_colored_hwc)
254
- else:
255
- depth_colored_img = None
256
- return MarigoldDepthOutput(
257
- depth_np=depth_pred,
258
- depth_colored=depth_colored_img,
259
- uncertainty=pred_uncert,
260
- )
261
-
262
- def _encode_empty_text(self):
263
- """
264
- Encode text embedding for empty prompt.
265
- """
266
- prompt = ""
267
- text_inputs = self.tokenizer(
268
- prompt,
269
- padding="do_not_pad",
270
- max_length=self.tokenizer.model_max_length,
271
- truncation=True,
272
- return_tensors="pt",
273
- )
274
- text_input_ids = text_inputs.input_ids.to(self.text_encoder.device)
275
- self.empty_text_embed = self.text_encoder(text_input_ids)[0].to(self.dtype)
276
-
277
- @torch.no_grad()
278
- def single_infer(
279
- self, rgb_in: torch.Tensor, num_inference_steps: int, show_pbar: bool
280
- ) -> torch.Tensor:
281
- """
282
- Perform an individual depth prediction without ensembling.
283
-
284
- Args:
285
- rgb_in (`torch.Tensor`):
286
- Input RGB image.
287
- num_inference_steps (`int`):
288
- Number of diffusion denoisign steps (DDIM) during inference.
289
- show_pbar (`bool`):
290
- Display a progress bar of diffusion denoising.
291
- Returns:
292
- `torch.Tensor`: Predicted depth map.
293
- """
294
- device = rgb_in.device
295
-
296
- # Set timesteps
297
- self.scheduler.set_timesteps(num_inference_steps, device=device)
298
- timesteps = self.scheduler.timesteps # [T]
299
-
300
- # Encode image
301
- rgb_latent = self._encode_rgb(rgb_in)
302
-
303
- # Initial depth map (noise)
304
- depth_latent = torch.randn(
305
- rgb_latent.shape, device=device, dtype=self.dtype
306
- ) # [B, 4, h, w]
307
-
308
- # Batched empty text embedding
309
- if self.empty_text_embed is None:
310
- self._encode_empty_text()
311
- batch_empty_text_embed = self.empty_text_embed.repeat(
312
- (rgb_latent.shape[0], 1, 1)
313
- ) # [B, 2, 1024]
314
-
315
- # Denoising loop
316
- if show_pbar:
317
- iterable = tqdm(
318
- enumerate(timesteps),
319
- total=len(timesteps),
320
- leave=False,
321
- desc=" " * 4 + "Diffusion denoising",
322
- )
323
- else:
324
- iterable = enumerate(timesteps)
325
-
326
- for i, t in iterable:
327
- unet_input = torch.cat(
328
- [rgb_latent, depth_latent], dim=1
329
- ) # this order is important
330
-
331
- # predict the noise residual
332
- noise_pred = self.unet(
333
- unet_input, t, encoder_hidden_states=batch_empty_text_embed
334
- ).sample # [B, 4, h, w]
335
-
336
- # compute the previous noisy sample x_t -> x_t-1
337
- depth_latent = self.scheduler.step(noise_pred, t, depth_latent).prev_sample
338
- torch.cuda.empty_cache()
339
- depth = self._decode_depth(depth_latent)
340
-
341
- # clip prediction
342
- depth = torch.clip(depth, -1.0, 1.0)
343
- # shift to [0, 1]
344
- depth = (depth + 1.0) / 2.0
345
-
346
- return depth
347
-
348
- def _encode_rgb(self, rgb_in: torch.Tensor) -> torch.Tensor:
349
- """
350
- Encode RGB image into latent.
351
-
352
- Args:
353
- rgb_in (`torch.Tensor`):
354
- Input RGB image to be encoded.
355
-
356
- Returns:
357
- `torch.Tensor`: Image latent.
358
- """
359
- # encode
360
- h = self.vae.encoder(rgb_in)
361
- moments = self.vae.quant_conv(h)
362
- mean, logvar = torch.chunk(moments, 2, dim=1)
363
- # scale latent
364
- rgb_latent = mean * self.rgb_latent_scale_factor
365
- return rgb_latent
366
-
367
- def _decode_depth(self, depth_latent: torch.Tensor) -> torch.Tensor:
368
- """
369
- Decode depth latent into depth map.
370
-
371
- Args:
372
- depth_latent (`torch.Tensor`):
373
- Depth latent to be decoded.
374
-
375
- Returns:
376
- `torch.Tensor`: Decoded depth map.
377
- """
378
- # scale latent
379
- depth_latent = depth_latent / self.depth_latent_scale_factor
380
- # decode
381
- z = self.vae.post_quant_conv(depth_latent)
382
- stacked = self.vae.decoder(z)
383
- # mean of output channels
384
- depth_mean = stacked.mean(dim=1, keepdim=True)
385
- return depth_mean
386
-
387
- @staticmethod
388
- def resize_max_res(img: Image.Image, max_edge_resolution: int) -> Image.Image:
389
- """
390
- Resize image to limit maximum edge length while keeping aspect ratio.
391
-
392
- Args:
393
- img (`Image.Image`):
394
- Image to be resized.
395
- max_edge_resolution (`int`):
396
- Maximum edge length (pixel).
397
-
398
- Returns:
399
- `Image.Image`: Resized image.
400
- """
401
- original_width, original_height = img.size
402
- downscale_factor = min(
403
- max_edge_resolution / original_width, max_edge_resolution / original_height
404
- )
405
-
406
- new_width = int(original_width * downscale_factor)
407
- new_height = int(original_height * downscale_factor)
408
-
409
- resized_img = img.resize((new_width, new_height))
410
- return resized_img
411
-
412
- @staticmethod
413
- def colorize_depth_maps(
414
- depth_map, min_depth, max_depth, cmap="Spectral", valid_mask=None
415
- ):
416
- """
417
- Colorize depth maps.
418
- """
419
- assert len(depth_map.shape) >= 2, "Invalid dimension"
420
-
421
- if isinstance(depth_map, torch.Tensor):
422
- depth = depth_map.detach().clone().squeeze().numpy()
423
- elif isinstance(depth_map, np.ndarray):
424
- depth = depth_map.copy().squeeze()
425
- # reshape to [ (B,) H, W ]
426
- if depth.ndim < 3:
427
- depth = depth[np.newaxis, :, :]
428
-
429
- # colorize
430
- cm = matplotlib.colormaps[cmap]
431
- depth = ((depth - min_depth) / (max_depth - min_depth)).clip(0, 1)
432
- img_colored_np = cm(depth, bytes=False)[:, :, :, 0:3] # value from 0 to 1
433
- img_colored_np = np.rollaxis(img_colored_np, 3, 1)
434
-
435
- if valid_mask is not None:
436
- if isinstance(depth_map, torch.Tensor):
437
- valid_mask = valid_mask.detach().numpy()
438
- valid_mask = valid_mask.squeeze() # [H, W] or [B, H, W]
439
- if valid_mask.ndim < 3:
440
- valid_mask = valid_mask[np.newaxis, np.newaxis, :, :]
441
- else:
442
- valid_mask = valid_mask[:, np.newaxis, :, :]
443
- valid_mask = np.repeat(valid_mask, 3, axis=1)
444
- img_colored_np[~valid_mask] = 0
445
-
446
- if isinstance(depth_map, torch.Tensor):
447
- img_colored = torch.from_numpy(img_colored_np).float()
448
- elif isinstance(depth_map, np.ndarray):
449
- img_colored = img_colored_np
450
-
451
- return img_colored
452
-
453
- @staticmethod
454
- def chw2hwc(chw):
455
- assert 3 == len(chw.shape)
456
- if isinstance(chw, torch.Tensor):
457
- hwc = torch.permute(chw, (1, 2, 0))
458
- elif isinstance(chw, np.ndarray):
459
- hwc = np.moveaxis(chw, 0, -1)
460
- return hwc
461
-
462
- @staticmethod
463
- def _find_batch_size(ensemble_size: int, input_res: int, dtype: torch.dtype) -> int:
464
- """
465
- Automatically search for suitable operating batch size.
466
-
467
- Args:
468
- ensemble_size (`int`):
469
- Number of predictions to be ensembled.
470
- input_res (`int`):
471
- Operating resolution of the input image.
472
-
473
- Returns:
474
- `int`: Operating batch size.
475
- """
476
- # Search table for suggested max. inference batch size
477
- bs_search_table = [
478
- # tested on A100-PCIE-80GB
479
- {"res": 768, "total_vram": 79, "bs": 35, "dtype": torch.float32},
480
- {"res": 1024, "total_vram": 79, "bs": 20, "dtype": torch.float32},
481
- # tested on A100-PCIE-40GB
482
- {"res": 768, "total_vram": 39, "bs": 15, "dtype": torch.float32},
483
- {"res": 1024, "total_vram": 39, "bs": 8, "dtype": torch.float32},
484
- {"res": 768, "total_vram": 39, "bs": 30, "dtype": torch.float16},
485
- {"res": 1024, "total_vram": 39, "bs": 15, "dtype": torch.float16},
486
- # tested on RTX3090, RTX4090
487
- {"res": 512, "total_vram": 23, "bs": 20, "dtype": torch.float32},
488
- {"res": 768, "total_vram": 23, "bs": 7, "dtype": torch.float32},
489
- {"res": 1024, "total_vram": 23, "bs": 3, "dtype": torch.float32},
490
- {"res": 512, "total_vram": 23, "bs": 40, "dtype": torch.float16},
491
- {"res": 768, "total_vram": 23, "bs": 18, "dtype": torch.float16},
492
- {"res": 1024, "total_vram": 23, "bs": 10, "dtype": torch.float16},
493
- # tested on GTX1080Ti
494
- {"res": 512, "total_vram": 10, "bs": 5, "dtype": torch.float32},
495
- {"res": 768, "total_vram": 10, "bs": 2, "dtype": torch.float32},
496
- {"res": 512, "total_vram": 10, "bs": 10, "dtype": torch.float16},
497
- {"res": 768, "total_vram": 10, "bs": 5, "dtype": torch.float16},
498
- {"res": 1024, "total_vram": 10, "bs": 3, "dtype": torch.float16},
499
- ]
500
-
501
- if not torch.cuda.is_available():
502
- return 1
503
-
504
- total_vram = torch.cuda.mem_get_info()[1] / 1024.0**3
505
- filtered_bs_search_table = [s for s in bs_search_table if s["dtype"] == dtype]
506
- for settings in sorted(
507
- filtered_bs_search_table,
508
- key=lambda k: (k["res"], -k["total_vram"]),
509
- ):
510
- if input_res <= settings["res"] and total_vram >= settings["total_vram"]:
511
- bs = settings["bs"]
512
- if bs > ensemble_size:
513
- bs = ensemble_size
514
- elif bs > math.ceil(ensemble_size / 2) and bs < ensemble_size:
515
- bs = math.ceil(ensemble_size / 2)
516
- return bs
517
-
518
- return 1
519
-
520
- @staticmethod
521
- def ensemble_depths(
522
- input_images: torch.Tensor,
523
- regularizer_strength: float = 0.02,
524
- max_iter: int = 2,
525
- tol: float = 1e-3,
526
- reduction: str = "median",
527
- max_res: int = None,
528
- ):
529
- """
530
- Ensemble multiple affine-invariant depth images (each defined only up to scale and shift)
531
- by jointly estimating and aligning their per-image scale and shift.
532
- """
533
-
534
- def inter_distances(tensors: torch.Tensor):
535
- """
536
- To calculate the distance between each two depth maps.
537
- """
538
- distances = []
539
- for i, j in torch.combinations(torch.arange(tensors.shape[0])):
540
- arr1 = tensors[i : i + 1]
541
- arr2 = tensors[j : j + 1]
542
- distances.append(arr1 - arr2)
543
- dist = torch.concatenate(distances, dim=0)
544
- return dist
545
-
546
- device = input_images.device
547
- dtype = input_images.dtype
548
- np_dtype = np.float32
549
-
550
- original_input = input_images.clone()
551
- n_img = input_images.shape[0]
552
- ori_shape = input_images.shape
553
-
554
- if max_res is not None:
555
- scale_factor = torch.min(max_res / torch.tensor(ori_shape[-2:]))
556
- if scale_factor < 1:
557
- downscaler = torch.nn.Upsample(
558
- scale_factor=scale_factor, mode="nearest"
559
- )
560
- input_images = downscaler(input_images)
561
-
562
- # init guess
563
- _min = np.min(input_images.reshape((n_img, -1)).cpu().numpy(), axis=1)
564
- _max = np.max(input_images.reshape((n_img, -1)).cpu().numpy(), axis=1)
565
- s_init = 1.0 / (_max - _min).reshape((-1, 1, 1))
566
- t_init = (-1 * s_init.flatten() * _min.flatten()).reshape((-1, 1, 1))
567
- x = np.concatenate([s_init, t_init]).reshape(-1).astype(np_dtype)
568
-
569
- input_images = input_images.to(device)
570
-
571
- # objective function
572
- def closure(x):
573
- l = len(x)
574
- s = x[: int(l / 2)]
575
- t = x[int(l / 2) :]
576
- s = torch.from_numpy(s).to(dtype=dtype).to(device)
577
- t = torch.from_numpy(t).to(dtype=dtype).to(device)
578
-
579
- transformed_arrays = input_images * s.view((-1, 1, 1)) + t.view((-1, 1, 1))
580
- dists = inter_distances(transformed_arrays)
581
- sqrt_dist = torch.sqrt(torch.mean(dists**2))
582
-
583
- if "mean" == reduction:
584
- pred = torch.mean(transformed_arrays, dim=0)
585
- elif "median" == reduction:
586
- pred = torch.median(transformed_arrays, dim=0).values
587
- else:
588
- raise ValueError
589
-
590
- near_err = torch.sqrt((0 - torch.min(pred)) ** 2)
591
- far_err = torch.sqrt((1 - torch.max(pred)) ** 2)
592
-
593
- err = sqrt_dist + (near_err + far_err) * regularizer_strength
594
- err = err.detach().cpu().numpy().astype(np_dtype)
595
- return err
596
-
597
- res = minimize(
598
- closure,
599
- x,
600
- method="BFGS",
601
- tol=tol,
602
- options={"maxiter": max_iter, "disp": False},
603
- )
604
- x = res.x
605
- l = len(x)
606
- s = x[: int(l / 2)]
607
- t = x[int(l / 2) :]
608
-
609
- # Prediction
610
- s = torch.from_numpy(s).to(dtype=dtype).to(device)
611
- t = torch.from_numpy(t).to(dtype=dtype).to(device)
612
- transformed_arrays = original_input * s.view(-1, 1, 1) + t.view(-1, 1, 1)
613
- if "mean" == reduction:
614
- aligned_images = torch.mean(transformed_arrays, dim=0)
615
- std = torch.std(transformed_arrays, dim=0)
616
- uncertainty = std
617
- elif "median" == reduction:
618
- aligned_images = torch.median(transformed_arrays, dim=0).values
619
- # MAD (median absolute deviation) as uncertainty indicator
620
- abs_dev = torch.abs(transformed_arrays - aligned_images)
621
- mad = torch.median(abs_dev, dim=0).values
622
- uncertainty = mad
623
- else:
624
- raise ValueError(f"Unknown reduction method: {reduction}")
625
-
626
- # Scale and shift to [0, 1]
627
- _min = torch.min(aligned_images)
628
- _max = torch.max(aligned_images)
629
- aligned_images = (aligned_images - _min) / (_max - _min)
630
- uncertainty /= _max - _min
631
-
632
- return aligned_images, uncertainty
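
The file removed above was the Space's bundled copy of the Marigold depth pipeline. With the switch to the new template, equivalent inference is expected to come from the `diffusers` release pinned in the updated `requirements.txt` below, so the in-repo implementation is no longer needed. The following is a minimal usage sketch, not the Space's actual app code: it assumes the `MarigoldDepthPipeline` API shipped in recent `diffusers` releases, a CUDA device, and the checkpoint name listed in the updated README; adjust these to your environment.

```python
# Hedged sketch: run affine-invariant depth estimation via the upstream
# diffusers pipeline instead of the deleted in-repo implementation.
# Assumptions: a recent diffusers release that ships MarigoldDepthPipeline,
# a CUDA device, and the checkpoint name from the updated README.
import diffusers
import torch

pipe = diffusers.MarigoldDepthPipeline.from_pretrained(
    "prs-eth/marigold-depth-v1-0", torch_dtype=torch.float16
).to("cuda")

# Example input image from the project page; replace with your own file or URL.
image = diffusers.utils.load_image(
    "https://marigoldmonodepth.github.io/images/einstein.jpg"
)

# ensemble_size and num_inference_steps correspond to the ensembling and
# DDIM-denoising loops of the deleted single_infer / ensemble_depths code.
depth = pipe(image, num_inference_steps=10, ensemble_size=5)

# depth.prediction is the affine-invariant depth map scaled to [0, 1];
# the image processor provides colorized and 16-bit PNG exports.
vis = pipe.image_processor.visualize_depth(depth.prediction)
vis[0].save("depth_colored.png")
png16 = pipe.image_processor.export_depth_to_16bit_png(depth.prediction)
png16[0].save("depth_16bit.png")
```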
 
marigold_logo_square.jpg DELETED
Binary file (76 kB)
 
requirements.txt CHANGED
@@ -1,13 +1,9 @@
1
- gradio==4.21.0
2
- gradio-imageslider==0.0.16
3
- pygltflib==1.16.1
4
- trimesh==4.0.5
5
-
6
- spaces>=0.25.0
7
- accelerate>=0.22.0
8
- diffusers==0.27.2
9
- matplotlib==3.8.2
10
- scipy==1.11.4
11
- torch==2.0.1
12
- transformers>=4.32.1
13
- xformers>=0.0.21
 
1
+ diffusers>=0.32.2
2
+ git+https://github.com/toshas/gradio-dualvision.git
3
+ accelerate
4
+ huggingface_hub
5
+ scipy
6
+ torch
7
+ tqdm
8
+ transformers
9
+ xformers
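
The rewritten dependency list drops version pins (except the `diffusers` floor) and adds the git-hosted `gradio-dualvision` template. A quick, hedged way to sanity-check what pip actually resolved is to query installed versions with the standard library; the snippet below covers only the PyPI-named packages from the new list (the git-installed `gradio-dualvision` is omitted because its distribution name is not stated here).

```python
# Illustrative sketch (standard library only): print the versions resolved
# for the packages named in the updated requirements.txt.
from importlib.metadata import PackageNotFoundError, version

packages = [
    "diffusers", "accelerate", "huggingface_hub", "scipy",
    "torch", "tqdm", "transformers", "xformers",
]
for name in packages:
    try:
        print(f"{name}=={version(name)}")
    except PackageNotFoundError:
        print(f"{name}: not installed")
```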