import rerun as rr
import rerun.blueprint as rrb
import depth_pro

import torch
import cv2
from pathlib import Path
import gradio as gr
from gradio_rerun import Rerun


@rr.thread_local_stream("rerun_example_ml_depth_pro")
def run_ml_depth_pro(model, transform, frames):
    """Estimate depth for every frame and stream the resulting Rerun log.

    For each frame, in order, this logs the frame itself, a pinhole camera
    built from the predicted focal length, and the predicted depth map, then
    yields whatever the binary stream has accumulated since the last read.

    Args:
        model: Object exposing ``infer(image)``; the result is indexed with
            ``"depth"`` and ``"focallength_px"``.
        transform: Callable applied to each frame before inference.
        frames: Iterable of images; ``shape[0]`` is used as the height and
            ``shape[1]`` as the width.

    Yields:
        The value of ``read()`` on the binary stream after each frame is logged.

    Raises:
        AssertionError: If any of the three arguments is None.
    """
    log_stream = rr.binary_stream()

    assert model is not None, "Model is None"
    assert transform is not None, "Transform is None"
    assert frames is not None, "Frames is None"

    # Layout: one 3D view of the whole scene, plus depth and image side by side.
    scene_view = rrb.Spatial3DView(origin="/")
    side_by_side = rrb.Horizontal(
        rrb.Spatial2DView(origin="/world/camera/depth", title="Depth"),
        rrb.Spatial2DView(origin="/world/camera/image", title="Image"),
    )
    rr.send_blueprint(
        rrb.Blueprint(scene_view, side_by_side, collapse_panels=True)
    )

    for frame_index, frame in enumerate(frames):
        rr.set_time_sequence("frame", frame_index)
        rr.log("world/camera/image", rr.Image(frame))

        result = model.infer(transform(frame))
        depth_map = result["depth"].squeeze().detach().cpu().numpy()

        height, width = frame.shape[0], frame.shape[1]
        # The principal point is placed at the image centre, and the image
        # plane is drawn at the largest predicted depth value.
        camera = rr.Pinhole(
            width=width,
            height=height,
            focal_length=result["focallength_px"].item(),
            principal_point=(width / 2, height / 2),
            image_plane_distance=depth_map.max(),
        )
        rr.log("world/camera", camera)

        # NOTE: passing depth_range=(depth.min(), depth.max()) to DepthImage
        # needs the 0.19 stable release, so only `meter` is set for now.
        rr.log("world/camera/depth", rr.DepthImage(depth_map, meter=1))

        yield log_stream.read()


# NOTE(review): `video_path` is never used below -- the capture opens the
# hard-coded "hd-cat2.mp4" instead. Confirm which file is intended and make
# the two agree.
video_path = Path("hd-cat.mp4")

# Prefer CUDA, then Apple MPS, and fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Load model and preprocessing transform
model, transform = depth_pro.create_model_and_transforms(device=device)
model.eval()


# Load video: decode every frame up front, resized to 320x240 and converted
# from OpenCV's BGR channel order to RGB.
frames = []
video = cv2.VideoCapture("hd-cat2.mp4")
try:
    while True:
        read, frame = video.read()
        if not read:
            break
        frame = cv2.resize(frame, (320, 240))
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(frame)
finally:
    # Release the capture even if decoding fails part-way, so the underlying
    # file/decoder handle is not kept open for the lifetime of the app.
    video.release()

# Build the UI: an image input and a button on the first row, and a streaming
# Rerun viewer on the second row.
with gr.Blocks() as demo:
    with gr.Tab("Streaming"):
        with gr.Row():
            # NOTE(review): this image input is not wired to any event handler.
            img = gr.Image(interactive=True, label="Image")
            with gr.Column():
                stream_ml_depth_pro = gr.Button("Stream Ml Depth Pro")
        with gr.Row():
            viewer = Rerun(
                streaming=True,
                panel_states={
                    "time": "collapsed",
                    "blueprint": "hidden",
                    "selection": "hidden",
                },
            )

        def _stream_preloaded_frames():
            """Stream depth predictions for the frames loaded at module import."""
            # `inputs=` of an event listener only accepts Gradio components,
            # so the model, transform and frame list are bound here instead
            # of being passed through `inputs`.
            yield from run_ml_depth_pro(model, transform, frames)

        stream_ml_depth_pro.click(
            _stream_preloaded_frames, inputs=None, outputs=[viewer]
        )


# Launch the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()