Spaces:
Running
Running
move image to correct device
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import subprocess
|
|
| 5 |
|
| 6 |
import torch
|
| 7 |
import cv2
|
|
|
|
| 8 |
import os
|
| 9 |
from pathlib import Path
|
| 10 |
import gradio as gr
|
|
@@ -24,9 +25,30 @@ model, transform = depth_pro.create_model_and_transforms()
|
|
| 24 |
model = model.to(device)
|
| 25 |
model.eval()
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
@rr.thread_local_stream("rerun_example_ml_depth_pro")
|
| 29 |
@spaces.GPU(duration=20)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
def run_ml_depth_pro(frame):
|
| 31 |
stream = rr.binary_stream()
|
| 32 |
|
|
@@ -51,16 +73,15 @@ def run_ml_depth_pro(frame):
|
|
| 51 |
rr.set_time_sequence("frame", 0)
|
| 52 |
rr.log("world/camera/image", rr.Image(frame))
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
depth = prediction["depth"].squeeze().detach().cpu().numpy()
|
| 57 |
|
| 58 |
rr.log(
|
| 59 |
"world/camera",
|
| 60 |
rr.Pinhole(
|
| 61 |
width=frame.shape[1],
|
| 62 |
height=frame.shape[0],
|
| 63 |
-
focal_length=
|
| 64 |
principal_point=(frame.shape[1] / 2, frame.shape[0] / 2),
|
| 65 |
image_plane_distance=depth.max(),
|
| 66 |
),
|
|
|
|
| 5 |
|
| 6 |
import torch
|
| 7 |
import cv2
|
| 8 |
+
import numpy as np
|
| 9 |
import os
|
| 10 |
from pathlib import Path
|
| 11 |
import gradio as gr
|
|
|
|
| 25 |
model = model.to(device)
|
| 26 |
model.eval()
|
| 27 |
|
| 28 |
+
def resize_image(image_path, max_size=1536):
    """Rescale the image at *image_path* so its longest side equals
    ``max_size`` pixels, preserving aspect ratio, and save it as a PNG.

    Returns:
        str: path of a temporary PNG file holding the rescaled image.
             The file is created with ``delete=False``, so the caller is
             responsible for removing it when done.
    """
    with Image.open(image_path) as source:
        # Single uniform scale factor derived from the longest side.
        scale = max_size / max(source.size)
        target_size = tuple(int(dim * scale) for dim in source.size)

        resized = source.resize(target_size, Image.LANCZOS)

        # Persist to a named temp file so downstream code can reopen it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            resized.save(tmp, format="PNG")
            return tmp.name
|
| 41 |
|
|
|
|
| 42 |
@spaces.GPU(duration=20)
def predict(frame):
    """Run monocular depth estimation on a single RGB frame.

    The frame is preprocessed with the model's ``transform``, moved to the
    inference ``device`` (the point of this change: input and model must live
    on the same device), and passed through ``model.infer``.

    Args:
        frame: input image as expected by ``transform`` (presumably an
            HxWxC array — TODO confirm against the caller).

    Returns:
        tuple: ``(depth, focallength_px)`` where ``depth`` is a numpy array
        of per-pixel depth values and ``focallength_px`` is the estimated
        focal length in pixels as a Python float.
    """
    image = transform(frame)
    image = image.to(device)
    prediction = model.infer(image)
    # ``depth`` is already detached, moved to CPU, and converted to numpy here.
    depth = prediction["depth"].squeeze().detach().cpu().numpy()
    # BUG FIX: the original returned ``depth.cpu().numpy()``, but ``depth``
    # is a numpy array at this point and has no ``.cpu()`` method — that
    # call raised AttributeError. Return the array directly.
    return depth, prediction["focallength_px"].item()
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@rr.thread_local_stream("rerun_example_ml_depth_pro")
|
| 52 |
def run_ml_depth_pro(frame):
|
| 53 |
stream = rr.binary_stream()
|
| 54 |
|
|
|
|
| 73 |
rr.set_time_sequence("frame", 0)
|
| 74 |
rr.log("world/camera/image", rr.Image(frame))
|
| 75 |
|
| 76 |
+
depth, focal_length = predict(frame)
|
| 77 |
+
|
|
|
|
| 78 |
|
| 79 |
rr.log(
|
| 80 |
"world/camera",
|
| 81 |
rr.Pinhole(
|
| 82 |
width=frame.shape[1],
|
| 83 |
height=frame.shape[0],
|
| 84 |
+
focal_length=focal_length,
|
| 85 |
principal_point=(frame.shape[1] / 2, frame.shape[0] / 2),
|
| 86 |
image_plane_distance=depth.max(),
|
| 87 |
),
|