Update app.py
app.py
CHANGED
@@ -59,10 +59,10 @@ import tempfile
 from pathlib import Path
 from urllib.request import urlretrieve
 
-
-video_url = "https://download.pytorch.org/tutorial/pexelscom_pavel_danilyuk_basketball_hd.mp4"
-video_path = Path(tempfile.mkdtemp()) / "basketball.mp4"
-_ = urlretrieve(video_url, video_path)
+def infer():
+    video_url = "https://download.pytorch.org/tutorial/pexelscom_pavel_danilyuk_basketball_hd.mp4"
+    video_path = Path(tempfile.mkdtemp()) / "basketball.mp4"
+    _ = urlretrieve(video_url, video_path)
 
 #########################
 # :func:`~torchvision.io.read_video` returns the video frames, audio frames and
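One side effect of the new wrapper: every call to infer() makes a fresh temp directory and re-downloads the clip. A minimal caching sketch, assuming the helper name and cache location are ours to choose:

import tempfile
from pathlib import Path
from urllib.request import urlretrieve

# Hypothetical helper: download the clip once and reuse it on later infer() calls.
_CACHED_VIDEO = Path(tempfile.gettempdir()) / "basketball.mp4"

def fetch_video(url: str) -> Path:
    if not _CACHED_VIDEO.exists():  # skip the network round-trip after the first call
        urlretrieve(url, _CACHED_VIDEO)
    return _CACHED_VIDEO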
@@ -73,11 +73,11 @@ _ = urlretrieve(video_url, video_path)
 # namely frames (100, 101) and (150, 151). Each of these pairs corresponds to a
 # single model input.
 
-from torchvision.io import read_video
-frames, _, _ = read_video(str(video_path), output_format="TCHW")
+    from torchvision.io import read_video
+    frames, _, _ = read_video(str(video_path), output_format="TCHW")
 
-img1 = torch.stack([frames[100], frames[150]])
-img2 = torch.stack([frames[101], frames[151]])
+    img1 = torch.stack([frames[100], frames[150]])
+    img2 = torch.stack([frames[101], frames[151]])
 
 #########################
 # The RAFT model accepts RGB images. We first get the frames from
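read_video with output_format="TCHW" yields a uint8 tensor shaped (num_frames, 3, H, W), so indexing individual frames and stacking them builds the batch RAFT expects. A quick sanity check, assuming decoding succeeded:

print(frames.shape, frames.dtype)  # e.g. torch.Size([T, 3, H, W]) torch.uint8
assert frames.ndim == 4 and frames.shape[1] == 3, "expected RGB frames in TCHW layout"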
@@ -86,21 +86,21 @@ img2 = torch.stack([frames[101], frames[151]])
 # weights in order to preprocess the input and rescale its values to the
 # required ``[-1, 1]`` interval.
 
-from torchvision.models.optical_flow import Raft_Large_Weights
+    from torchvision.models.optical_flow import Raft_Large_Weights
 
-weights = Raft_Large_Weights.DEFAULT
-transforms = weights.transforms()
+    weights = Raft_Large_Weights.DEFAULT
+    transforms = weights.transforms()
 
 
-def preprocess(img1, img2):
-
-
-
+    def preprocess(img1, img2):
+        img1 = F.resize(img1, size=[520, 960])
+        img2 = F.resize(img2, size=[520, 960])
+        return transforms(img1, img2)
 
 
-img1, img2 = preprocess(img1, img2)
+    img1, img2 = preprocess(img1, img2)
 
-print(f"shape = {img1.shape}, dtype = {img1.dtype}")
+    print(f"shape = {img1.shape}, dtype = {img1.dtype}")
 
 
 ####################################
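F here is presumably torchvision.transforms.functional, imported above the visible context. The resize to 520x960 is not arbitrary: RAFT builds features at 1/8 resolution, so input height and width must be divisible by 8; weights.transforms() then handles the float conversion and the rescale to [-1, 1]. A sketch of that divisibility constraint (helper name ours):

def check_raft_size(h: int, w: int) -> None:
    # RAFT extracts an 8x-downsampled feature map, so dims must be multiples of 8.
    if h % 8 or w % 8:
        raise ValueError(f"RAFT needs H and W divisible by 8, got {h}x{w}")

check_raft_size(520, 960)  # 520 = 65 * 8 and 960 = 120 * 8, so this passes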
@@ -112,17 +112,17 @@ print(f"shape = {img1.shape}, dtype = {img1.dtype}")
 # We also provide the :func:`~torchvision.models.optical_flow.raft_small` model
 # builder, which is smaller and faster to run, sacrificing a bit of accuracy.
 
-from torchvision.models.optical_flow import raft_large
+    from torchvision.models.optical_flow import raft_large
 
 # If you can, run this example on a GPU, it will be a lot faster.
-device = "cuda" if torch.cuda.is_available() else "cpu"
+    device = "cuda" if torch.cuda.is_available() else "cpu"
 
-model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device)
-model = model.eval()
+    model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device)
+    model = model.eval()
 
-list_of_flows = model(img1.to(device), img2.to(device))
-print(f"type = {type(list_of_flows)}")
-print(f"length = {len(list_of_flows)} = number of iterations of the model")
+    list_of_flows = model(img1.to(device), img2.to(device))
+    print(f"type = {type(list_of_flows)}")
+    print(f"length = {len(list_of_flows)} = number of iterations of the model")
 
 ####################################
 # The RAFT model outputs lists of predicted flows where each entry is a
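Since infer() never backpropagates, the forward pass could run under torch.no_grad() to avoid holding activations for autograd; a minimal sketch of that variant:

import torch

with torch.no_grad():  # inference only: skip autograd bookkeeping and save memory
    list_of_flows = model(img1.to(device), img2.to(device))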
@@ -137,10 +137,10 @@ print(f"length = {len(list_of_flows)} = number of iterations of the model")
 # vertical displacement of each pixel from the first image to the second image.
 # Note that the predicted flows are in "pixel" units; they are not normalized
 # w.r.t. the dimensions of the images.
-predicted_flows = list_of_flows[-1]
-print(f"dtype = {predicted_flows.dtype}")
-print(f"shape = {predicted_flows.shape} = (N, 2, H, W)")
-print(f"min = {predicted_flows.min()}, max = {predicted_flows.max()}")
+    predicted_flows = list_of_flows[-1]
+    print(f"dtype = {predicted_flows.dtype}")
+    print(f"shape = {predicted_flows.shape} = (N, 2, H, W)")
+    print(f"min = {predicted_flows.min()}, max = {predicted_flows.max()}")
 
 
 ####################################
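Because the flows are in pixel units, dividing by the image dimensions gives resolution-independent values, which is what grid_sample-style warping would need. A sketch, with the helper name ours:

def normalize_flow(flow):
    # flow: (N, 2, H, W) in pixels; channel 0 is horizontal (x), channel 1 vertical (y).
    _, _, h, w = flow.shape
    norm = flow.clone()
    norm[:, 0] /= w  # horizontal displacement as a fraction of image width
    norm[:, 1] /= h  # vertical displacement as a fraction of image height
    return norm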
@@ -155,15 +155,13 @@ print(f"min = {predicted_flows.min()}, max = {predicted_flows.max()}")
 # of the ball in the first image (going to the left) and in the second image
 # (going up).
 
-from torchvision.utils import flow_to_image
+    from torchvision.utils import flow_to_image
 
-flow_imgs = flow_to_image(predicted_flows)
+    flow_imgs = flow_to_image(predicted_flows)
 
-
-img1 = [(img + 1) / 2 for img in img1]
+    print(flow_imgs)
 
-
-plot(grid)
+    return "done"
 
 ####################################
 # Bonus: Creating GIFs of predicted flows
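flow_to_image converts each (2, H, W) flow field into a uint8 RGB image, encoding flow direction as color and magnitude as intensity. Printing the raw tensor is of limited use; the rendered images could be written to disk instead. A sketch using torchvision's save_image (file name ours):

from torchvision.utils import save_image

# flow_imgs is uint8 in [0, 255]; save_image expects floats in [0, 1].
save_image(flow_imgs.float() / 255, "predicted_flows.png")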
@@ -208,4 +206,6 @@ def write_flo(filename, flow):
     w.tofile(f)
     h.tofile(f)
     flow.tofile(f)
-    f.close()
+    f.close()
+
+gr.Interface(fn=infer, inputs=[], outputs=gr.Textbox()).launch()
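With inputs=[] and a gr.Textbox output, the Space's UI only ever shows the string "done"; the flow visualizations stay in the server logs. Note also that no hunk adds import gradio as gr (or import torch, or whatever F refers to), so those are presumably already present above line 59. A sketch of surfacing the first flow image in the UI instead, assuming a hypothetical infer_flow() refactor that returns flow_imgs:

import gradio as gr

def infer_image():
    # Hypothetical wrapper: run the same pipeline, but hand the first rendered
    # flow back to Gradio as an H x W x 3 uint8 numpy array.
    flow_imgs = infer_flow()  # assumed refactor of infer() returning flow_imgs
    return flow_imgs[0].permute(1, 2, 0).cpu().numpy()

gr.Interface(fn=infer_image, inputs=[], outputs=gr.Image()).launch()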