duongttr committed
Commit 3d85088 · 1 Parent(s): efb56b8

Update new app

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete changeset.
Files changed (50)
  1. .gitattributes +1 -20
  2. .gitignore +2 -1
  3. README.md +0 -6
  4. UI.py +0 -81
  5. app.py +47 -213
  6. app_config.py +9 -0
  7. checkpoints/{colornet.pth → epoch_10/colornet.pth} +1 -1
  8. examples/bear/video.mp4 → checkpoints/epoch_10/discriminator.pth +2 -2
  9. checkpoints/{embed_net.pth → epoch_10/embed_net.pth} +0 -0
  10. checkpoints/epoch_10/learning_state.pth +3 -0
  11. checkpoints/{nonlocal_net.pth → epoch_10/nonlocal_net.pth} +1 -1
  12. checkpoints/epoch_12/colornet.pth +3 -0
  13. examples/cows/video.mp4 → checkpoints/epoch_12/discriminator.pth +2 -2
  14. checkpoints/epoch_12/embed_net.pth +3 -0
  15. checkpoints/epoch_12/learning_state.pth +3 -0
  16. checkpoints/epoch_12/nonlocal_net.pth +3 -0
  17. checkpoints/epoch_16/colornet.pth +3 -0
  18. checkpoints/epoch_16/discriminator.pth +3 -0
  19. checkpoints/epoch_16/embed_net.pth +3 -0
  20. checkpoints/epoch_16/learning_state.pth +3 -0
  21. checkpoints/epoch_16/nonlocal_net.pth +3 -0
  22. checkpoints/epoch_20/colornet.pth +3 -0
  23. checkpoints/epoch_20/discriminator.pth +3 -0
  24. checkpoints/epoch_20/embed_net.pth +3 -0
  25. checkpoints/epoch_20/learning_state.pth +3 -0
  26. checkpoints/epoch_20/nonlocal_net.pth +3 -0
  27. cmd.txt +0 -21
  28. cmd_ddp.txt +0 -20
  29. environment.yml +0 -0
  30. examples/bear/ref.jpg +0 -0
  31. examples/boat/ref.jpg +0 -0
  32. examples/boat/video.mp4 +0 -0
  33. examples/cows/ref.jpg +0 -0
  34. examples/flamingo/ref.jpg +0 -0
  35. examples/flamingo/video.mp4 +0 -3
  36. examples/man/ref.jpg +0 -0
  37. examples/man/video.mp4 +0 -3
  38. examples/military/ref.jpg +0 -0
  39. examples/military/video.mp4 +0 -3
  40. gradio_cached_examples/13/log.csv +0 -5
  41. gradio_cached_examples/13/output/003c3114319372a78bf2f812ebaf0041afa280fb/output_video.mp4 +0 -3
  42. gradio_cached_examples/13/output/74c76e483235b7e80665e32d7fcdcc3da2be7644/output_video.mp4 +0 -0
  43. gradio_cached_examples/13/output/7969adca8ae38cb3b38ff8e7bb54688d942c7bc8/output_video.mp4 +0 -3
  44. gradio_cached_examples/13/output/e6d6153dedeb9fec586b3241311cc49dbc17bc85/output_video.mp4 +0 -0
  45. inputs/video.mp4/000000000.jpg +0 -0
  46. inputs/video.mp4/000000001.jpg +0 -0
  47. inputs/video.mp4/000000002.jpg +0 -0
  48. inputs/video.mp4/000000003.jpg +0 -0
  49. inputs/video.mp4/000000004.jpg +0 -0
  50. inputs/video.mp4/000000005.jpg +0 -0
.gitattributes CHANGED
@@ -1,4 +1,3 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
  *.arrow filter=lfs diff=lfs merge=lfs -text
  *.bin filter=lfs diff=lfs merge=lfs -text
  *.bz2 filter=lfs diff=lfs merge=lfs -text
@@ -33,22 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
- EvalDataset/clips/bear/output_video.mp4 filter=lfs diff=lfs merge=lfs -text
- EvalDataset/clips/bear/output_video_gray.mp4 filter=lfs diff=lfs merge=lfs -text
- EvalDataset/clips/boat/output_video_gray.mp4 filter=lfs diff=lfs merge=lfs -text
- EvalDataset/clips/cows/output_video.mp4 filter=lfs diff=lfs merge=lfs -text
- EvalDataset/clips/cows/output_video_gray.mp4 filter=lfs diff=lfs merge=lfs -text
- EvalDataset/clips/dog/output_video.mp4 filter=lfs diff=lfs merge=lfs -text
- EvalDataset/clips/flamingo/output_video_gray.mp4 filter=lfs diff=lfs merge=lfs -text
- EvalDataset/ref/goat/0000.jpg filter=lfs diff=lfs merge=lfs -text
- EvalDataset/ref/hockey/0000.jpg filter=lfs diff=lfs merge=lfs -text
- EvalDataset/ref/horsejump-high/0000.jpg filter=lfs diff=lfs merge=lfs -text
- EvalDataset/ref/motorbike/0000.jpg filter=lfs diff=lfs merge=lfs -text
- EvalDataset/ref/surf/0000.jpg filter=lfs diff=lfs merge=lfs -text
- examples/bear/video.mp4 filter=lfs diff=lfs merge=lfs -text
- examples/cows/video.mp4 filter=lfs diff=lfs merge=lfs -text
- examples/flamingo/video.mp4 filter=lfs diff=lfs merge=lfs -text
- gradio_cached_examples/13/output/003c3114319372a78bf2f812ebaf0041afa280fb/output_video.mp4 filter=lfs diff=lfs merge=lfs -text
- gradio_cached_examples/13/output/7969adca8ae38cb3b38ff8e7bb54688d942c7bc8/output_video.mp4 filter=lfs diff=lfs merge=lfs -text
- examples/man/video.mp4 filter=lfs diff=lfs merge=lfs -text
- examples/military/video.mp4 filter=lfs diff=lfs merge=lfs -text
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1,4 +1,5 @@
- checkpoints/
+ flagged/
+ sample_output/
  wandb/
  .vscode
  .DS_Store
README.md DELETED
@@ -1,6 +0,0 @@
- ---
- title: ViTExCo
- app_file: app.py
- sdk: gradio
- sdk_version: 3.40.1
- ---
UI.py DELETED
@@ -1,81 +0,0 @@
- import streamlit as st
- from PIL import Image
- import torchvision.transforms as transforms
- from streamlit_image_comparison import image_comparison
- import numpy as np
- import torch
- import torchvision
-
- ######################################### Utils ########################################
- video_extensions = ["mp4"]
- image_extensions = ["png", "jpg"]
-
-
- def check_type(file_name: str):
-     for image_extension in image_extensions:
-         if file_name.endswith(image_extension):
-             return "image"
-     for video_extension in video_extensions:
-         if file_name.endswith(video_extension):
-             return "video"
-     return None
-
-
- transform = transforms.Compose(
-     [transforms.Resize((256, 256)), transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]
- )
-
-
- ###################################### Load model ######################################
- @st.cache_resource
- def load_model():
-     model = torchvision.models.segmentation.deeplabv3_resnet101(pretrained=True)
-     model.eval()
-     return model
-
-
- model = load_model()
- ########################################## UI ##########################################
- st.title("Colorization")
-
- uploaded_file = st.file_uploader("Upload grayscale image or video", type=image_extensions + video_extensions)
- if uploaded_file:
-     # Image
-     if check_type(file_name=uploaded_file.name) == "image":
-         image = np.array(Image.open(uploaded_file), dtype=np.float32)
-
-         input_tensor = torchvision.transforms.functional.normalize(
-             torch.tensor(image).permute(2, 0, 1),
-             mean=[0.485, 0.456, 0.406],
-             std=[0.229, 0.224, 0.225],
-         ).unsqueeze(0)
-         process_button = st.button("Process")
-         if process_button:
-             with st.spinner("Từ từ coi..."):
-                 prediction = model(input_tensor)
-                 segment = prediction["out"][0].permute(1, 2, 0)
-                 segment = segment.detach().numpy()
-
-                 st.image(segment)
-                 st.image(image)
-
-                 image_comparison(
-                     img1=image,
-                     img2=np.array(segment),
-                     label1="Grayscale",
-                     label2="Colorized",
-                     make_responsive=True,
-                     show_labels=True,
-                 )
-     # Video
-     else:
-         # video = open(uploaded_file.name)
-         st.video("https://youtu.be/dQw4w9WgXcQ")
-
- hide_menu_style = """
-     <style>
-     #MainMenu {visibility: hidden; }
-     footer {visibility: hidden;}
-     </style>
- """
- st.markdown(hide_menu_style, unsafe_allow_html=True)
app.py CHANGED
@@ -1,215 +1,49 @@
- import numpy as np
- import shutil
+ import gradio as gr
+ from src.inference import SwinTExCo
+ import cv2
  import os
- import argparse
- import torch
- import glob
- from tqdm import tqdm
  from PIL import Image
- from collections import OrderedDict
- from src.models.vit.config import load_config
- import torchvision.transforms as transforms
- import cv2
- from skimage import io
-
- from src.models.CNN.ColorVidNet import GeneralColorVidNet
- from src.models.vit.embed import GeneralEmbedModel
- from src.models.CNN.NonlocalNet import GeneralWarpNet
- from src.models.CNN.FrameColor import frame_colorization
- from src.utils import (
-     RGB2Lab,
-     ToTensor,
-     Normalize,
-     uncenter_l,
-     tensor_lab2rgb,
-     SquaredPadding,
-     UnpaddingSquare
- )
-
- import gradio as gr
-
- def load_params(ckpt_file):
-     params = torch.load(ckpt_file, map_location=device)
-     new_params = []
-     for key, value in params.items():
-         new_params.append((key, value))
-     return OrderedDict(new_params)
-
- def custom_transform(transforms, img):
-     for transform in transforms:
-         if isinstance(transform, SquaredPadding):
-             img,padding=transform(img, return_paddings=True)
-         else:
-             img = transform(img)
-     return img.to(device), padding
-
- def save_frames(predicted_rgb, video_name, frame_name):
-     if predicted_rgb is not None:
-         predicted_rgb = np.clip(predicted_rgb, 0, 255).astype(np.uint8)
-         # frame_path_parts = frame_path.split(os.sep)
-         # if os.path.exists(os.path.join(OUTPUT_RESULT_PATH, frame_path_parts[-2])):
-         #     shutil.rmtree(os.path.join(OUTPUT_RESULT_PATH, frame_path_parts[-2]))
-         # os.makedirs(os.path.join(OUTPUT_RESULT_PATH, frame_path_parts[-2]), exist_ok=True)
-         predicted_rgb = np.transpose(predicted_rgb, (1,2,0))
-         pil_img = Image.fromarray(predicted_rgb)
-         pil_img.save(os.path.join(OUTPUT_RESULT_PATH, video_name, frame_name))
-
- def extract_frames_from_video(video_path):
-     cap = cv2.VideoCapture(video_path)
-     fps = cap.get(cv2.CAP_PROP_FPS)
-
-     # remove if exists folder
-     output_frames_path = os.path.join(INPUT_VIDEO_FRAMES_PATH, os.path.basename(video_path))
-     if os.path.exists(output_frames_path):
-         shutil.rmtree(output_frames_path)
-
-     # make new folder
-     os.makedirs(output_frames_path)
-
-     currentframe = 0
-     frame_path_list = []
-     while(True):
-
-         # reading from frame
-         ret,frame = cap.read()
-
-         if ret:
-             name = os.path.join(output_frames_path, f'{currentframe:09d}.jpg')
-             frame_path_list.append(name)
-             cv2.imwrite(name, frame)
-             currentframe += 1
-         else:
-             break
-
-     cap.release()
-     cv2.destroyAllWindows()
-
-     return frame_path_list, fps
-
- def combine_frames_from_folder(frames_list_path, fps = 30):
-     frames_list = glob.glob(f'{frames_list_path}/*.jpg')
-     frames_list.sort()
-
-     sample_shape = cv2.imread(frames_list[0]).shape
-
-     output_video_path = os.path.join(frames_list_path, 'output_video.mp4')
-     out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (sample_shape[1], sample_shape[0]))
-     for filename in frames_list:
-         img = cv2.imread(filename)
-         out.write(img)
-
-     out.release()
-     return output_video_path
-
-
- def upscale_image(I_current_rgb, I_current_ab_predict):
-     H, W = I_current_rgb.size
-     high_lab_transforms = [
-         SquaredPadding(target_size=max(H,W)),
-         RGB2Lab(),
-         ToTensor(),
-         Normalize()
-     ]
-     # current_frame_pil_rgb = Image.fromarray(np.clip(I_current_rgb.squeeze(0).permute(1,2,0).cpu().numpy() * 255, 0, 255).astype('uint8'))
-     high_lab_current, paddings = custom_transform(high_lab_transforms, I_current_rgb)
-     high_lab_current = torch.unsqueeze(high_lab_current,dim=0).to(device)
-     high_l_current = high_lab_current[:, 0:1, :, :]
-     high_ab_current = high_lab_current[:, 1:3, :, :]
-     upsampler = torch.nn.Upsample(scale_factor=max(H,W)/224,mode="bilinear")
-     high_ab_predict = upsampler(I_current_ab_predict)
-     I_predict_rgb = tensor_lab2rgb(torch.cat((uncenter_l(high_l_current), high_ab_predict), dim=1))
-     upadded = UnpaddingSquare()
-     I_predict_rgb = upadded(I_predict_rgb, paddings)
-     return I_predict_rgb
-
- def colorize_video(video_path, ref_np):
-     frames_list, fps = extract_frames_from_video(video_path)
-
-     frame_ref = Image.fromarray(ref_np).convert("RGB")
-     I_last_lab_predict = None
-     IB_lab, IB_paddings = custom_transform(transforms, frame_ref)
-     IB_lab = IB_lab.unsqueeze(0).to(device)
-     IB_l = IB_lab[:, 0:1, :, :]
-     IB_ab = IB_lab[:, 1:3, :, :]
-
-     with torch.no_grad():
-         I_reference_lab = IB_lab
-         I_reference_l = I_reference_lab[:, 0:1, :, :]
-         I_reference_ab = I_reference_lab[:, 1:3, :, :]
-         I_reference_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_reference_l), I_reference_ab), dim=1)).to(device)
-         features_B = embed_net(I_reference_rgb)
-
-     video_path_parts = frames_list[0].split(os.sep)
-
-     if os.path.exists(os.path.join(OUTPUT_RESULT_PATH, video_path_parts[-2])):
-         shutil.rmtree(os.path.join(OUTPUT_RESULT_PATH, video_path_parts[-2]))
-     os.makedirs(os.path.join(OUTPUT_RESULT_PATH, video_path_parts[-2]), exist_ok=True)
-
-     for frame_path in tqdm(frames_list):
-         curr_frame = Image.open(frame_path).convert("RGB")
-         IA_lab, IA_paddings = custom_transform(transforms, curr_frame)
-         IA_lab = IA_lab.unsqueeze(0).to(device)
-         IA_l = IA_lab[:, 0:1, :, :]
-         IA_ab = IA_lab[:, 1:3, :, :]
-
-         if I_last_lab_predict is None:
-             I_last_lab_predict = torch.zeros_like(IA_lab).to(device)
-
-         with torch.no_grad():
-             I_current_lab = IA_lab
-             I_current_ab_predict, _ = frame_colorization(
-                 IA_l,
-                 I_reference_lab,
-                 I_last_lab_predict,
-                 features_B,
-                 embed_net,
-                 nonlocal_net,
-                 colornet,
-                 luminance_noise=0,
-                 temperature=1e-10,
-                 joint_training=False
-             )
-             I_last_lab_predict = torch.cat((IA_l, I_current_ab_predict), dim=1)
-
-         # IA_predict_rgb = tensor_lab2rgb(torch.cat((uncenter_l(IA_l), I_current_ab_predict), dim=1))
-         IA_predict_rgb = upscale_image(curr_frame, I_current_ab_predict)
-         #IA_predict_rgb = torch.nn.functional.upsample_bilinear(IA_predict_rgb, scale_factor=2)
-         save_frames(IA_predict_rgb.squeeze(0).cpu().numpy() * 255, video_path_parts[-2], os.path.basename(frame_path))
-     return combine_frames_from_folder(os.path.join(OUTPUT_RESULT_PATH, video_path_parts[-2]), fps)
-
- if __name__ == '__main__':
-     # Init global variables
-     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-     INPUT_VIDEO_FRAMES_PATH = 'inputs'
-     OUTPUT_RESULT_PATH = 'outputs'
-     weight_path = 'checkpoints'
-
-     embed_net=GeneralEmbedModel(pretrained_model="swin-tiny", device=device).to(device)
-     nonlocal_net = GeneralWarpNet(feature_channel=128).to(device)
-     colornet=GeneralColorVidNet(7).to(device)
-
-     embed_net.eval()
-     nonlocal_net.eval()
-     colornet.eval()
-
-     # Load weights
-     # embed_net_params = load_params(os.path.join(weight_path, "embed_net.pth"))
-     nonlocal_net_params = load_params(os.path.join(weight_path, "nonlocal_net.pth"))
-     colornet_params = load_params(os.path.join(weight_path, "colornet.pth"))
-
-     # embed_net.load_state_dict(embed_net_params, strict=True)
-     nonlocal_net.load_state_dict(nonlocal_net_params, strict=True)
-     colornet.load_state_dict(colornet_params, strict=True)
-
-     transforms = [SquaredPadding(target_size=224),
-                   RGB2Lab(),
-                   ToTensor(),
-                   Normalize()]
-
-     #examples = [[vid, ref] for vid, ref in zip(sorted(glob.glob('examples/*/*.mp4')), sorted(glob.glob('examples/*/*.jpg')))]
-     demo = gr.Interface(colorize_video,
-                         inputs=[gr.Video(), gr.Image()],
-                         outputs="playable_video")#,
-                         #examples=examples,
-                         #cache_examples=True)
-     demo.launch()
+ import time
+ import app_config as cfg
+
+
+ model = SwinTExCo(weights_path=cfg.ckpt_path)
+
+ def video_colorization(video_path, ref_image, progress=gr.Progress()):
+     # Initialize video reader
+     video_reader = cv2.VideoCapture(video_path)
+     fps = video_reader.get(cv2.CAP_PROP_FPS)
+     height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
+     width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
+     num_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
+
+     # Initialize reference image
+     ref_image = Image.fromarray(ref_image)
+
+     # Initialize video writer
+     output_path = os.path.join(os.path.dirname(video_path), os.path.basename(video_path).split('.')[0] + '_colorized.mp4')
+     video_writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
+
+     # Init progress bar
+
+     for colorized_frame, _ in zip(model.predict_video(video_reader, ref_image), progress.tqdm(range(num_frames), desc="Colorizing video", unit="frames")):
+         video_writer.write(colorized_frame)
+
+     # for i in progress.tqdm(range(1000)):
+     #     time.sleep(0.5)
+
+     video_writer.release()
+
+     return output_path
+
+ app = gr.Interface(
+     fn=video_colorization,
+     inputs=[gr.Video(format="mp4", sources="upload", label="Input video (grayscale)", interactive=True),
+             gr.Image(sources="upload", label="Reference image (color)")],
+     outputs=gr.Video(label="Output video (colorized)"),
+     title=cfg.TITLE,
+     description=cfg.DESCRIPTION
+ ).queue()
+
+
+ app.launch()
app_config.py ADDED
@@ -0,0 +1,9 @@
+ ckpt_path = 'checkpoints/epoch_20'
+ TITLE = 'Deep Exemplar-based Video Colorization using Vision Transformer'
+ DESCRIPTION = '''
+ <center>
+ This is a demo app of the thesis: <b>Deep Exemplar-based Video Colorization using Vision Transformer</b>.<br/>
+ The code is available at: <i>The link will be updated soon</i>.<br/>
+ Our previous work was also written into paper and accepted at the <a href="https://ictc.org/program_proceeding">ICTC 2023 conference</a> (Section <i>B1-4</i>).
+ </center>
+ '''.strip()
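Note: the new app.py drives colorization entirely through the src.inference.SwinTExCo wrapper configured by app_config.py. Below is a minimal headless sketch of the same call pattern, outside Gradio. It assumes, as the handler above implies, that SwinTExCo.predict_video takes an open cv2.VideoCapture plus a PIL reference image and yields frames ready for cv2.VideoWriter.write; the input and output file names are hypothetical.

import cv2
from PIL import Image
import app_config as cfg
from src.inference import SwinTExCo

# Load the exemplar-based colorization model from the configured checkpoint directory.
model = SwinTExCo(weights_path=cfg.ckpt_path)

reader = cv2.VideoCapture('input_gray.mp4')        # hypothetical grayscale input
ref = Image.open('reference.jpg').convert('RGB')   # hypothetical color reference

fps = reader.get(cv2.CAP_PROP_FPS)
size = (int(reader.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(reader.get(cv2.CAP_PROP_FRAME_HEIGHT)))
writer = cv2.VideoWriter('output_colorized.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, size)

# Assumed to yield one colorized frame per input frame, in order.
for frame in model.predict_video(reader, ref):
    writer.write(frame)

writer.release()
reader.release()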
checkpoints/{colornet.pth → epoch_10/colornet.pth} RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5257ae325e292cd5fb2eff47095e1c4e4815455bd5fb6dc5ed2ee2b923172875
+ oid sha256:1ecb43b5e02b77bec5342e2e296d336bf8f384a07d3c809d1a548fd5fb1e7365
  size 131239411
examples/bear/video.mp4 → checkpoints/epoch_10/discriminator.pth RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cb4cec5064873a4616f78bdb653830683a4842b2a5cfd0665b395cff4d120d04
- size 1263445
+ oid sha256:ce8968a9d3d2f99b1bc1e32080507e0d671cee00b66200105c8839be684b84b4
+ size 45073068
checkpoints/{embed_net.pth → epoch_10/embed_net.pth} RENAMED
File without changes
checkpoints/epoch_10/learning_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8d09b1e96fdf0205930a21928449a44c51cedd965cc0d573068c73971bcb8bd2
+ size 748166487
checkpoints/{nonlocal_net.pth → epoch_10/nonlocal_net.pth} RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b94c6990f20088bc3cc3fe0b29a6d52e6e746b915c506f0cd349fc6ad6197e72
+ oid sha256:86c97d6803d625a0dff8c6c09b70852371906eb5ef77df0277c27875666a68e2
  size 73189765
checkpoints/epoch_12/colornet.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:50f4b92cd59f4c88c0c1d7c93652413d54b1b96d729fc4b93e235887b5164f28
+ size 131239846
examples/cows/video.mp4 → checkpoints/epoch_12/discriminator.pth RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1ac08603d719cd7a8d71fac76c9318d3e8f1e516e9b3c2a06323a0e4e78f6410
- size 2745681
+ oid sha256:2b54b0bad6ceec33569cc5833cbf03ed8ddbb5f07998aa634badf8298d3cd15f
+ size 45073513
checkpoints/epoch_12/embed_net.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:73e2a156c0737e3d063af0e95e1e7176362e85120b88275a1aa02dcf488e1865
+ size 110352698
checkpoints/epoch_12/learning_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f8bb4dbb3cb8e497a9a2079947f0221823fa8b44695e2d2ad8478be48464fad
+ size 748166934
checkpoints/epoch_12/nonlocal_net.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c1f76b53dad7bf15c7d26aa106c95387e75751b8c31fafef2bd73ea7d77160cb
+ size 73190208
checkpoints/epoch_16/colornet.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:81ec9cff0ad5b0d920179fa7a9cc229e1424bfc796b7134604ff66b97d748c49
+ size 131239846
checkpoints/epoch_16/discriminator.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:42262d5ed7596f38e65774085222530eee57da8dfaa7fe1aa223d824ed166f62
+ size 45073513
checkpoints/epoch_16/embed_net.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:73e2a156c0737e3d063af0e95e1e7176362e85120b88275a1aa02dcf488e1865
+ size 110352698
checkpoints/epoch_16/learning_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ea4cf81341750ebf517c696a0f6241bfeede0584b0ce75ad208e3ffc8280877f
+ size 748166934
checkpoints/epoch_16/nonlocal_net.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:85b63363bc9c79732df78ba50ed19491ed86e961214bbd1f796a871334eba516
+ size 73190208
checkpoints/epoch_20/colornet.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c524f4e5df5f6ce91db1973a30de55299ebcbbde1edd2009718d3b4cd2631339
+ size 131239846
checkpoints/epoch_20/discriminator.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fcd80950c796fcfe6e4b6bdeeb358776700458d868da94ee31df3d1d37779310
+ size 45073513
checkpoints/epoch_20/embed_net.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:73e2a156c0737e3d063af0e95e1e7176362e85120b88275a1aa02dcf488e1865
+ size 110352698
checkpoints/epoch_20/learning_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b1163b210b246b07d8f1c50eb3766d97c6f03bf409c854d00b7c69edb6d7391
+ size 748166934
checkpoints/epoch_20/nonlocal_net.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:031e5f38cc79eb3c0ed51ca2ad3c8921fdda2fa05946c357f84881259de74e6d
+ size 73190208
cmd.txt DELETED
@@ -1,21 +0,0 @@
- python train.py --video_data_root_list datasets/images/images \
- --flow_data_root_list datasets/flow_fp16/flow_fp16 \
- --mask_data_root_list datasets/pgm/pgm \
- --data_root_imagenet datasets/imgnet \
- --annotation_file_path datasets/final_annot.csv \
- --imagenet_pairs_file datasets/pairs.txt \
- --gpu_ids 0 \
- --workers 12 \
- --batch_size 2 \
- --real_reference_probability 0.99 \
- --weight_contextual 1 \
- --weight_perceptual 0.1 \
- --weight_smoothness 5 \
- --weight_gan 0.9 \
- --weight_consistent 0.1 \
- --use_wandb True \
- --wandb_token "f05d31e6b15339b1cfc5ee1c77fe51f66fc3ea9e" \
- --wandb_name "vit_tiny_patch16_384_nofeat" \
- --checkpoint_step 500 \
- --epoch_train_discriminator 3 \
- --epoch 20
cmd_ddp.txt DELETED
@@ -1,20 +0,0 @@
- !torchrun --nnodes=1 --nproc_per_node=2 train_ddp.py --video_data_root_list $video_data_root_list \
- --flow_data_root_list $flow_data_root_list \
- --mask_data_root_list $mask_data_root_list \
- --data_root_imagenet $data_root_imagenet \
- --annotation_file_path $annotation_file_path \
- --imagenet_pairs_file $imagenet_pairs_file \
- --gpu_ids "0,1" \
- --workers 2 \
- --batch_size 2 \
- --real_reference_probability 0.99 \
- --weight_contextual 1 \
- --weight_perceptual 0.1 \
- --weight_smoothness 5 \
- --weight_gan 0.9 \
- --weight_consistent 0.1 \
- --wandb_token "165e7148081f263b423722115e2ad40fa5339ecf" \
- --wandb_name "vit_tiny_patch16_384_nofeat" \
- --checkpoint_step 2000 \
- --epoch_train_discriminator 2 \
- --epoch 10
environment.yml DELETED
File without changes
examples/bear/ref.jpg DELETED
Binary file (30.9 kB)
 
examples/boat/ref.jpg DELETED
Binary file (65.4 kB)
 
examples/boat/video.mp4 DELETED
Binary file (853 kB)
 
examples/cows/ref.jpg DELETED
Binary file (252 kB)
 
examples/flamingo/ref.jpg DELETED
Binary file (539 kB)
 
examples/flamingo/video.mp4 DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:5a103fd4991a00e419e5236b885fe9d220704ba0a6ac794c87aaa3f62a4f1561
- size 1239570
examples/man/ref.jpg DELETED
Binary file (176 kB)
 
examples/man/video.mp4 DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:dff54d74e38285d60e064a0332c66d3ca2860f3c05de814a63693a9c331e94c9
- size 1693420
examples/military/ref.jpg DELETED
Binary file (111 kB)
 
examples/military/video.mp4 DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:02ce1717c2f5768af588a0bbeb47c659e54a310a880a52b68a8a7701647e145a
- size 1495376
gradio_cached_examples/13/log.csv DELETED
@@ -1,5 +0,0 @@
- output,flag,username,timestamp
- /content/ViTExCo/gradio_cached_examples/13/output/003c3114319372a78bf2f812ebaf0041afa280fb/output_video.mp4,,,2023-08-15 09:45:37.897615
- /content/ViTExCo/gradio_cached_examples/13/output/e6d6153dedeb9fec586b3241311cc49dbc17bc85/output_video.mp4,,,2023-08-15 09:46:01.048997
- /content/ViTExCo/gradio_cached_examples/13/output/7969adca8ae38cb3b38ff8e7bb54688d942c7bc8/output_video.mp4,,,2023-08-15 09:46:34.503322
- /content/ViTExCo/gradio_cached_examples/13/output/74c76e483235b7e80665e32d7fcdcc3da2be7644/output_video.mp4,,,2023-08-15 09:46:58.088903
gradio_cached_examples/13/output/003c3114319372a78bf2f812ebaf0041afa280fb/output_video.mp4 DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:b5ab666998e14fb00281a90f8801753eca001a432641ae2770007a8336b4c64e
- size 1213824
gradio_cached_examples/13/output/74c76e483235b7e80665e32d7fcdcc3da2be7644/output_video.mp4 DELETED
Binary file (914 kB)
 
gradio_cached_examples/13/output/7969adca8ae38cb3b38ff8e7bb54688d942c7bc8/output_video.mp4 DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:7c367dab34e596f7f0fed34c7e2384525de2ba1824b410d0770bdbd17bc9e72a
- size 1793060
gradio_cached_examples/13/output/e6d6153dedeb9fec586b3241311cc49dbc17bc85/output_video.mp4 DELETED
Binary file (673 kB)
 
inputs/video.mp4/000000000.jpg DELETED
Binary file (113 kB)
 
inputs/video.mp4/000000001.jpg DELETED
Binary file (146 kB)
 
inputs/video.mp4/000000002.jpg DELETED
Binary file (143 kB)
 
inputs/video.mp4/000000003.jpg DELETED
Binary file (141 kB)
 
inputs/video.mp4/000000004.jpg DELETED
Binary file (142 kB)
 
inputs/video.mp4/000000005.jpg DELETED
Binary file (141 kB)