robinwitch committed on
Commit bd336d0 · 1 Parent(s): f218e94
Files changed (1): app.py (+14, -13)
app.py CHANGED
@@ -106,19 +106,19 @@ class BaseTrainer(object):
 
         self.checkpoint_path = tmp_dir
         args.tmp_dir = tmp_dir
-        if self.rank == 0:
-            self.test_data = __import__(f"dataloaders.{args.dataset}", fromlist=["something"]).CustomDataset(args, "test")
-            self.test_loader = torch.utils.data.DataLoader(
-                self.test_data,
-                batch_size=1,
-                shuffle=False,
-                num_workers=args.loader_workers,
-                drop_last=False,
-            )
+
+        self.test_data = __import__(f"dataloaders.{args.dataset}", fromlist=["something"]).CustomDataset(args, "test")
+        self.test_loader = torch.utils.data.DataLoader(
+            self.test_data,
+            batch_size=1,
+            shuffle=False,
+            num_workers=args.loader_workers,
+            drop_last=False,
+        )
         logger.info(f"Init test dataloader success")
-        model_module = __import__(f"models.{args.model}", fromlist=["something"])
-
-        self.model = getattr(model_module, args.g_name)(args).cuda()
+
+        from models.denoiser import MDM
+        self.model = MDM(args).cuda()
 
         if self.rank == 0:
             logger.info(self.model)
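This hunk makes two changes: it drops the if self.rank == 0: guard, so every process (not just the main rank in a distributed run) builds the test dataloader, and it replaces the config-driven dynamic model import with a hard-coded one. A minimal sketch of the two import styles, assuming a models/denoiser.py module defining an MDM class as the diff does; importlib.import_module is the idiomatic equivalent of the __import__(..., fromlist=[...]) call:

    import importlib

    def build_model_dynamic(args):
        # Before: resolve the module and class from config strings at
        # runtime, so args.model / args.g_name select the architecture.
        model_module = importlib.import_module(f"models.{args.model}")
        return getattr(model_module, args.g_name)(args)

    def build_model_static(args):
        # After: a fixed import; simpler, and a bad module name fails at
        # import time, but the model class is no longer configurable.
        # (The diff additionally moves the model to GPU with .cuda().)
        from models.denoiser import MDM
        return MDM(args)

The static form trades flexibility for clarity: tools like linters and IDEs can now see exactly which class is instantiated.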
@@ -768,7 +768,8 @@ demo = gr.Interface(
     ],
     title='SynTalker: Enabling Synergistic Full-Body Control in Prompt-Based Co-Speech Motion Generation',
     description="1. Upload your audio. <br/>\
-        2. Then, sit back and wait for the rendering to happen! This may take a while (e.g. 2 minutes) <br/>\
+        2. Then, sit back and wait for the rendering to happen! This may take a while (e.g. 2-5 minutes) <br/>\
+        (The reason of running time so long is that provided GPU have an limitation in GPU running time, we must use CPU to handle some GPU tasks)\
         3. After, you can view the videos. <br/>\
         4. Notice that we use a fix face animation, our method only produce body motion. <br/>\
         5. Use DDPM sample strategy will generate a better result, while it will take more inference time. \
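For context, description is the Gradio UI text rendered above the demo's inputs: it accepts HTML, so each <br/> produces a visible line break, and the trailing backslashes are just Python line continuations inside one long string. A minimal sketch of the same pattern, with a placeholder function and audio components standing in for SynTalker's real pipeline:

    import gradio as gr

    def generate(audio):
        # Placeholder: echo the uploaded audio instead of running the
        # SynTalker pipeline; only the Interface wiring is illustrated.
        return audio

    demo = gr.Interface(
        fn=generate,
        inputs=gr.Audio(type="filepath"),
        outputs=gr.Audio(),
        title="SynTalker: Enabling Synergistic Full-Body Control in Prompt-Based Co-Speech Motion Generation",
        description="1. Upload your audio. <br/>\
            2. Then, sit back and wait for the rendering to happen! <br/>",
    )

    if __name__ == "__main__":
        demo.launch()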
 