Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ from pathlib import Path
|
|
15 |
|
16 |
# Initialize the model and ensure it's on the correct device
|
17 |
def load_model():
|
18 |
-
model = SemantiCodec(token_rate=
|
19 |
if torch.cuda.is_available():
|
20 |
# Move the model to CUDA and ensure it's fully initialized on CUDA
|
21 |
model = model.to("cuda:0")
|
@@ -200,7 +200,7 @@ def stream_both(audio_path):
|
|
200 |
yield None, f"Encoded to {tokens.shape[1]} tokens, starting decoding..."
|
201 |
|
202 |
# If tokens are too small, decode all at once
|
203 |
-
if tokens.shape[1] < 500:
|
204 |
# Convert to torch tensor with Long dtype for embedding
|
205 |
tokens_tensor = torch.tensor(tokens, dtype=torch.long).to(model_device)
|
206 |
|
@@ -215,7 +215,7 @@ def stream_both(audio_path):
|
|
215 |
return
|
216 |
|
217 |
# Split tokens into chunks for streaming
|
218 |
-
chunk_size = 500
|
219 |
num_chunks = (tokens.shape[1] + chunk_size - 1) // chunk_size # Ceiling division
|
220 |
|
221 |
all_audio_chunks = []
|
@@ -282,7 +282,7 @@ def stream_decode_tokens(token_file):
|
|
282 |
semanticodec.to(model_device)
|
283 |
|
284 |
# If tokens are too small, decode all at once
|
285 |
-
if tokens.shape[1] < 500:
|
286 |
# Convert to torch tensor with Long dtype for embedding
|
287 |
tokens_tensor = torch.tensor(tokens, dtype=torch.long)
|
288 |
tokens_tensor = tokens_tensor.to(model_device)
|
@@ -297,7 +297,7 @@ def stream_decode_tokens(token_file):
|
|
297 |
return
|
298 |
|
299 |
# Split tokens into chunks for streaming
|
300 |
-
chunk_size = 500
|
301 |
num_chunks = (tokens.shape[1] + chunk_size - 1) // chunk_size # Ceiling division
|
302 |
|
303 |
# First status update
|
|
|
15 |
|
16 |
# Initialize the model and ensure it's on the correct device
|
17 |
def load_model():
|
18 |
+
model = SemantiCodec(token_rate=100, semantic_vocab_size=32768) # 0.35 kbps
|
19 |
if torch.cuda.is_available():
|
20 |
# Move the model to CUDA and ensure it's fully initialized on CUDA
|
21 |
model = model.to("cuda:0")
|
|
|
200 |
yield None, f"Encoded to {tokens.shape[1]} tokens, starting decoding..."
|
201 |
|
202 |
# If tokens are too small, decode all at once
|
203 |
+
if tokens.shape[1] < 1500: # Changed from 500 to 1500 (15 seconds at 100 tokens/sec)
|
204 |
# Convert to torch tensor with Long dtype for embedding
|
205 |
tokens_tensor = torch.tensor(tokens, dtype=torch.long).to(model_device)
|
206 |
|
|
|
215 |
return
|
216 |
|
217 |
# Split tokens into chunks for streaming
|
218 |
+
chunk_size = 1500 # Changed from 500 to 1500 (15 seconds at 100 tokens/sec)
|
219 |
num_chunks = (tokens.shape[1] + chunk_size - 1) // chunk_size # Ceiling division
|
220 |
|
221 |
all_audio_chunks = []
|
|
|
282 |
semanticodec.to(model_device)
|
283 |
|
284 |
# If tokens are too small, decode all at once
|
285 |
+
if tokens.shape[1] < 1500: # Changed from 500 to 1500 (15 seconds at 100 tokens/sec)
|
286 |
# Convert to torch tensor with Long dtype for embedding
|
287 |
tokens_tensor = torch.tensor(tokens, dtype=torch.long)
|
288 |
tokens_tensor = tokens_tensor.to(model_device)
|
|
|
297 |
return
|
298 |
|
299 |
# Split tokens into chunks for streaming
|
300 |
+
chunk_size = 1500 # Changed from 500 to 1500 (15 seconds at 100 tokens/sec)
|
301 |
num_chunks = (tokens.shape[1] + chunk_size - 1) // chunk_size # Ceiling division
|
302 |
|
303 |
# First status update
|