Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ from pathlib import Path
|
|
15 |
|
16 |
# Initialize the model and ensure it's on the correct device
|
17 |
def load_model():
|
18 |
-
model = SemantiCodec(token_rate=
|
19 |
if torch.cuda.is_available():
|
20 |
# Move the model to CUDA and ensure it's fully initialized on CUDA
|
21 |
model = model.to("cuda:0")
|
@@ -200,7 +200,7 @@ def stream_both(audio_path):
|
|
200 |
yield None, f"Encoded to {tokens.shape[1]} tokens, starting decoding..."
|
201 |
|
202 |
# If tokens are too small, decode all at once
|
203 |
-
if tokens.shape[1] < 500:
|
204 |
# Convert to torch tensor with Long dtype for embedding
|
205 |
tokens_tensor = torch.tensor(tokens, dtype=torch.long).to(model_device)
|
206 |
|
@@ -215,7 +215,7 @@ def stream_both(audio_path):
|
|
215 |
return
|
216 |
|
217 |
# Split tokens into chunks for streaming
|
218 |
-
chunk_size = 500
|
219 |
num_chunks = (tokens.shape[1] + chunk_size - 1) // chunk_size # Ceiling division
|
220 |
|
221 |
all_audio_chunks = []
|
@@ -282,7 +282,7 @@ def stream_decode_tokens(token_file):
|
|
282 |
semanticodec.to(model_device)
|
283 |
|
284 |
# If tokens are too small, decode all at once
|
285 |
-
if tokens.shape[1] < 500:
|
286 |
# Convert to torch tensor with Long dtype for embedding
|
287 |
tokens_tensor = torch.tensor(tokens, dtype=torch.long)
|
288 |
tokens_tensor = tokens_tensor.to(model_device)
|
@@ -297,7 +297,7 @@ def stream_decode_tokens(token_file):
|
|
297 |
return
|
298 |
|
299 |
# Split tokens into chunks for streaming
|
300 |
-
chunk_size = 500
|
301 |
num_chunks = (tokens.shape[1] + chunk_size - 1) // chunk_size # Ceiling division
|
302 |
|
303 |
# First status update
|
|
|
15 |
|
16 |
# Initialize the model and ensure it's on the correct device
|
17 |
def load_model():
|
18 |
+
model = SemantiCodec(token_rate=100, semantic_vocab_size=32768) # 0.35 kbps
|
19 |
if torch.cuda.is_available():
|
20 |
# Move the model to CUDA and ensure it's fully initialized on CUDA
|
21 |
model = model.to("cuda:0")
|
|
|
200 |
yield None, f"Encoded to {tokens.shape[1]} tokens, starting decoding..."
|
201 |
|
202 |
# If tokens are too small, decode all at once
|
203 |
+
if tokens.shape[1] < 1500: # Changed from 500 to 1500 (15 seconds at 100 tokens/sec)
|
204 |
# Convert to torch tensor with Long dtype for embedding
|
205 |
tokens_tensor = torch.tensor(tokens, dtype=torch.long).to(model_device)
|
206 |
|
|
|
215 |
return
|
216 |
|
217 |
# Split tokens into chunks for streaming
|
218 |
+
chunk_size = 1500 # Changed from 500 to 1500 (15 seconds at 100 tokens/sec)
|
219 |
num_chunks = (tokens.shape[1] + chunk_size - 1) // chunk_size # Ceiling division
|
220 |
|
221 |
all_audio_chunks = []
|
|
|
282 |
semanticodec.to(model_device)
|
283 |
|
284 |
# If tokens are too small, decode all at once
|
285 |
+
if tokens.shape[1] < 1500: # Changed from 500 to 1500 (15 seconds at 100 tokens/sec)
|
286 |
# Convert to torch tensor with Long dtype for embedding
|
287 |
tokens_tensor = torch.tensor(tokens, dtype=torch.long)
|
288 |
tokens_tensor = tokens_tensor.to(model_device)
|
|
|
297 |
return
|
298 |
|
299 |
# Split tokens into chunks for streaming
|
300 |
+
chunk_size = 1500 # Changed from 500 to 1500 (15 seconds at 100 tokens/sec)
|
301 |
num_chunks = (tokens.shape[1] + chunk_size - 1) // chunk_size # Ceiling division
|
302 |
|
303 |
# First status update
|