owiedotch committed
Commit 2c32151 · verified · 1 parent: 84f6dd0

Update app.py

Files changed (1)
  1. app.py +18 -21
app.py CHANGED
@@ -13,11 +13,7 @@ from pathlib import Path
 
 # Initialize the model
 def load_model():
-    model = SemantiCodec(token_rate=100, semantic_vocab_size=32768)  # 1.40 kbps
-    # Explicitly move model to CUDA
-    if torch.cuda.is_available():
-        model = model.cuda()
-    return model
+    return SemantiCodec(token_rate=100, semantic_vocab_size=32768)  # 1.40 kbps
 
 semanticodec = load_model()
 
@@ -26,7 +22,7 @@ def encode_audio(audio_path):
     """Encode audio file to tokens and return them as a file"""
     try:
         tokens = semanticodec.encode(audio_path)
-        # Move tokens to CPU only for numpy conversion
+        # Move tokens to CPU before converting to numpy
         if isinstance(tokens, torch.Tensor):
             tokens = tokens.cpu().numpy()
 
@@ -79,20 +75,19 @@ def decode_tokens(token_file):
         # Reshape to match expected format [batch, seq_len, features]
         tokens = tokens.reshape(1, -1, 1)
 
-        # Convert to torch tensor and move to CUDA explicitly
+        # Convert to torch tensor (on CPU first)
         tokens = torch.tensor(tokens)
-        tokens = tokens.cuda()  # Force to CUDA
 
-        # Force any tensor objects to cuda to be safe
-        if isinstance(tokens, torch.Tensor) and not tokens.is_cuda:
-            tokens = tokens.cuda()
+        # Explicitly move tokens to CUDA
+        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        tokens = tokens.to(device)
 
         # Decode the tokens
         waveform = semanticodec.decode(tokens)
 
-        # Move waveform to CPU ONLY at the end for audio processing
+        # Move waveform to CPU for audio processing
         if isinstance(waveform, torch.Tensor):
-            waveform = waveform.detach().cpu().numpy()
+            waveform = waveform.cpu().numpy()
 
         # Create in-memory file for audio
         output_buffer = io.BytesIO()
@@ -105,8 +100,7 @@ def decode_tokens(token_file):
 
         return output_buffer, f"Decoded {tokens.shape[1]} tokens to audio"
     except Exception as e:
-        import traceback
-        return None, f"Error decoding tokens: {str(e)}\n{traceback.format_exc()}"
+        return None, f"Error decoding tokens: {str(e)}"
 
 @spaces.GPU(duration=80)
 def process_both(audio_path):
@@ -122,15 +116,19 @@ def process_both(audio_path):
         # Reshape to match expected format [batch, seq_len, features]
         tokens = tokens.reshape(1, -1, 1)
 
-        # Convert back to torch tensor and move to CUDA explicitly
-        tokens_tensor = torch.tensor(tokens).cuda()  # Force to CUDA
+        # Convert back to torch tensor (on CPU first)
+        tokens_tensor = torch.tensor(tokens)
+
+        # Explicitly move tokens to CUDA
+        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        tokens_tensor = tokens_tensor.to(device)
 
         # Decode
         waveform = semanticodec.decode(tokens_tensor)
 
-        # Move waveform to CPU ONLY at the end for audio processing
+        # Move waveform to CPU for audio processing
         if isinstance(waveform, torch.Tensor):
-            waveform = waveform.detach().cpu().numpy()
+            waveform = waveform.cpu().numpy()
 
         # Create in-memory file
         output_buffer = io.BytesIO()
@@ -143,8 +141,7 @@ def process_both(audio_path):
 
         return output_buffer, f"Encoded to {tokens.shape[1]} tokens\nDecoded {tokens.shape[1]} tokens to audio"
     except Exception as e:
-        import traceback
-        return None, f"Error processing audio: {str(e)}\n{traceback.format_exc()}"
+        return None, f"Error processing audio: {str(e)}"
 
 # Create Gradio interface
 with gr.Blocks(title="Oterin Audio Codec") as demo:
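
For context, the pattern this commit converges on: create tensors on CPU, move them to an explicitly chosen torch.device, and call .cpu() before any NumPy conversion. Below is a minimal, self-contained sketch of that pattern; fake_decode is a hypothetical stand-in for semanticodec.decode, which is not reproduced here.

import torch

# Hypothetical stand-in for semanticodec.decode (an assumption, not the real model).
def fake_decode(tokens: torch.Tensor) -> torch.Tensor:
    return tokens.float().mean(dim=-1)

# Choose the device once; calling .cuda() unconditionally raises on a
# CPU-only machine, e.g. before the @spaces.GPU decorator attaches a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build on CPU, reshape to [batch, seq_len, features], then move explicitly.
tokens = torch.tensor([3, 1, 4, 1, 5, 9]).reshape(1, -1, 1).to(device)

waveform = fake_decode(tokens)

# Return to CPU before NumPy conversion; .numpy() fails on CUDA tensors.
if isinstance(waveform, torch.Tensor):
    waveform = waveform.cpu().numpy()

print(waveform.shape)  # (1, 6)

The same code then runs unchanged on CPU-only and GPU hosts, which is why the unconditional .cuda() calls and the redundant is_cuda re-check could be dropped.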