Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import soundfile as sf
|
|
| 5 |
from semanticodec import SemantiCodec
|
| 6 |
from huggingface_hub import HfApi
|
| 7 |
import spaces
|
|
|
|
| 8 |
|
| 9 |
# Initialize the model
|
| 10 |
def load_model():
|
|
@@ -16,6 +17,9 @@ semanticodec = load_model()
|
|
| 16 |
def encode_audio(audio_path):
|
| 17 |
"""Encode audio file to tokens and save them"""
|
| 18 |
tokens = semanticodec.encode(audio_path)
|
|
|
|
|
|
|
|
|
|
| 19 |
token_path = "encoded_audio.oterin"
|
| 20 |
np.save(token_path, tokens)
|
| 21 |
return token_path, f"Encoded to {len(tokens)} tokens"
|
|
@@ -24,7 +28,13 @@ def encode_audio(audio_path):
|
|
| 24 |
def decode_tokens(token_path):
|
| 25 |
"""Decode tokens to audio"""
|
| 26 |
tokens = np.load(token_path)
|
|
|
|
|
|
|
|
|
|
| 27 |
waveform = semanticodec.decode(tokens)
|
|
|
|
|
|
|
|
|
|
| 28 |
output_path = "output.wav"
|
| 29 |
sf.write(output_path, waveform[0, 0], 32000)
|
| 30 |
return output_path, f"Decoded {len(tokens)} tokens to audio"
|
|
|
|
| 5 |
from semanticodec import SemantiCodec
|
| 6 |
from huggingface_hub import HfApi
|
| 7 |
import spaces
|
| 8 |
+
import torch
|
| 9 |
|
| 10 |
# Initialize the model
|
| 11 |
def load_model():
|
|
|
|
| 17 |
def encode_audio(audio_path):
    """Encode an audio file to tokens and save them to disk.

    Args:
        audio_path: Path of the audio file; passed unchanged to
            ``semanticodec.encode``.

    Returns:
        A ``(token_path, message)`` tuple: the path of the saved token file
        and a human-readable status string.
    """
    tokens = semanticodec.encode(audio_path)
    # Move tokens to CPU before converting to numpy so np.save receives a
    # plain array rather than a (possibly accelerator-resident) tensor.
    if isinstance(tokens, torch.Tensor):
        tokens = tokens.cpu().numpy()
    token_path = "encoded_audio.oterin"
    # np.save appends ".npy" to any *filename* that lacks that suffix, which
    # would write "encoded_audio.oterin.npy" and leave the returned path
    # pointing at a file that does not exist. Saving through an open file
    # object keeps the name exactly as given.
    with open(token_path, "wb") as token_file:
        np.save(token_file, tokens)
    # NOTE(review): len() reports the size of the first axis only; if the
    # codec returns a batched array this is not the token count -- confirm.
    return token_path, f"Encoded to {len(tokens)} tokens"
|
|
|
|
| 28 |
def decode_tokens(token_path):
    """Decode a saved token file to audio and write it as a WAV file.

    Args:
        token_path: Path of a token file in NumPy ``.npy`` format; read with
            ``np.load``.

    Returns:
        A ``(output_path, message)`` tuple: the path of the written WAV file
        and a human-readable status string.
    """
    tokens = np.load(token_path)
    # Convert to a torch tensor only when the model advertises that it needs
    # one; the getattr default covers models that lack the attribute.
    if getattr(semanticodec, "decode_requires_tensor", False):
        tokens = torch.tensor(tokens)
    waveform = semanticodec.decode(tokens)
    if isinstance(waveform, torch.Tensor):
        # detach() first: Tensor.numpy() raises on a tensor that still
        # tracks gradients. It is a no-op when no gradients are tracked.
        waveform = waveform.detach().cpu().numpy()
    output_path = "output.wav"
    # NOTE(review): waveform[0, 0] presumably selects the first batch item
    # and first channel; 32000 is the sample rate handed to sf.write --
    # confirm both against the codec's output format.
    sf.write(output_path, waveform[0, 0], 32000)
    return output_path, f"Decoded {len(tokens)} tokens to audio"
|