Update app.py
app.py CHANGED

@@ -6,6 +6,8 @@ from semanticodec import SemantiCodec
 from huggingface_hub import HfApi
 import spaces
 import torch
+import tempfile
+import io
 
 # Initialize the model
 def load_model():
@@ -13,15 +15,19 @@ def load_model():
 
 semanticodec = load_model()
 
-@spaces.GPU(duration=
+@spaces.GPU(duration=20)
 def encode_audio(audio_path):
     """Encode audio file to tokens and save them"""
     tokens = semanticodec.encode(audio_path)
     # Move tokens to CPU before converting to numpy
     if isinstance(tokens, torch.Tensor):
         tokens = tokens.cpu().numpy()
-
-
+
+    # Save to a temporary file
+    with tempfile.NamedTemporaryFile(suffix='.oterin', delete=False) as tmp_file:
+        np.save(tmp_file.name, tokens)
+        token_path = tmp_file.name
+
     return token_path, f"Encoded to {len(tokens)} tokens"
 
 @spaces.GPU(duration=60)
@@ -35,15 +41,35 @@ def decode_tokens(token_path):
     # Move waveform to CPU if it's a tensor
     if isinstance(waveform, torch.Tensor):
         waveform = waveform.cpu().numpy()
-
-
-
+
+    # Create in-memory file
+    output_buffer = io.BytesIO()
+    sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
+    output_buffer.seek(0)
+
+    return output_buffer, f"Decoded {len(tokens)} tokens to audio"
 
+@spaces.GPU(duration=80)
 def process_both(audio_path):
-    """Encode and then decode the audio"""
-
-
-
+    """Encode and then decode the audio without saving intermediate files"""
+    # Encode
+    tokens = semanticodec.encode(audio_path)
+    if isinstance(tokens, torch.Tensor):
+        tokens = tokens.cpu().numpy()
+
+    # Decode directly
+    if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
+        tokens = torch.tensor(tokens)
+    waveform = semanticodec.decode(tokens)
+    if isinstance(waveform, torch.Tensor):
+        waveform = waveform.cpu().numpy()
+
+    # Create in-memory file
+    output_buffer = io.BytesIO()
+    sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
+    output_buffer.seek(0)
+
+    return output_buffer, f"Encoded to {len(tokens)} tokens\nDecoded {len(tokens)} tokens to audio"
 
 # Create Gradio interface
 with gr.Blocks(title="Oterin Audio Codec") as demo:
@@ -75,4 +101,4 @@ with gr.Blocks(title="Oterin Audio Codec") as demo:
     both_btn.click(process_both, inputs=both_input, outputs=[both_output, both_status])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(share=True)
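For local testing outside the Space, here is a minimal round-trip sketch of the new token-file flow. It is an illustration, not part of the commit: the helpers save_tokens and load_tokens are hypothetical, the fake token array stands in for semanticodec.encode(...), and it writes to the open temp-file object because np.save appends a ".npy" suffix when handed a bare filename (so saving via tmp_file.name, as the diff does, leaves the ".oterin" path itself empty).

import tempfile
import numpy as np

def save_tokens(tokens, suffix=".oterin"):
    # Hypothetical helper mirroring encode_audio's temp-file step.
    # Writing to the open file object keeps the ".oterin" path loadable later,
    # since np.save would append ".npy" to a plain filename.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_file:
        np.save(tmp_file, tokens)
        return tmp_file.name

def load_tokens(token_path):
    # Hypothetical counterpart to the loading step in decode_tokens.
    return np.load(token_path)

if __name__ == "__main__":
    fake_tokens = np.arange(1024, dtype=np.int64)  # stand-in for semanticodec.encode(audio_path)
    path = save_tokens(fake_tokens)
    restored = load_tokens(path)
    assert np.array_equal(fake_tokens, restored)
    print(f"Round-tripped {len(restored)} tokens via {path}")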
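On the in-memory WAV step: Gradio's gr.Audio output component also accepts a (sample_rate, numpy_array) tuple, so a variant of decode_tokens could skip soundfile and the BytesIO buffer entirely. A sketch of that design choice, assuming the same waveform[0, 0] layout and 32 kHz rate used in the diff (the helper name is illustrative, not from the commit):

import torch

def waveform_to_audio_output(waveform, sample_rate=32000):
    # Convert the decoder output into a value a gr.Audio output can render
    # directly, without writing a WAV file or buffer.
    if isinstance(waveform, torch.Tensor):
        waveform = waveform.cpu().numpy()
    # Same indexing as the diff: first batch item, first channel.
    return (sample_rate, waveform[0, 0])

The tuple form is documented Gradio behavior for raw arrays; whether a BytesIO return value renders correctly may depend on the Gradio version pinned in the Space.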