Update app.py
Browse files
app.py
CHANGED
|
@@ -17,31 +17,34 @@ semanticodec = load_model()
|
|
| 17 |
|
| 18 |
@spaces.GPU(duration=20)
|
| 19 |
def encode_audio(audio_path):
|
| 20 |
-
"""Encode audio file to tokens and
|
| 21 |
tokens = semanticodec.encode(audio_path)
|
| 22 |
# Move tokens to CPU before converting to numpy
|
| 23 |
if isinstance(tokens, torch.Tensor):
|
| 24 |
tokens = tokens.cpu().numpy()
|
| 25 |
|
| 26 |
-
# Save to a
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
|
| 31 |
-
#
|
| 32 |
-
if
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
|
| 37 |
@spaces.GPU(duration=60)
|
| 38 |
-
def decode_tokens(token_path):
|
| 39 |
"""Decode tokens to audio"""
|
| 40 |
-
# Ensure the
|
| 41 |
-
if not
|
| 42 |
-
return None, "Error: Empty
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
tokens = np.load(token_path)
|
| 45 |
# Convert to torch tensor if needed by the model
|
| 46 |
if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
|
| 47 |
tokens = torch.tensor(tokens)
|
|
@@ -50,7 +53,7 @@ def decode_tokens(token_path):
|
|
| 50 |
if isinstance(waveform, torch.Tensor):
|
| 51 |
waveform = waveform.cpu().numpy()
|
| 52 |
|
| 53 |
-
# Create in-memory file
|
| 54 |
output_buffer = io.BytesIO()
|
| 55 |
sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
|
| 56 |
output_buffer.seek(0)
|
|
@@ -95,14 +98,14 @@ with gr.Blocks(title="Oterin Audio Codec") as demo:
|
|
| 95 |
with gr.Tab("Encode Audio"):
|
| 96 |
with gr.Row():
|
| 97 |
encode_input = gr.Audio(type="filepath", label="Input Audio")
|
| 98 |
-
encode_output = gr.File(label="Encoded Tokens (.oterin)")
|
| 99 |
encode_status = gr.Textbox(label="Status")
|
| 100 |
encode_btn = gr.Button("Encode")
|
| 101 |
encode_btn.click(encode_audio, inputs=encode_input, outputs=[encode_output, encode_status])
|
| 102 |
|
| 103 |
with gr.Tab("Decode Tokens"):
|
| 104 |
with gr.Row():
|
| 105 |
-
decode_input = gr.File(label="Token File (.oterin)")
|
| 106 |
decode_output = gr.Audio(label="Decoded Audio")
|
| 107 |
decode_status = gr.Textbox(label="Status")
|
| 108 |
decode_btn = gr.Button("Decode")
|
|
|
|
| 17 |
|
| 18 |
@spaces.GPU(duration=20)
def encode_audio(audio_path):
    """Encode an audio file to tokens and save them to a downloadable file.

    Args:
        audio_path: Filesystem path of the input audio, as delivered by the
            ``gr.Audio(type="filepath")`` input component.

    Returns:
        A ``(token_file_path, status_message)`` tuple matching the
        ``[encode_output, encode_status]`` outputs. ``token_file_path`` is
        ``None`` when no audio was supplied.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import tempfile

    # Mirror decode_tokens: report missing input through the status box
    # instead of crashing inside the codec.
    if not audio_path:
        return None, "Error: No input audio"

    tokens = semanticodec.encode(audio_path)
    # Move tokens to CPU before converting to numpy
    if isinstance(tokens, torch.Tensor):
        tokens = tokens.cpu().numpy()

    # A gr.File output needs a path on disk, not an in-memory buffer, so the
    # tokens are persisted to a named temporary file. Saving through the open
    # handle (rather than by file name) stops np.save from appending ".npy"
    # to the ".oterin" suffix.
    with tempfile.NamedTemporaryFile(suffix=".oterin", delete=False) as token_file:
        np.save(token_file, tokens)
        token_path = token_file.name

    # NOTE(review): len() reports the size of the first axis only; confirm
    # this is the intended token count for the codec's output shape.
    return token_path, f"Encoded to {len(tokens)} tokens"
|
| 36 |
|
| 37 |
@spaces.GPU(duration=60)
|
| 38 |
+
def decode_tokens(token_buffer):
|
| 39 |
"""Decode tokens to audio"""
|
| 40 |
+
# Ensure the buffer has content
|
| 41 |
+
if not token_buffer or token_buffer.getbuffer().nbytes == 0:
|
| 42 |
+
return None, "Error: Empty token buffer"
|
| 43 |
+
|
| 44 |
+
# Reset buffer position to start
|
| 45 |
+
token_buffer.seek(0)
|
| 46 |
+
tokens = np.load(token_buffer)
|
| 47 |
|
|
|
|
| 48 |
# Convert to torch tensor if needed by the model
|
| 49 |
if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
|
| 50 |
tokens = torch.tensor(tokens)
|
|
|
|
| 53 |
if isinstance(waveform, torch.Tensor):
|
| 54 |
waveform = waveform.cpu().numpy()
|
| 55 |
|
| 56 |
+
# Create in-memory file for audio
|
| 57 |
output_buffer = io.BytesIO()
|
| 58 |
sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
|
| 59 |
output_buffer.seek(0)
|
|
|
|
| 98 |
with gr.Tab("Encode Audio"):
|
| 99 |
with gr.Row():
|
| 100 |
encode_input = gr.Audio(type="filepath", label="Input Audio")
|
| 101 |
+
encode_output = gr.File(label="Encoded Tokens (.oterin)", file_types=[".oterin"])
|
| 102 |
encode_status = gr.Textbox(label="Status")
|
| 103 |
encode_btn = gr.Button("Encode")
|
| 104 |
encode_btn.click(encode_audio, inputs=encode_input, outputs=[encode_output, encode_status])
|
| 105 |
|
| 106 |
with gr.Tab("Decode Tokens"):
|
| 107 |
with gr.Row():
|
| 108 |
+
decode_input = gr.File(label="Token File (.oterin)", file_types=[".oterin"])
|
| 109 |
decode_output = gr.Audio(label="Decoded Audio")
|
| 110 |
decode_status = gr.Textbox(label="Status")
|
| 111 |
decode_btn = gr.Button("Decode")
|