Update app.py
app.py CHANGED

@@ -6,6 +6,8 @@ from semanticodec import SemantiCodec
 from huggingface_hub import HfApi
 import spaces
 import torch
+import tempfile
+import io
 
 # Initialize the model
 def load_model():
@@ -13,15 +15,19 @@ def load_model():
 
 semanticodec = load_model()
 
-@spaces.GPU(duration=
+@spaces.GPU(duration=20)
 def encode_audio(audio_path):
     """Encode audio file to tokens and save them"""
     tokens = semanticodec.encode(audio_path)
     # Move tokens to CPU before converting to numpy
     if isinstance(tokens, torch.Tensor):
         tokens = tokens.cpu().numpy()
-
-
+
+    # Save to a temporary file
+    with tempfile.NamedTemporaryFile(suffix='.oterin', delete=False) as tmp_file:
+        np.save(tmp_file.name, tokens)
+        token_path = tmp_file.name
+
     return token_path, f"Encoded to {len(tokens)} tokens"
 
 @spaces.GPU(duration=60)
@@ -35,15 +41,35 @@ def decode_tokens(token_path):
     # Move waveform to CPU if it's a tensor
     if isinstance(waveform, torch.Tensor):
         waveform = waveform.cpu().numpy()
-
-
-
+
+    # Create in-memory file
+    output_buffer = io.BytesIO()
+    sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
+    output_buffer.seek(0)
+
+    return output_buffer, f"Decoded {len(tokens)} tokens to audio"
 
+@spaces.GPU(duration=80)
 def process_both(audio_path):
-    """Encode and then decode the audio"""
-
-
-
+    """Encode and then decode the audio without saving intermediate files"""
+    # Encode
+    tokens = semanticodec.encode(audio_path)
+    if isinstance(tokens, torch.Tensor):
+        tokens = tokens.cpu().numpy()
+
+    # Decode directly
+    if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
+        tokens = torch.tensor(tokens)
+    waveform = semanticodec.decode(tokens)
+    if isinstance(waveform, torch.Tensor):
+        waveform = waveform.cpu().numpy()
+
+    # Create in-memory file
+    output_buffer = io.BytesIO()
+    sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
+    output_buffer.seek(0)
+
+    return output_buffer, f"Encoded to {len(tokens)} tokens\nDecoded {len(tokens)} tokens to audio"
 
 # Create Gradio interface
 with gr.Blocks(title="Oterin Audio Codec") as demo:
@@ -75,4 +101,4 @@ with gr.Blocks(title="Oterin Audio Codec") as demo:
     both_btn.click(process_both, inputs=both_input, outputs=[both_output, both_status])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(share=True)
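For local testing outside the Space, here is a minimal round-trip sketch of the new token-file flow. It is an illustration, not part of the commit: the helpers save_tokens and load_tokens are hypothetical, the fake token array stands in for semanticodec.encode(...), and it writes to the open temp-file object because np.save appends a ".npy" suffix when handed a bare filename (so saving via tmp_file.name, as the diff does, leaves the ".oterin" path itself empty).

import tempfile
import numpy as np

def save_tokens(tokens, suffix=".oterin"):
    # Hypothetical helper mirroring encode_audio's temp-file step.
    # Writing to the open file object keeps the ".oterin" path loadable later,
    # since np.save would append ".npy" to a plain filename.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_file:
        np.save(tmp_file, tokens)
        return tmp_file.name

def load_tokens(token_path):
    # Hypothetical counterpart to the loading step in decode_tokens.
    return np.load(token_path)

if __name__ == "__main__":
    fake_tokens = np.arange(1024, dtype=np.int64)  # stand-in for semanticodec.encode(audio_path)
    path = save_tokens(fake_tokens)
    restored = load_tokens(path)
    assert np.array_equal(fake_tokens, restored)
    print(f"Round-tripped {len(restored)} tokens via {path}")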
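On the in-memory WAV step: Gradio's gr.Audio output component also accepts a (sample_rate, numpy_array) tuple, so a variant of decode_tokens could skip soundfile and the BytesIO buffer entirely. A sketch of that design choice, assuming the same waveform[0, 0] layout and 32 kHz rate used in the diff (the helper name is illustrative, not from the commit):

import torch

def waveform_to_audio_output(waveform, sample_rate=32000):
    # Convert the decoder output into a value a gr.Audio output can render
    # directly, without writing a WAV file or buffer.
    if isinstance(waveform, torch.Tensor):
        waveform = waveform.cpu().numpy()
    # Same indexing as the diff: first batch item, first channel.
    return (sample_rate, waveform[0, 0])

The tuple form is documented Gradio behavior for raw arrays; whether a BytesIO return value renders correctly may depend on the Gradio version pinned in the Space.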