owiedotch committed
Commit fc8b181 · verified · 1 Parent(s): 72b8ffa

Update app.py

Files changed (1)
  1. app.py +64 -164
app.py CHANGED
@@ -1,168 +1,68 @@
 
  import gradio as gr
- import spaces
- import torch
- import torchaudio
- from semanticodec import SemantiCodec
- import tempfile
  import numpy as np
- import lz4.frame
- import os
- from typing import Generator
- import asyncio # Import asyncio for cancellation
- import traceback # Import traceback for error handling
- import pickle
  import soundfile as sf

- # Initialize model with the specified parameters
- semanticodec = SemantiCodec(token_rate=100, semantic_vocab_size=32768) # 1.40 kbps
-
- # Move the entire model to GPU if available
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
- semanticodec = semanticodec.to(device)
- print(f"Model moved to device: {device}")
-
- # Global variables for cancellation
- cancel_encode = False
- cancel_decode = False
- cancel_stream = False
-
- @spaces.GPU(duration=75)
- def encode_audio(filepath):
-     """Encode and decode audio file"""
-     try:
-         # Encode and decode directly as in the example
-         tokens = semanticodec.encode(filepath)
-         waveform = semanticodec.decode(tokens)
-
-         # Save using soundfile
-         sf.write("output.wav", waveform[0,0], 16000)
-         return "output.wav"
-
-     except Exception as e:
-         print(f"Error: {e}")
-         traceback.print_exc()
-         return None
-
- # Add this function to handle the output
- def handle_encode_output(file_path):
-     if file_path is None:
-         return None, gr.Markdown("Encoding failed. Please ensure you've uploaded an audio file and try again.", visible=True)
-     return file_path, gr.Markdown(visible=False)
-
- @spaces.GPU(duration=75)
- def decode_audio(encoded_file='encoded.pkl'):
-     """Decode tokens back to audio"""
-     try:
-         # Load the tokens
-         with open(encoded_file, 'rb') as f:
-             data = pickle.load(f)
-
-         tokens = data['tokens']
-         sample_rate = data['sample_rate']
-
-         # Move tokens to same device as model
-         device = next(semanticodec.parameters()).device
-         tokens = tokens.to(device)
-
-         # Decode
-         waveform = semanticodec.decode(tokens)
-
-         # Save the reconstruction file
-         torchaudio.save("output.wav", waveform[0,0].cpu(), sample_rate)
-         return "output.wav"
-
-     except Exception as e:
-         print(f"Decoding error: {e}")
-         traceback.print_exc()
-         return None
-
- @spaces.GPU(duration=75)
- async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]:
-     global cancel_stream
-
-     try:
-         # Load encoded data and sample rate from the .owie file
-         with open(encoded_file_path, 'rb') as temp_file:
-             sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
-             ndim = int.from_bytes(temp_file.read(4), byteorder='little')
-             shape = tuple(int.from_bytes(temp_file.read(4), byteorder='little') for _ in range(ndim))
-             compressed_size = int.from_bytes(temp_file.read(4), byteorder='little')
-             compressed_data = temp_file.read(compressed_size)
-             tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
-             tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64).reshape(shape)
-
-         # Create a tensor from the numpy array
-         tokens = torch.from_numpy(tokens_numpy)
-
-         # Determine the device of the model
-         model_device = next(semanticodec.parameters()).device
-         print(f"Model device: {model_device}")
-
-         # Move the tokens to the same device as the model
-         tokens = tokens.to(model_device)
-         print(f"Streaming tokens device: {tokens.device}")
-
-         # Decode the audio in chunks
-         chunk_size = sample_rate * 2 # Adjust chunk size as needed
-         with torch.no_grad():
-             for i in range(0, tokens.shape[1], chunk_size):
-                 if cancel_stream:
-                     break # Exit the loop if cancellation is requested
-
-                 tokens_chunk = tokens[:, i:i+chunk_size, :]
-                 audio_chunk = semanticodec.decode(tokens_chunk)
-                 # Convert to numpy array and transpose
-                 audio_data = audio_chunk.squeeze(0).cpu().numpy().T
-                 yield (sample_rate, audio_data)
-                 await asyncio.sleep(0) # Allow for cancellation check
-
-     except Exception as e:
-         print(f"Streaming decoding error: {e}")
-         print(f"Traceback: {traceback.format_exc()}")
-         yield (sample_rate, np.zeros((chunk_size, 1), dtype=np.float32)) # Return silence
-
-     finally:
-         cancel_stream = False
-
- # Gradio Interface
- with gr.Blocks() as demo:
-     gr.Markdown("## Audio Compression with SemantiCodec (GPU/CPU)")
-
-     with gr.Tab("Encode"):
-         input_audio = gr.Audio(label="Input Audio", type="filepath")
-         encode_button = gr.Button("Encode")
-         cancel_encode_button = gr.Button("Cancel")
-         encoded_output = gr.File(label="Encoded File (.owie)", type="filepath")
-         encode_error_message = gr.Markdown(visible=False)
-
-         def encode_wrapper(audio):
-             if audio is None:
-                 return None, gr.Markdown("Please upload an audio file before encoding.", visible=True)
-             return handle_encode_output(encode_audio(audio))
-
-         encode_button.click(
-             encode_wrapper,
-             inputs=input_audio,
-             outputs=[encoded_output, encode_error_message]
-         )
-         cancel_encode_button.click(lambda: globals().update(cancel_encode=True), outputs=None)
-
-     with gr.Tab("Decode"):
-         input_encoded = gr.File(label="Encoded File (.owie)", type="filepath")
-         decode_button = gr.Button("Decode")
-         cancel_decode_button = gr.Button("Cancel")
-         decoded_output = gr.Audio(label="Decoded Audio", type="filepath")
-
-         decode_button.click(decode_audio, inputs=input_encoded, outputs=decoded_output)
-         cancel_decode_button.click(lambda: globals().update(cancel_decode=True), outputs=None)
-
-     with gr.Tab("Streaming"):
-         input_encoded_stream = gr.File(label="Encoded File (.owie)", type="filepath")
-         stream_button = gr.Button("Start Streaming")
-         cancel_stream_button = gr.Button("Cancel")
-         audio_output = gr.Audio(label="Streaming Audio Output", streaming=True)
-
-         stream_button.click(stream_decode_audio, inputs=input_encoded_stream, outputs=audio_output)
-         cancel_stream_button.click(lambda: globals().update(cancel_stream=True), outputs=None)
-
- demo.queue().launch()
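Note on the removed streaming path: it parsed a small custom .owie container, i.e. little-endian uint32 header fields (sample rate, ndim, one field per shape dimension, compressed payload size) followed by an lz4-compressed int64 token array. The writer side never appears in this diff, so the pair below is a minimal sketch inferred from the reader's header layout; write_owie and read_owie are hypothetical names.

    import lz4.frame
    import numpy as np

    def write_owie(path, tokens, sample_rate):
        # Hypothetical writer mirroring the removed reader: sample rate, ndim,
        # each shape dimension, and payload size, all little-endian uint32.
        arr = np.asarray(tokens, dtype=np.int64)
        payload = lz4.frame.compress(arr.tobytes())
        with open(path, 'wb') as f:
            f.write(int(sample_rate).to_bytes(4, byteorder='little'))
            f.write(arr.ndim.to_bytes(4, byteorder='little'))
            for dim in arr.shape:
                f.write(dim.to_bytes(4, byteorder='little'))
            f.write(len(payload).to_bytes(4, byteorder='little'))
            f.write(payload)

    def read_owie(path):
        # Same parsing steps as the removed stream_decode_audio, minus the decode.
        with open(path, 'rb') as f:
            sample_rate = int.from_bytes(f.read(4), byteorder='little')
            ndim = int.from_bytes(f.read(4), byteorder='little')
            shape = tuple(int.from_bytes(f.read(4), byteorder='little') for _ in range(ndim))
            size = int.from_bytes(f.read(4), byteorder='little')
            tokens = np.frombuffer(lz4.frame.decompress(f.read(size)), dtype=np.int64).reshape(shape)
        return sample_rate, tokens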
 
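Side note on the "# 1.40 kbps" comment, which carries over into the new version below: it is at least the right order of magnitude. A 32768-entry vocabulary takes log2(32768) = 15 bits per token, so 100 tokens/s comes to roughly 1.5 kbps; the quoted 1.40 kbps presumably reflects SemantiCodec's own accounting, which this back-of-the-envelope figure only approximates.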
+ import os
  import gradio as gr
  import numpy as np
  import soundfile as sf
+ from semanticodec import SemantiCodec
+ from huggingface_hub import HfApi
+ import spaces

+ # Initialize the model
+ def load_model():
+     return SemantiCodec(token_rate=100, semantic_vocab_size=32768) # 1.40 kbps
+
+ semanticodec = load_model()
+
+ @spaces.GPU(duration=60)
+ def encode_audio(audio_path):
+     """Encode audio file to tokens and save them"""
+     tokens = semanticodec.encode(audio_path)
+     token_path = "encoded_audio.oterin"
+     np.save(token_path, tokens)
+     return token_path, f"Encoded to {len(tokens)} tokens"
+
+ @spaces.GPU(duration=60)
+ def decode_tokens(token_path):
+     """Decode tokens to audio"""
+     tokens = np.load(token_path)
+     waveform = semanticodec.decode(tokens)
+     output_path = "output.wav"
+     sf.write(output_path, waveform[0, 0], 32000)
+     return output_path, f"Decoded {len(tokens)} tokens to audio"
+
+ def process_both(audio_path):
+     """Encode and then decode the audio"""
+     token_path, encode_msg = encode_audio(audio_path)
+     output_path, decode_msg = decode_tokens(token_path)
+     return output_path, f"{encode_msg}\n{decode_msg}"
+
+ # Create Gradio interface
+ with gr.Blocks(title="Oterin Audio Codec") as demo:
+     gr.Markdown("# Oterin Audio Codec")
+     gr.Markdown("Upload an audio file to encode it to semantic tokens, decode tokens back to audio, or do both.")
+
+     with gr.Tab("Encode Audio"):
+         with gr.Row():
+             encode_input = gr.Audio(type="filepath", label="Input Audio")
+             encode_output = gr.File(label="Encoded Tokens (.oterin)")
+         encode_status = gr.Textbox(label="Status")
+         encode_btn = gr.Button("Encode")
+         encode_btn.click(encode_audio, inputs=encode_input, outputs=[encode_output, encode_status])
+
+     with gr.Tab("Decode Tokens"):
+         with gr.Row():
+             decode_input = gr.File(label="Token File (.oterin)")
+             decode_output = gr.Audio(label="Decoded Audio")
+         decode_status = gr.Textbox(label="Status")
+         decode_btn = gr.Button("Decode")
+         decode_btn.click(decode_tokens, inputs=decode_input, outputs=[decode_output, decode_status])
+
+     with gr.Tab("Both (Encode & Decode)"):
+         with gr.Row():
+             both_input = gr.Audio(type="filepath", label="Input Audio")
+             both_output = gr.Audio(label="Reconstructed Audio")
+         both_status = gr.Textbox(label="Status")
+         both_btn = gr.Button("Process")
+         both_btn.click(process_both, inputs=both_input, outputs=[both_output, both_status])
+
+ if __name__ == "__main__":
+     demo.launch()
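A caveat for anyone reusing the new token path (this is NumPy's documented behavior, not something the commit addresses): np.save appends ".npy" whenever the filename does not already end in it, so "encoded_audio.oterin" is actually written to disk as "encoded_audio.oterin.npy", while the suffix-less path is what encode_audio returns and decode_tokens later hands to np.load. A minimal sketch of keeping the saved and returned paths in sync; save_tokens is a hypothetical helper, not part of the commit:

    import numpy as np

    def save_tokens(tokens, path="encoded_audio.oterin"):
        # np.save only appends ".npy" when the name lacks it, so normalize
        # first and return the path that actually exists on disk.
        if not path.endswith(".npy"):
            path += ".npy"
        np.save(path, tokens)
        return path

    tokens = np.arange(10, dtype=np.int64)        # stand-in for semanticodec.encode(...)
    saved = save_tokens(tokens)                   # -> "encoded_audio.oterin.npy"
    assert np.array_equal(np.load(saved), tokens) # round-trips against the real file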