Spaces:

awsaf49
/

sonics-fake-song-detection

Running

App Files Files Community

awsaf49 committed 24 days ago

Commit

3085c15

verified ·

1 Parent(s): 2536f16

Update app.py

Browse files

Files changed (1) hide show

app.py +265 -264

app.py CHANGED Viewed

@@ -1,265 +1,266 @@
-import os
-import torch
-import librosa
-import numpy as np
-import gradio as gr
-from sonics import HFAudioClassifier
-# Model configurations
-MODEL_IDS = {
-    "SpecTTTra-α (5s)": "awsaf49/sonics-spectttra-alpha-5s",
-    "SpecTTTra-β (5s)": "awsaf49/sonics-spectttra-beta-5s",
-    "SpecTTTra-γ (5s)": "awsaf49/sonics-spectttra-gamma-5s",
-    "SpecTTTra-α (120s)": "awsaf49/sonics-spectttra-alpha-120s",
-    "SpecTTTra-β (120s)": "awsaf49/sonics-spectttra-beta-120s",
-    "SpecTTTra-γ (120s)": "awsaf49/sonics-spectttra-gamma-120s",
-}
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model_cache = {}
-def load_model(model_name):
-    """Load model if not already cached"""
-    if model_name not in model_cache:
-        model_id = MODEL_IDS[model_name]
-        model = HFAudioClassifier.from_pretrained(model_id)
-        model = model.to(device)
-        model.eval()
-        model_cache[model_name] = model
-    return model_cache[model_name]
-def process_audio(audio_path, model_name):
-    """Process audio file and return prediction"""
-    try:
-        model = load_model(model_name)
-        max_time = model.config.audio.max_time
-        # Load and process audio
-        audio, sr = librosa.load(audio_path, sr=16000)
-        chunk_samples = int(max_time * sr)
-        total_chunks = len(audio) // chunk_samples
-        middle_chunk_idx = total_chunks // 2
-        # Extract middle chunk
-        start = middle_chunk_idx * chunk_samples
-        end = start + chunk_samples
-        chunk = audio[start:end]
-        if len(chunk) < chunk_samples:
-            chunk = np.pad(chunk, (0, chunk_samples - len(chunk)))
-        # Get prediction
-        with torch.no_grad():
-            chunk = torch.from_numpy(chunk).float().to(device)
-            pred = model(chunk.unsqueeze(0))
-            prob = torch.sigmoid(pred).cpu().numpy()[0]
-        real_prob = 1 - prob
-        fake_prob = prob
-        # Return formatted results
-        return {
-            "Real": float(real_prob),
-            "Fake": float(fake_prob)
-        }
-    except Exception as e:
-        return {"Error": str(e)}
-def predict(audio_file, model_name):
-    """Gradio interface function"""
-    if audio_file is None:
-        return {"Message": "Please upload an audio file"}
-    return process_audio(audio_file, model_name)
-# Updated CSS with better color scheme for resource links
-css = """
-/* Custom CSS that works with Ocean theme */
-.sonics-header {
-    text-align: center;
-    padding: 20px;
-    margin-bottom: 20px;
-    border-radius: 10px;
-}
-.sonics-logo {
-    max-width: 150px;
-    border-radius: 10px;
-    box-shadow: 0 4px 8px rgba(0,0,0,0.3);
-}
-.sonics-title {
-    font-size: 28px;
-    margin-bottom: 10px;
-}
-.sonics-subtitle {
-    margin-bottom: 15px;
-}
-.sonics-description {
-    font-size: 16px;
-    margin: 0;
-}
-/* Resource links styling */
-.resource-links {
-    display: flex;
-    justify-content: center;
-    flex-wrap: wrap;
-    gap: 8px;
-    margin-bottom: 25px;
-}
-.resource-link {
-    background-color: #222222;
-    color: #4aedd6;
-    border: 1px solid #333333;
-    padding: 8px 16px;
-    border-radius: 20px;
-    margin: 5px;
-    text-decoration: none;
-    display: inline-block;
-    font-weight: 500;
-    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
-    transition: all 0.2s ease;
-}
-.resource-link:hover {
-    background-color: #333333;
-    transform: translateY(-2px);
-    box-shadow: 0 3px 6px rgba(0, 0, 0, 0.4);
-    transition: all 0.2s ease;
-}
-.resource-link-icon {
-    margin-right: 5px;
-}
-/* Footer styling */
-.sonics-footer {
-    text-align: center;
-    margin-top: 30px;
-    padding: 15px;
-}
-"""
-# Create Gradio interface
-with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
-    # Title and Logo
-    gr.HTML(
-        """
-        <div class="sonics-header">
-            <div style="display: flex; justify-content: center; margin-bottom: 20px;">
-                <img src="https://i.postimg.cc/3Jx3yZ5b/real-vs-fake-sonics-w-logo.jpg" class="sonics-logo">
-            </div>
-            <h1 class="sonics-title">SONICS: Synthetic Or Not - Identifying Counterfeit Songs</h1>
-            <h3 class="sonics-subtitle">ICLR 2025 [Poster]</h3>
-            <p class="sonics-description">
-                Detect if a song is real or AI-generated with our state-of-the-art models.
-                Simply upload an audio file to verify its authenticity!
-            </p>
-        </div>
-        """
-    )
-    # Resource Links - Updated with custom styling to match screenshot
-    gr.HTML(
-        """
-        <div class="resource-links">
-            <a href="https://openreview.net/forum?id=PY7KSh29Z8" target="_blank" class="resource-link">
-                <span class="resource-link-icon">📄</span>Paper
-            </a>
-            <a href="https://huggingface.co/datasets/awsaf49/sonics" target="_blank" class="resource-link">
-                <span class="resource-link-icon">🎵</span>Dataset
-            </a>
-            <a href="https://huggingface.co/collections/awsaf49/sonics-spectttra-67bb6517b3920fd18e409013" target="_blank" class="resource-link">
-                <span class="resource-link-icon">🤖</span>Models
-            </a>
-            <a href="https://arxiv.org/abs/2408.14080" target="_blank" class="resource-link">
-                <span class="resource-link-icon">🔬</span>ArXiv
-            </a>
-            <a href="https://github.com/awsaf49/sonics" target="_blank" class="resource-link">
-                <span class="resource-link-icon">💻</span>GitHub
-            </a>
-        </div>
-        """
-    )
-    # Main Interface
-    with gr.Row(equal_height=True):
-        with gr.Column():
-            audio_input = gr.Audio(
-                label="Upload Audio File",
-                type="filepath",
-                elem_id="audio_input"
-            )
-            model_dropdown = gr.Dropdown(
-                choices=list(MODEL_IDS.keys()),
-                value="SpecTTTra-γ (5s)",
-                label="Select Model",
-                elem_id="model_dropdown"
-            )
-            submit_btn = gr.Button(
-                "✨ Analyze Audio",
-                elem_id="submit_btn"
-            )
-        with gr.Column():
-            # Define output before using it in Examples
-            output = gr.Label(
-                label="Analysis Result",
-                num_top_classes=2,
-                elem_id="output"
-            )
-            with gr.Accordion("How It Works", open=False):
-                gr.Markdown("""
-                    ## The SONICS classifier
-                    The SONICS classifier analyzes your audio to determine if it's an authentic song (human created) or generated by AI. Our models are trained on a diverse dataset of real and AI-generated songs from Suno and Udio.
-                    ### Models available:
-                    - **SpecTTTra-α**: Optimized for speed
-                    - **SpecTTTra-β**: Balanced performance
-                    - **SpecTTTra-γ**: Highest accuracy
-                    ### Duration variants:
-                    - **5s**: Analyzes a 5-second clip (faster)
-                    - **120s**: Analyzes up to 2 minutes (more accurate)
-                """)
-    # Add Examples section after output is defined
-    with gr.Accordion("Example Audio Files", open=True):
-        gr.Examples(
-            examples=[
-                ["example/real_song.mp3", "SpecTTTra-γ (5s)"],
-                ["example/fake_song.mp3", "SpecTTTra-γ (5s)"],
-            ],
-            inputs=[audio_input, model_dropdown],
-            outputs=[output],
-            fn=predict,
-            cache_examples=True,
-        )
-    # Footer
-    gr.HTML(
-        """
-        <div class="sonics-footer">
-            <p>SONICS: Synthetic Or Not - Identifying Counterfeit Songs | ICLR 2025</p>
-            <p style="font-size: 12px;">For research purposes only</p>
-        </div>
-        """
-    )
-    # Prediction handling
-    submit_btn.click(fn=predict, inputs=[audio_input, model_dropdown], outputs=[output])
-if __name__ == "__main__":
     demo.launch()

+import os
+import torch
+import librosa
+import numpy as np
+import gradio as gr
+from sonics import HFAudioClassifier
+# Model configurations
+MODEL_IDS = {
+    "SpecTTTra-α (5s)": "awsaf49/sonics-spectttra-alpha-5s",
+    "SpecTTTra-β (5s)": "awsaf49/sonics-spectttra-beta-5s",
+    "SpecTTTra-γ (5s)": "awsaf49/sonics-spectttra-gamma-5s",
+    "SpecTTTra-α (120s)": "awsaf49/sonics-spectttra-alpha-120s",
+    "SpecTTTra-β (120s)": "awsaf49/sonics-spectttra-beta-120s",
+    "SpecTTTra-γ (120s)": "awsaf49/sonics-spectttra-gamma-120s",
+}
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model_cache = {}
+def load_model(model_name):
+    """Load model if not already cached"""
+    if model_name not in model_cache:
+        model_id = MODEL_IDS[model_name]
+        model = HFAudioClassifier.from_pretrained(model_id)
+        model = model.to(device)
+        model.eval()
+        model_cache[model_name] = model
+    return model_cache[model_name]
+def process_audio(audio_path, model_name):
+    """Process audio file and return prediction"""
+    try:
+        model = load_model(model_name)
+        max_time = model.config.audio.max_time
+        # Load and process audio
+        audio, sr = librosa.load(audio_path, sr=16000)
+        chunk_samples = int(max_time * sr)
+        total_chunks = len(audio) // chunk_samples
+        middle_chunk_idx = total_chunks // 2
+        # Extract middle chunk
+        start = middle_chunk_idx * chunk_samples
+        end = start + chunk_samples
+        chunk = audio[start:end]
+        if len(chunk) < chunk_samples:
+            chunk = np.pad(chunk, (0, chunk_samples - len(chunk)))
+        # Get prediction
+        with torch.no_grad():
+            chunk = torch.from_numpy(chunk).float().to(device)
+            pred = model(chunk.unsqueeze(0))
+            prob = torch.sigmoid(pred).cpu().numpy()[0]
+        real_prob = 1 - prob
+        fake_prob = prob
+        # Return formatted results
+        return {
+            "Real": float(real_prob),
+            "Fake": float(fake_prob)
+        }
+    except Exception as e:
+        return {"Error": str(e)}
+def predict(audio_file, model_name):
+    """Gradio interface function"""
+    if audio_file is None:
+        return {"Message": "Please upload an audio file"}
+    return process_audio(audio_file, model_name)
+# Updated CSS with better color scheme for resource links
+css = """
+/* Custom CSS that works with Ocean theme */
+.sonics-header {
+    text-align: center;
+    padding: 20px;
+    margin-bottom: 20px;
+    border-radius: 10px;
+}
+.sonics-logo {
+    max-width: 150px;
+    border-radius: 10px;
+    box-shadow: 0 4px 8px rgba(0,0,0,0.3);
+}
+.sonics-title {
+    font-size: 28px;
+    margin-bottom: 10px;
+}
+.sonics-subtitle {
+    margin-bottom: 15px;
+}
+.sonics-description {
+    font-size: 16px;
+    margin: 0;
+}
+/* Resource links styling */
+.resource-links {
+    display: flex;
+    justify-content: center;
+    flex-wrap: wrap;
+    gap: 8px;
+    margin-bottom: 25px;
+}
+.resource-link {
+    background-color: #222222;
+    color: #4aedd6;
+    border: 1px solid #333333;
+    padding: 8px 16px;
+    border-radius: 20px;
+    margin: 5px;
+    text-decoration: none;
+    display: inline-block;
+    font-weight: 500;
+    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
+    transition: all 0.2s ease;
+}
+.resource-link:hover {
+    background-color: #333333;
+    transform: translateY(-2px);
+    box-shadow: 0 3px 6px rgba(0, 0, 0, 0.4);
+    transition: all 0.2s ease;
+}
+.resource-link-icon {
+    margin-right: 5px;
+}
+/* Footer styling */
+.sonics-footer {
+    text-align: center;
+    margin-top: 30px;
+    padding: 15px;
+}
+"""
+# Create Gradio interface
+with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
+    # Title and Logo
+    gr.HTML(
+        """
+        <div class="sonics-header">
+            <div style="display: flex; justify-content: center; margin-bottom: 20px;">
+                <img src="https://i.postimg.cc/3Jx3yZ5b/real-vs-fake-sonics-w-logo.jpg" class="sonics-logo">
+            </div>
+            <h1 class="sonics-title">SONICS: Synthetic Or Not - Identifying Counterfeit Songs</h1>
+            <h3 class="sonics-subtitle">ICLR 2025 [Poster]</h3>
+            <p class="sonics-description">
+                Detect if a song is real or AI-generated with our state-of-the-art models.
+                Simply upload an audio file to verify its authenticity!
+            </p>
+        </div>
+        """
+    )
+    # Resource Links - Updated with custom styling to match screenshot
+    gr.HTML(
+        """
+        <div class="resource-links">
+            <a href="https://openreview.net/forum?id=PY7KSh29Z8" target="_blank" class="resource-link">
+                <span class="resource-link-icon">📄</span>Paper
+            </a>
+            <a href="https://huggingface.co/datasets/awsaf49/sonics" target="_blank" class="resource-link">
+                <span class="resource-link-icon">🎵</span>Dataset
+            </a>
+            <a href="https://huggingface.co/collections/awsaf49/sonics-spectttra-67bb6517b3920fd18e409013" target="_blank" class="resource-link">
+                <span class="resource-link-icon">🤖</span>Models
+            </a>
+            <a href="https://arxiv.org/abs/2408.14080" target="_blank" class="resource-link">
+                <span class="resource-link-icon">🔬</span>ArXiv
+            </a>
+            <a href="https://github.com/awsaf49/sonics" target="_blank" class="resource-link">
+                <span class="resource-link-icon">💻</span>GitHub
+            </a>
+        </div>
+        """
+    )
+    # Main Interface
+    with gr.Row(equal_height=True):
+        with gr.Column():
+            audio_input = gr.Audio(
+                label="Upload Audio File",
+                type="filepath",
+                elem_id="audio_input"
+            )
+            model_dropdown = gr.Dropdown(
+                choices=list(MODEL_IDS.keys()),
+                value="SpecTTTra-γ (5s)",
+                label="Select Model",
+                elem_id="model_dropdown"
+            )
+            submit_btn = gr.Button(
+                "✨ Analyze Audio",
+                elem_id="submit_btn",
+                variant="primary"
+            )
+        with gr.Column():
+            # Define output before using it in Examples
+            output = gr.Label(
+                label="Analysis Result",
+                num_top_classes=2,
+                elem_id="output"
+            )
+            with gr.Accordion("How It Works", open=True):
+                gr.Markdown("""
+                    ### The SONICS classifier
+                    The SONICS classifier analyzes your audio to determine if it's an authentic song (human created) or generated by AI. Our models are trained on a diverse dataset of real and AI-generated songs from Suno and Udio.
+                    ### Models available:
+                    - **SpecTTTra-γ**: Optimized for speed
+                    - **SpecTTTra-β**: Balanced performance
+                    - **SpecTTTra-α**: Highest accuracy
+                    ### Duration variants:
+                    - **5s**: Analyzes a 5-second clip (faster)
+                    - **120s**: Analyzes up to 2 minutes (more accurate)
+                """)
+    # Add Examples section after output is defined
+    with gr.Accordion("Example Audio Files", open=True):
+        gr.Examples(
+            examples=[
+                ["example/real_song.mp3", "SpecTTTra-γ (5s)"],
+                ["example/fake_song.mp3", "SpecTTTra-γ (5s)"],
+            ],
+            inputs=[audio_input, model_dropdown],
+            outputs=[output],
+            fn=predict,
+            cache_examples=True,
+        )
+    # Footer
+    gr.HTML(
+        """
+        <div class="sonics-footer">
+            <p>SONICS: Synthetic Or Not - Identifying Counterfeit Songs | ICLR 2025</p>
+            <p style="font-size: 12px;">For research purposes only</p>
+        </div>
+        """
+    )
+    # Prediction handling
+    submit_btn.click(fn=predict, inputs=[audio_input, model_dropdown], outputs=[output])
+if __name__ == "__main__":
     demo.launch()