import os
import torch
import librosa
import numpy as np
import gradio as gr
from sonics import HFAudioClassifier
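# NOTE: `sonics` is the SONICS package from the GitHub repo linked in the demo
# below (https://github.com/awsaf49/sonics); install it from that repo before
# running this app.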
# Model configurations
MODEL_IDS = {
    "SpecTTTra-α (5s)": "awsaf49/sonics-spectttra-alpha-5s",
    "SpecTTTra-β (5s)": "awsaf49/sonics-spectttra-beta-5s",
    "SpecTTTra-γ (5s)": "awsaf49/sonics-spectttra-gamma-5s",
    "SpecTTTra-α (120s)": "awsaf49/sonics-spectttra-alpha-120s",
    "SpecTTTra-β (120s)": "awsaf49/sonics-spectttra-beta-120s",
    "SpecTTTra-γ (120s)": "awsaf49/sonics-spectttra-gamma-120s",
}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Cache loaded models so switching models in the UI doesn't reload them each time
model_cache = {}


def load_model(model_name):
    """Load model if not already cached."""
    if model_name not in model_cache:
        model_id = MODEL_IDS[model_name]
        model = HFAudioClassifier.from_pretrained(model_id)
        model = model.to(device)
        model.eval()
        model_cache[model_name] = model
    return model_cache[model_name]
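
# NOTE (assumption): caching several models at once can be memory-hungry,
# especially on GPU; if that becomes an issue, consider evicting unused
# entries from `model_cache` instead of keeping every model resident.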


def process_audio(audio_path, model_name):
    """Process an audio file and return real/fake probabilities."""
    try:
        model = load_model(model_name)
        max_time = model.config.audio.max_time

        # Load audio (resampled to 16 kHz mono) and locate the middle chunk,
        # which is used as the model input
        audio, sr = librosa.load(audio_path, sr=16000)
        chunk_samples = int(max_time * sr)
        total_chunks = len(audio) // chunk_samples
        middle_chunk_idx = total_chunks // 2

        # Extract the middle chunk, zero-padding if the clip is too short
        start = middle_chunk_idx * chunk_samples
        end = start + chunk_samples
        chunk = audio[start:end]
        if len(chunk) < chunk_samples:
            chunk = np.pad(chunk, (0, chunk_samples - len(chunk)))

        # Get prediction: the model outputs a single logit; its sigmoid is the
        # probability that the song is fake (AI-generated)
        with torch.no_grad():
            chunk = torch.from_numpy(chunk).float().to(device)
            pred = model(chunk.unsqueeze(0))
            prob = torch.sigmoid(pred).item()

        return {"Real": 1 - prob, "Fake": prob}
    except Exception as e:
        return {"Error": str(e)}


def predict(audio_file, model_name):
    """Gradio interface function."""
    if audio_file is None:
        return {"Message": "Please upload an audio file"}
    return process_audio(audio_file, model_name)


# Create Gradio interface
with gr.Blocks() as demo:
    # Title, subtitle, and logo
    gr.HTML(
        """
        <div style="text-align: center;">
            <img src="https://i.postimg.cc/3Jx3yZ5b/real-vs-fake-sonics-w-logo.jpg"
                 style="max-width: 150px; margin: 0 auto;">
            <h1>SONICS: Synthetic Or Not - Identifying Counterfeit Songs</h1>
            <h3>ICLR 2025 [Poster]</h3>
            <p style="font-size: 1.1em; color: #666; margin: 10px 0;">
                Detect if a song is real or AI-generated (created using text-to-song models).
                Upload any audio file to check its authenticity!
            </p>
        </div>
        """
    )
    # # Resource links (disabled in favor of the plain HTML links below)
    # with gr.Row():
    #     paper_radio = gr.Radio(
    #         choices=["Paper", "Dataset", "ArXiv", "GitHub"],
    #         label="Resources",
    #         info="Click to visit respective links",
    #     )
    gr.HTML(
        """
        <div style="text-align: center; margin-bottom: 1rem;">
            <p>
                <a href="https://openreview.net/forum?id=PY7KSh29Z8" target="_blank">📄 Paper</a> |
                <a href="https://huggingface.co/datasets/awsaf49/sonics" target="_blank">🎵 Dataset</a> |
                <a href="https://arxiv.org/abs/2408.14080" target="_blank">🔬 ArXiv</a> |
                <a href="https://github.com/awsaf49/sonics" target="_blank">💻 GitHub</a>
            </p>
        </div>
        """
    )
    # Main interface
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                label="Upload Audio File",
                type="filepath",
            )
            model_dropdown = gr.Dropdown(
                choices=list(MODEL_IDS.keys()),
                value="SpecTTTra-γ (5s)",
                label="Select Model",
            )
            submit_btn = gr.Button("Analyze Audio")
        with gr.Column():
            output = gr.Label(
                label="Analysis Result",
                num_top_classes=2,
            )
    # # Link handling for the resource radio above (disabled along with it).
    # # Note: `paper_radio` is undefined while the radio is commented out, and
    # # Gradio has no `gr.open_url` helper, so the HTML links above are used instead.
    # def open_link(choice):
    #     links = {
    #         "Paper": "https://openreview.net/forum?id=PY7KSh29Z8",
    #         "Dataset": "https://huggingface.co/datasets/awsaf49/sonics",
    #         "ArXiv": "https://arxiv.org/abs/2408.14080",
    #         "GitHub": "https://github.com/awsaf49/sonics",
    #     }
    #
    # paper_radio.change(fn=open_link, inputs=[paper_radio])
    # Prediction handling
    submit_btn.click(
        fn=predict,
        inputs=[audio_input, model_dropdown],
        outputs=[output],
    )


if __name__ == "__main__":
    demo.launch()
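
# A minimal smoke test for the `predict` endpoint via `gradio_client`, once the
# app is running locally. This is a sketch under assumptions: the default port
# (7860), the auto-derived endpoint name "/predict", and the need for
# `handle_file` all depend on your Gradio version; "song.mp3" is a hypothetical
# local file.
#
#   from gradio_client import Client, handle_file
#
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict(
#       handle_file("song.mp3"),   # hypothetical audio file to analyze
#       "SpecTTTra-γ (5s)",        # any key from MODEL_IDS
#       api_name="/predict",
#   )
#   print(result)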