import logging
import os

import torch
import librosa
import numpy as np
import gradio as gr

from sonics import HFAudioClassifier

logger = logging.getLogger(__name__)

# Model configurations: display name shown in the UI -> Hugging Face model id.
MODEL_IDS = {
    "SpecTTTra-α (5s)": "awsaf49/sonics-spectttra-alpha-5s",
    "SpecTTTra-β (5s)": "awsaf49/sonics-spectttra-beta-5s",
    "SpecTTTra-γ (5s)": "awsaf49/sonics-spectttra-gamma-5s",
    "SpecTTTra-α (120s)": "awsaf49/sonics-spectttra-alpha-120s",
    "SpecTTTra-β (120s)": "awsaf49/sonics-spectttra-beta-120s",
    "SpecTTTra-γ (120s)": "awsaf49/sonics-spectttra-gamma-120s",
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loaded models keyed by display name, so each one is instantiated only once.
model_cache = {}


def load_model(model_name):
    """Return the model registered under *model_name*, loading it on first use.

    On a cache miss the model is created with
    ``HFAudioClassifier.from_pretrained``, moved to ``device``, switched to
    eval mode and stored in ``model_cache``.

    Args:
        model_name: A key of ``MODEL_IDS``.

    Returns:
        The cached model instance.

    Raises:
        KeyError: If *model_name* is not a key of ``MODEL_IDS``.
    """
    if model_name not in model_cache:
        model_id = MODEL_IDS[model_name]
        model = HFAudioClassifier.from_pretrained(model_id)
        model = model.to(device)
        model.eval()
        model_cache[model_name] = model
    return model_cache[model_name]


def _extract_middle_chunk(audio, chunk_samples):
    """Return the middle ``chunk_samples``-long window of a 1-D signal.

    The signal is viewed as consecutive full windows of ``chunk_samples``
    samples and the window at index ``total_windows // 2`` is returned. When
    the signal is shorter than one window, the whole signal is returned,
    zero-padded at the end up to ``chunk_samples``.

    Raises:
        ValueError: If ``chunk_samples`` is not positive.
    """
    if chunk_samples <= 0:
        raise ValueError(
            f"chunk length must be positive, got {chunk_samples} samples"
        )
    total_chunks = len(audio) // chunk_samples
    start = (total_chunks // 2) * chunk_samples
    chunk = audio[start:start + chunk_samples]
    if len(chunk) < chunk_samples:
        chunk = np.pad(chunk, (0, chunk_samples - len(chunk)))
    return chunk


def process_audio(audio_path, model_name):
    """Classify the middle chunk of an audio file as real or fake.

    Args:
        audio_path: Path of the audio file to load.
        model_name: A key of ``MODEL_IDS`` selecting the model.

    Returns:
        ``{"🎵 Real": p_real, "🤖 Fake": p_fake}`` with Python floats on
        success, or ``{"❌ Error": message}`` if loading or inference fails.
        Failures are also logged with their traceback.
    """
    try:
        model = load_model(model_name)
        # Chunk duration the model is configured for (presumably seconds,
        # since it is multiplied by the sample rate below).
        max_time = model.config.audio.max_time

        # Decode and resample to 16 kHz.
        audio, sr = librosa.load(audio_path, sr=16000)
        chunk = _extract_middle_chunk(audio, int(max_time * sr))

        with torch.no_grad():
            batch = torch.from_numpy(chunk).float().to(device).unsqueeze(0)
            pred = model(batch)
            # Flatten before indexing so the single score is extracted as a
            # Python float regardless of the exact output shape for a batch
            # of one; the sigmoid output is treated as the "fake" probability.
            fake_prob = float(torch.sigmoid(pred).reshape(-1)[0])
    except Exception as e:
        # UI boundary: keep the app responsive and show the message, but do
        # not lose the traceback.
        logger.exception(
            "Prediction failed for %r with model %r", audio_path, model_name
        )
        return {"❌ Error": str(e)}

    # Return formatted results with emojis
    return {"🎵 Real": 1.0 - fake_prob, "🤖 Fake": fake_prob}


def predict(audio_file, model_name):
    """Gradio interface function.

    Returns a prompt message when no file was provided; otherwise delegates
    to ``process_audio``.
    """
    if audio_file is None:
        return {"⚠️ Message": "Please upload an audio file"}
    return process_audio(audio_file, model_name)


# Custom CSS
for styling css = """ :root { --primary-color: #6366f1; --secondary-color: #8b5cf6; --accent-color: #ec4899; --background-color: #f8fafc; --text-color: #1e293b; --border-radius: 10px; } .gradio-container { background-color: var(--background-color); } .gr-button { background: linear-gradient(90deg, var(--primary-color), var(--secondary-color)); border: none !important; color: white !important; border-radius: var(--border-radius) !important; } .gr-button:hover { background: linear-gradient(90deg, var(--secondary-color), var(--accent-color)); transform: translateY(-2px); box-shadow: 0 10px 20px rgba(0,0,0,0.1); transition: all 0.3s ease; } .gr-form { border-radius: var(--border-radius) !important; border: 1px solid #e2e8f0 !important; box-shadow: 0 4px 12px rgba(0,0,0,0.05) !important; } .footer { margin-top: 20px; text-align: center; font-size: 0.9em; color: #64748b; } .gradient-text { background: linear-gradient(90deg, var(--primary-color), var(--accent-color)); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; text-fill-color: transparent; } .logo-container { display: flex; justify-content: center; margin-bottom: 1rem; } .header-container { text-align: center; margin-bottom: 2rem; padding: 1.5rem; background: rgba(255, 255, 255, 0.8); border-radius: var(--border-radius); box-shadow: 0 4px 15px rgba(0, 0, 0, 0.05); } .resource-links { display: flex; justify-content: center; gap: 1rem; flex-wrap: wrap; margin-bottom: 1.5rem; } .resource-link { display: inline-block; padding: 0.5rem 1rem; background: white; border-radius: var(--border-radius); color: var(--primary-color); text-decoration: none; box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1); transition: all 0.2s ease; } .resource-link:hover { transform: translateY(-2px); box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15); } .label-container { border-radius: var(--border-radius); overflow: hidden; box-shadow: 0 4px 12px rgba(0,0,0,0.05); } """ # Create Gradio interface with gr.Blocks(css=css) 
as demo: # Title, Subtitle, and Logo gr.HTML( """
Detect if a song is real or AI-generated with our state-of-the-art models. Simply upload an audio file to verify its authenticity!