import os

import torch
import librosa
import numpy as np
import gradio as gr
from sonics import HFAudioClassifier

# Model configurations: selectable model name -> identifier handed to
# HFAudioClassifier.from_pretrained().
MODEL_IDS = {
    "SpecTTTra-α (5s)": "awsaf49/sonics-spectttra-alpha-5s",
    "SpecTTTra-β (5s)": "awsaf49/sonics-spectttra-beta-5s",
    "SpecTTTra-γ (5s)": "awsaf49/sonics-spectttra-gamma-5s",
    "SpecTTTra-α (120s)": "awsaf49/sonics-spectttra-alpha-120s",
    "SpecTTTra-β (120s)": "awsaf49/sonics-spectttra-beta-120s",
    "SpecTTTra-γ (120s)": "awsaf49/sonics-spectttra-gamma-120s",
}

# Sample rate (Hz) every input file is resampled to when it is loaded.
SAMPLE_RATE = 16000

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Models already loaded in this process, keyed by their MODEL_IDS name.
model_cache = {}


def load_model(model_name):
    """Return the classifier for ``model_name``, loading it on first use.

    Loaded models are kept in ``model_cache`` so each one is fetched and
    moved to ``device`` at most once per process.

    Args:
        model_name: A key of ``MODEL_IDS``.

    Returns:
        The model, placed on ``device`` and switched to eval mode.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODEL_IDS``.
    """
    if model_name not in model_cache:
        model = HFAudioClassifier.from_pretrained(MODEL_IDS[model_name])
        model = model.to(device)
        model.eval()
        model_cache[model_name] = model
    return model_cache[model_name]


def _extract_middle_chunk(audio, chunk_samples):
    """Return the chunk-aligned middle window of ``audio``.

    The signal is viewed as consecutive, non-overlapping windows of
    ``chunk_samples`` samples (a trailing partial window is not counted) and
    the window at index ``n_windows // 2`` is returned. Audio shorter than
    one window is returned whole, right-padded with zeros to full length.

    Args:
        audio: 1-D NumPy array of samples.
        chunk_samples: Window length in samples; must be positive.

    Returns:
        A 1-D array of exactly ``chunk_samples`` samples.

    Raises:
        ValueError: If ``chunk_samples`` is not positive.
    """
    if chunk_samples <= 0:
        raise ValueError(
            f"chunk length must be positive, got {chunk_samples} samples"
        )

    total_chunks = len(audio) // chunk_samples
    start = (total_chunks // 2) * chunk_samples
    chunk = audio[start:start + chunk_samples]

    # Only reachable when the whole clip is shorter than one window.
    if len(chunk) < chunk_samples:
        chunk = np.pad(chunk, (0, chunk_samples - len(chunk)))
    return chunk


def process_audio(audio_path, model_name):
    """Classify one audio file and return its real/fake probabilities.

    The file is loaded at ``SAMPLE_RATE``, the middle window of
    ``model.config.audio.max_time`` seconds is extracted (zero-padded if the
    clip is shorter), and the model output is passed through a sigmoid. The
    sigmoid value is reported as the "Fake" probability and its complement
    as "Real".

    Args:
        audio_path: Path of the audio file to analyse.
        model_name: A key of ``MODEL_IDS`` selecting the model.

    Returns:
        ``{"Real": float, "Fake": float}`` on success, or
        ``{"Error": str}`` carrying the exception message if anything fails.
    """
    # Deliberately broad: this is the UI boundary, so any failure (bad file,
    # unknown model, download error) is reported instead of raised.
    try:
        model = load_model(model_name)
        max_time = model.config.audio.max_time

        # Load and process audio
        audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
        chunk = _extract_middle_chunk(audio, int(max_time * sr))

        # Get prediction
        with torch.no_grad():
            batch = torch.from_numpy(chunk).float().to(device).unsqueeze(0)
            pred = model(batch)
            prob = torch.sigmoid(pred).cpu().numpy()[0]

        # Return formatted results
        return {
            "Real": float(1 - prob),
            "Fake": float(prob),
        }
    except Exception as e:
        return {"Error": str(e)}


def predict(audio_file, model_name):
    """Gradio interface function.

    Args:
        audio_file: Path of the uploaded audio file, or ``None`` when
            nothing has been uploaded.
        model_name: A key of ``MODEL_IDS`` selecting the model.

    Returns:
        A prompt message dict when no file was given, otherwise the result
        of ``process_audio``.
    """
    if audio_file is None:
        return {"Message": "Please upload an audio file"}
    return process_audio(audio_file, model_name)


# Custom CSS for styling - Dark
theme with black background css = """ :root { --primary-bg: #000000; --secondary-bg: #111111; --panel-bg: #1e1e1e; --text-color: #ffffff; --text-secondary: #bbbbbb; --border-color: #333333; --analyze-button-color: #ffa500; --analyze-button-hover: #ff8c00; --accent-color: #4a78e5; } body, .gradio-container { background-color: var(--primary-bg) !important; color: var(--text-color) !important; } .footer, .header-container, .accordion-content { background-color: var(--secondary-bg) !important; color: var(--text-color) !important; } /* Headers and text */ h1, h2, h3 { color: var(--text-color) !important; } p { color: var(--text-secondary) !important; } /* Button styling */ button#submit_btn { background-color: var(--analyze-button-color) !important; color: white !important; border: none !important; font-weight: bold !important; padding: 10px 20px !important; font-size: 16px !important; border-radius: 8px !important; } button#submit_btn:hover { background-color: var(--analyze-button-hover) !important; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.5) !important; transform: translateY(-2px) !important; transition: all 0.2s ease !important; } /* Panel backgrounds */ .gr-panel, .gr-box, .gr-form, .gr-input-label, .gr-input { background-color: var(--panel-bg) !important; border: 1px solid var(--border-color) !important; border-radius: 8px !important; color: var(--text-color) !important; } /* Results panel */ #output { background-color: var(--panel-bg) !important; border-radius: 8px !important; padding: 10px !important; box-shadow: 0 2px 5px rgba(0, 0, 0, 0.3) !important; border: 1px solid var(--border-color) !important; } /* Resource links */ .resource-link { background-color: var(--secondary-bg) !important; color: var(--accent-color) !important; border: 1px solid var(--border-color) !important; padding: 8px 16px !important; border-radius: 20px !important; margin: 5px !important; text-decoration: none !important; display: inline-block !important; font-weight: 500 !important; 
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3) !important; } .resource-link:hover { transform: translateY(-2px) !important; box-shadow: 0 3px 6px rgba(0, 0, 0, 0.4) !important; transition: all 0.2s ease !important; background-color: #222222 !important; } .header-container { padding: 20px !important; border-radius: 10px !important; box-shadow: 0 2px 10px rgba(0, 0, 0, 0.3) !important; margin-bottom: 20px !important; border: 1px solid var(--border-color) !important; } /* Accordion styling */ .gr-accordion { border: 1px solid var(--border-color) !important; border-radius: 8px !important; overflow: hidden !important; background-color: var(--panel-bg) !important; } .gr-accordion-header { background-color: var(--secondary-bg) !important; padding: 10px 15px !important; font-weight: 600 !important; color: var(--text-color) !important; } /* Audio player */ .audio-player { background-color: var(--panel-bg) !important; border-radius: 8px !important; overflow: hidden !important; } /* Dropdown & Input fields */ select, input, .gr-dropdown { background-color: var(--panel-bg) !important; color: var(--text-color) !important; border: 1px solid var(--border-color) !important; } /* Labels */ label, .gr-label { color: var(--text-secondary) !important; } /* Footer styling */ .footer { border-top: 1px solid var(--border-color) !important; margin-top: 30px !important; padding: 15px !important; } """ # Create Gradio interface with gr.Blocks(css=css, theme=gr.themes.Default()) as demo: # Title and Logo gr.HTML( """
Detect if a song is real or AI-generated with our state-of-the-art models. Simply upload an audio file to verify its authenticity!