awsaf49 committed on
Commit
3085c15
·
verified ·
1 Parent(s): 2536f16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +265 -264
app.py CHANGED
@@ -1,265 +1,266 @@
1
- import os
2
- import torch
3
- import librosa
4
- import numpy as np
5
- import gradio as gr
6
- from sonics import HFAudioClassifier
7
-
8
- # Model configurations
9
- MODEL_IDS = {
10
- "SpecTTTra-α (5s)": "awsaf49/sonics-spectttra-alpha-5s",
11
- "SpecTTTra-β (5s)": "awsaf49/sonics-spectttra-beta-5s",
12
- "SpecTTTra-γ (5s)": "awsaf49/sonics-spectttra-gamma-5s",
13
- "SpecTTTra-α (120s)": "awsaf49/sonics-spectttra-alpha-120s",
14
- "SpecTTTra-β (120s)": "awsaf49/sonics-spectttra-beta-120s",
15
- "SpecTTTra-γ (120s)": "awsaf49/sonics-spectttra-gamma-120s",
16
- }
17
-
18
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
19
- model_cache = {}
20
-
21
-
22
def load_model(model_name):
    """Return the classifier for ``model_name``, loading it on first use.

    Args:
        model_name: A key of ``MODEL_IDS``.

    Returns:
        The ``HFAudioClassifier`` stored in ``model_cache`` for this name,
        already moved to ``device`` and switched to eval mode.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODEL_IDS``.
    """
    # Fast path: this checkpoint was already loaded by an earlier call.
    if model_name in model_cache:
        return model_cache[model_name]

    classifier = HFAudioClassifier.from_pretrained(MODEL_IDS[model_name])
    classifier = classifier.to(device)
    classifier.eval()

    model_cache[model_name] = classifier
    return model_cache[model_name]
31
-
32
-
33
def process_audio(audio_path, model_name):
    """Classify one audio file as real or AI-generated.

    The file is decoded at 16 kHz and split into consecutive windows of
    ``model.config.audio.max_time`` seconds; only the middle window is
    scored. A clip shorter than one window is zero-padded at the end.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.
        model_name: Key of ``MODEL_IDS`` selecting the checkpoint.

    Returns:
        ``{"Real": p_real, "Fake": p_fake}`` where ``p_fake`` is the sigmoid
        of the model output and ``p_real`` is ``1 - p_fake``.

    Raises:
        gr.Error: If model loading, audio decoding, or inference fails. The
            original exception is chained as the cause.
    """
    try:
        model = load_model(model_name)
        max_time = model.config.audio.max_time

        # Load and process audio
        audio, sr = librosa.load(audio_path, sr=16000)
        chunk_samples = int(max_time * sr)
        total_chunks = len(audio) // chunk_samples
        middle_chunk_idx = total_chunks // 2

        # Extract middle chunk
        start = middle_chunk_idx * chunk_samples
        chunk = audio[start:start + chunk_samples]

        # Only reached when the whole clip is shorter than one window.
        if len(chunk) < chunk_samples:
            chunk = np.pad(chunk, (0, chunk_samples - len(chunk)))

        # Get prediction
        with torch.no_grad():
            batch = torch.from_numpy(chunk).float().to(device).unsqueeze(0)
            pred = model(batch)
            prob = torch.sigmoid(pred).cpu().numpy()[0]

        real_prob = 1 - prob
        fake_prob = prob

        return {
            "Real": float(real_prob),
            "Fake": float(fake_prob),
        }

    except Exception as e:
        # A dict with a string value (e.g. {"Error": "..."}) is not a valid
        # gr.Label payload, whose dict values are float confidences. Raise
        # gr.Error so Gradio reports the failure to the user instead.
        raise gr.Error(str(e)) from e
70
-
71
-
72
def predict(audio_file, model_name):
    """Gradio event handler: check that audio was provided, then classify it.

    Args:
        audio_file: File path supplied by the ``gr.Audio`` input, or ``None``
            when nothing was uploaded.
        model_name: Key of ``MODEL_IDS`` chosen in the model dropdown.

    Returns:
        The result of ``process_audio(audio_file, model_name)``.

    Raises:
        gr.Error: If ``audio_file`` is ``None``.
    """
    if audio_file is None:
        # gr.Label reads dict values as float confidences, so a text message
        # cannot be returned as {"Message": "..."}. Raise it as a Gradio
        # error so the user still sees the same wording.
        raise gr.Error("Please upload an audio file")
    return process_audio(audio_file, model_name)
77
-
78
-
79
- # Updated CSS with better color scheme for resource links
80
- css = """
81
- /* Custom CSS that works with Ocean theme */
82
- .sonics-header {
83
- text-align: center;
84
- padding: 20px;
85
- margin-bottom: 20px;
86
- border-radius: 10px;
87
- }
88
-
89
- .sonics-logo {
90
- max-width: 150px;
91
- border-radius: 10px;
92
- box-shadow: 0 4px 8px rgba(0,0,0,0.3);
93
- }
94
-
95
- .sonics-title {
96
- font-size: 28px;
97
- margin-bottom: 10px;
98
- }
99
-
100
- .sonics-subtitle {
101
- margin-bottom: 15px;
102
- }
103
-
104
- .sonics-description {
105
- font-size: 16px;
106
- margin: 0;
107
- }
108
-
109
- /* Resource links styling */
110
- .resource-links {
111
- display: flex;
112
- justify-content: center;
113
- flex-wrap: wrap;
114
- gap: 8px;
115
- margin-bottom: 25px;
116
- }
117
-
118
- .resource-link {
119
- background-color: #222222;
120
- color: #4aedd6;
121
- border: 1px solid #333333;
122
- padding: 8px 16px;
123
- border-radius: 20px;
124
- margin: 5px;
125
- text-decoration: none;
126
- display: inline-block;
127
- font-weight: 500;
128
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
129
- transition: all 0.2s ease;
130
- }
131
-
132
- .resource-link:hover {
133
- background-color: #333333;
134
- transform: translateY(-2px);
135
- box-shadow: 0 3px 6px rgba(0, 0, 0, 0.4);
136
- transition: all 0.2s ease;
137
- }
138
-
139
- .resource-link-icon {
140
- margin-right: 5px;
141
- }
142
-
143
- /* Footer styling */
144
- .sonics-footer {
145
- text-align: center;
146
- margin-top: 30px;
147
- padding: 15px;
148
- }
149
- """
150
-
151
- # Create Gradio interface
152
- with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
153
- # Title and Logo
154
- gr.HTML(
155
- """
156
- <div class="sonics-header">
157
- <div style="display: flex; justify-content: center; margin-bottom: 20px;">
158
- <img src="https://i.postimg.cc/3Jx3yZ5b/real-vs-fake-sonics-w-logo.jpg" class="sonics-logo">
159
- </div>
160
- <h1 class="sonics-title">SONICS: Synthetic Or Not - Identifying Counterfeit Songs</h1>
161
- <h3 class="sonics-subtitle">ICLR 2025 [Poster]</h3>
162
- <p class="sonics-description">
163
- Detect if a song is real or AI-generated with our state-of-the-art models.
164
- Simply upload an audio file to verify its authenticity!
165
- </p>
166
- </div>
167
- """
168
- )
169
-
170
- # Resource Links - Updated with custom styling to match screenshot
171
- gr.HTML(
172
- """
173
- <div class="resource-links">
174
- <a href="https://openreview.net/forum?id=PY7KSh29Z8" target="_blank" class="resource-link">
175
- <span class="resource-link-icon">📄</span>Paper
176
- </a>
177
- <a href="https://huggingface.co/datasets/awsaf49/sonics" target="_blank" class="resource-link">
178
- <span class="resource-link-icon">🎵</span>Dataset
179
- </a>
180
- <a href="https://huggingface.co/collections/awsaf49/sonics-spectttra-67bb6517b3920fd18e409013" target="_blank" class="resource-link">
181
- <span class="resource-link-icon">🤖</span>Models
182
- </a>
183
- <a href="https://arxiv.org/abs/2408.14080" target="_blank" class="resource-link">
184
- <span class="resource-link-icon">🔬</span>ArXiv
185
- </a>
186
- <a href="https://github.com/awsaf49/sonics" target="_blank" class="resource-link">
187
- <span class="resource-link-icon">💻</span>GitHub
188
- </a>
189
- </div>
190
- """
191
- )
192
-
193
- # Main Interface
194
- with gr.Row(equal_height=True):
195
- with gr.Column():
196
- audio_input = gr.Audio(
197
- label="Upload Audio File",
198
- type="filepath",
199
- elem_id="audio_input"
200
- )
201
-
202
- model_dropdown = gr.Dropdown(
203
- choices=list(MODEL_IDS.keys()),
204
- value="SpecTTTra-γ (5s)",
205
- label="Select Model",
206
- elem_id="model_dropdown"
207
- )
208
-
209
- submit_btn = gr.Button(
210
- "✨ Analyze Audio",
211
- elem_id="submit_btn"
212
- )
213
-
214
- with gr.Column():
215
- # Define output before using it in Examples
216
- output = gr.Label(
217
- label="Analysis Result",
218
- num_top_classes=2,
219
- elem_id="output"
220
- )
221
-
222
- with gr.Accordion("How It Works", open=False):
223
- gr.Markdown("""
224
- ## The SONICS classifier
225
-
226
- The SONICS classifier analyzes your audio to determine if it's an authentic song (human created) or generated by AI. Our models are trained on a diverse dataset of real and AI-generated songs from Suno and Udio.
227
-
228
- ### Models available:
229
- - **SpecTTTra-α**: Optimized for speed
230
- - **SpecTTTra-β**: Balanced performance
231
- - **SpecTTTra-γ**: Highest accuracy
232
-
233
- ### Duration variants:
234
- - **5s**: Analyzes a 5-second clip (faster)
235
- - **120s**: Analyzes up to 2 minutes (more accurate)
236
- """)
237
-
238
- # Add Examples section after output is defined
239
- with gr.Accordion("Example Audio Files", open=True):
240
- gr.Examples(
241
- examples=[
242
- ["example/real_song.mp3", "SpecTTTra-γ (5s)"],
243
- ["example/fake_song.mp3", "SpecTTTra-γ (5s)"],
244
- ],
245
- inputs=[audio_input, model_dropdown],
246
- outputs=[output],
247
- fn=predict,
248
- cache_examples=True,
249
- )
250
-
251
- # Footer
252
- gr.HTML(
253
- """
254
- <div class="sonics-footer">
255
- <p>SONICS: Synthetic Or Not - Identifying Counterfeit Songs | ICLR 2025</p>
256
- <p style="font-size: 12px;">For research purposes only</p>
257
- </div>
258
- """
259
- )
260
-
261
- # Prediction handling
262
- submit_btn.click(fn=predict, inputs=[audio_input, model_dropdown], outputs=[output])
263
-
264
- if __name__ == "__main__":
 
265
  demo.launch()
 
1
+ import os
2
+ import torch
3
+ import librosa
4
+ import numpy as np
5
+ import gradio as gr
6
+ from sonics import HFAudioClassifier
7
+
8
+ # Model configurations
9
+ MODEL_IDS = {
10
+ "SpecTTTra-α (5s)": "awsaf49/sonics-spectttra-alpha-5s",
11
+ "SpecTTTra-β (5s)": "awsaf49/sonics-spectttra-beta-5s",
12
+ "SpecTTTra-γ (5s)": "awsaf49/sonics-spectttra-gamma-5s",
13
+ "SpecTTTra-α (120s)": "awsaf49/sonics-spectttra-alpha-120s",
14
+ "SpecTTTra-β (120s)": "awsaf49/sonics-spectttra-beta-120s",
15
+ "SpecTTTra-γ (120s)": "awsaf49/sonics-spectttra-gamma-120s",
16
+ }
17
+
18
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
19
+ model_cache = {}
20
+
21
+
22
def load_model(model_name):
    """Return the classifier for ``model_name``, loading it on first use.

    Args:
        model_name: A key of ``MODEL_IDS``.

    Returns:
        The ``HFAudioClassifier`` stored in ``model_cache`` for this name,
        already moved to ``device`` and switched to eval mode.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODEL_IDS``.
    """
    # Fast path: this checkpoint was already loaded by an earlier call.
    if model_name in model_cache:
        return model_cache[model_name]

    classifier = HFAudioClassifier.from_pretrained(MODEL_IDS[model_name])
    classifier = classifier.to(device)
    classifier.eval()

    model_cache[model_name] = classifier
    return model_cache[model_name]
31
+
32
+
33
def process_audio(audio_path, model_name):
    """Classify one audio file as real or AI-generated.

    The file is decoded at 16 kHz and split into consecutive windows of
    ``model.config.audio.max_time`` seconds; only the middle window is
    scored. A clip shorter than one window is zero-padded at the end.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.
        model_name: Key of ``MODEL_IDS`` selecting the checkpoint.

    Returns:
        ``{"Real": p_real, "Fake": p_fake}`` where ``p_fake`` is the sigmoid
        of the model output and ``p_real`` is ``1 - p_fake``.

    Raises:
        gr.Error: If model loading, audio decoding, or inference fails. The
            original exception is chained as the cause.
    """
    try:
        model = load_model(model_name)
        max_time = model.config.audio.max_time

        # Load and process audio
        audio, sr = librosa.load(audio_path, sr=16000)
        chunk_samples = int(max_time * sr)
        total_chunks = len(audio) // chunk_samples
        middle_chunk_idx = total_chunks // 2

        # Extract middle chunk
        start = middle_chunk_idx * chunk_samples
        chunk = audio[start:start + chunk_samples]

        # Only reached when the whole clip is shorter than one window.
        if len(chunk) < chunk_samples:
            chunk = np.pad(chunk, (0, chunk_samples - len(chunk)))

        # Get prediction
        with torch.no_grad():
            batch = torch.from_numpy(chunk).float().to(device).unsqueeze(0)
            pred = model(batch)
            prob = torch.sigmoid(pred).cpu().numpy()[0]

        real_prob = 1 - prob
        fake_prob = prob

        return {
            "Real": float(real_prob),
            "Fake": float(fake_prob),
        }

    except Exception as e:
        # A dict with a string value (e.g. {"Error": "..."}) is not a valid
        # gr.Label payload, whose dict values are float confidences. Raise
        # gr.Error so Gradio reports the failure to the user instead.
        raise gr.Error(str(e)) from e
70
+
71
+
72
def predict(audio_file, model_name):
    """Gradio event handler: check that audio was provided, then classify it.

    Args:
        audio_file: File path supplied by the ``gr.Audio`` input, or ``None``
            when nothing was uploaded.
        model_name: Key of ``MODEL_IDS`` chosen in the model dropdown.

    Returns:
        The result of ``process_audio(audio_file, model_name)``.

    Raises:
        gr.Error: If ``audio_file`` is ``None``.
    """
    if audio_file is None:
        # gr.Label reads dict values as float confidences, so a text message
        # cannot be returned as {"Message": "..."}. Raise it as a Gradio
        # error so the user still sees the same wording.
        raise gr.Error("Please upload an audio file")
    return process_audio(audio_file, model_name)
77
+
78
+
79
+ # Updated CSS with better color scheme for resource links
80
+ css = """
81
+ /* Custom CSS that works with Ocean theme */
82
+ .sonics-header {
83
+ text-align: center;
84
+ padding: 20px;
85
+ margin-bottom: 20px;
86
+ border-radius: 10px;
87
+ }
88
+
89
+ .sonics-logo {
90
+ max-width: 150px;
91
+ border-radius: 10px;
92
+ box-shadow: 0 4px 8px rgba(0,0,0,0.3);
93
+ }
94
+
95
+ .sonics-title {
96
+ font-size: 28px;
97
+ margin-bottom: 10px;
98
+ }
99
+
100
+ .sonics-subtitle {
101
+ margin-bottom: 15px;
102
+ }
103
+
104
+ .sonics-description {
105
+ font-size: 16px;
106
+ margin: 0;
107
+ }
108
+
109
+ /* Resource links styling */
110
+ .resource-links {
111
+ display: flex;
112
+ justify-content: center;
113
+ flex-wrap: wrap;
114
+ gap: 8px;
115
+ margin-bottom: 25px;
116
+ }
117
+
118
+ .resource-link {
119
+ background-color: #222222;
120
+ color: #4aedd6;
121
+ border: 1px solid #333333;
122
+ padding: 8px 16px;
123
+ border-radius: 20px;
124
+ margin: 5px;
125
+ text-decoration: none;
126
+ display: inline-block;
127
+ font-weight: 500;
128
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
129
+ transition: all 0.2s ease;
130
+ }
131
+
132
+ .resource-link:hover {
133
+ background-color: #333333;
134
+ transform: translateY(-2px);
135
+ box-shadow: 0 3px 6px rgba(0, 0, 0, 0.4);
136
+ transition: all 0.2s ease;
137
+ }
138
+
139
+ .resource-link-icon {
140
+ margin-right: 5px;
141
+ }
142
+
143
+ /* Footer styling */
144
+ .sonics-footer {
145
+ text-align: center;
146
+ margin-top: 30px;
147
+ padding: 15px;
148
+ }
149
+ """
150
+
151
+ # Create Gradio interface
152
+ with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
153
+ # Title and Logo
154
+ gr.HTML(
155
+ """
156
+ <div class="sonics-header">
157
+ <div style="display: flex; justify-content: center; margin-bottom: 20px;">
158
+ <img src="https://i.postimg.cc/3Jx3yZ5b/real-vs-fake-sonics-w-logo.jpg" class="sonics-logo">
159
+ </div>
160
+ <h1 class="sonics-title">SONICS: Synthetic Or Not - Identifying Counterfeit Songs</h1>
161
+ <h3 class="sonics-subtitle">ICLR 2025 [Poster]</h3>
162
+ <p class="sonics-description">
163
+ Detect if a song is real or AI-generated with our state-of-the-art models.
164
+ Simply upload an audio file to verify its authenticity!
165
+ </p>
166
+ </div>
167
+ """
168
+ )
169
+
170
+ # Resource Links - Updated with custom styling to match screenshot
171
+ gr.HTML(
172
+ """
173
+ <div class="resource-links">
174
+ <a href="https://openreview.net/forum?id=PY7KSh29Z8" target="_blank" class="resource-link">
175
+ <span class="resource-link-icon">📄</span>Paper
176
+ </a>
177
+ <a href="https://huggingface.co/datasets/awsaf49/sonics" target="_blank" class="resource-link">
178
+ <span class="resource-link-icon">🎵</span>Dataset
179
+ </a>
180
+ <a href="https://huggingface.co/collections/awsaf49/sonics-spectttra-67bb6517b3920fd18e409013" target="_blank" class="resource-link">
181
+ <span class="resource-link-icon">🤖</span>Models
182
+ </a>
183
+ <a href="https://arxiv.org/abs/2408.14080" target="_blank" class="resource-link">
184
+ <span class="resource-link-icon">🔬</span>ArXiv
185
+ </a>
186
+ <a href="https://github.com/awsaf49/sonics" target="_blank" class="resource-link">
187
+ <span class="resource-link-icon">💻</span>GitHub
188
+ </a>
189
+ </div>
190
+ """
191
+ )
192
+
193
+ # Main Interface
194
+ with gr.Row(equal_height=True):
195
+ with gr.Column():
196
+ audio_input = gr.Audio(
197
+ label="Upload Audio File",
198
+ type="filepath",
199
+ elem_id="audio_input"
200
+ )
201
+
202
+ model_dropdown = gr.Dropdown(
203
+ choices=list(MODEL_IDS.keys()),
204
+ value="SpecTTTra-γ (5s)",
205
+ label="Select Model",
206
+ elem_id="model_dropdown"
207
+ )
208
+
209
+ submit_btn = gr.Button(
210
+ "✨ Analyze Audio",
211
+ elem_id="submit_btn",
212
+ variant="primary"
213
+ )
214
+
215
+ with gr.Column():
216
+ # Define output before using it in Examples
217
+ output = gr.Label(
218
+ label="Analysis Result",
219
+ num_top_classes=2,
220
+ elem_id="output"
221
+ )
222
+
223
+ with gr.Accordion("How It Works", open=True):
224
+ gr.Markdown("""
225
+ ### The SONICS classifier
226
+
227
+ The SONICS classifier analyzes your audio to determine if it's an authentic song (human created) or generated by AI. Our models are trained on a diverse dataset of real and AI-generated songs from Suno and Udio.
228
+
229
+ ### Models available:
230
+ - **SpecTTTra-γ**: Optimized for speed
231
+ - **SpecTTTra-β**: Balanced performance
232
+ - **SpecTTTra-α**: Highest accuracy
233
+
234
+ ### Duration variants:
235
+ - **5s**: Analyzes a 5-second clip (faster)
236
+ - **120s**: Analyzes up to 2 minutes (more accurate)
237
+ """)
238
+
239
+ # Add Examples section after output is defined
240
+ with gr.Accordion("Example Audio Files", open=True):
241
+ gr.Examples(
242
+ examples=[
243
+ ["example/real_song.mp3", "SpecTTTra-γ (5s)"],
244
+ ["example/fake_song.mp3", "SpecTTTra-γ (5s)"],
245
+ ],
246
+ inputs=[audio_input, model_dropdown],
247
+ outputs=[output],
248
+ fn=predict,
249
+ cache_examples=True,
250
+ )
251
+
252
+ # Footer
253
+ gr.HTML(
254
+ """
255
+ <div class="sonics-footer">
256
+ <p>SONICS: Synthetic Or Not - Identifying Counterfeit Songs | ICLR 2025</p>
257
+ <p style="font-size: 12px;">For research purposes only</p>
258
+ </div>
259
+ """
260
+ )
261
+
262
+ # Prediction handling
263
+ submit_btn.click(fn=predict, inputs=[audio_input, model_dropdown], outputs=[output])
264
+
265
+ if __name__ == "__main__":
266
  demo.launch()