awsaf49 committed
Commit 11f59f6 · 1 Parent(s): 3f50570

code simplified

Files changed (1)
  1. app.py +43 -71
app.py CHANGED
@@ -1,15 +1,11 @@
 import os
-import math
-import gradio as gr
 import torch
 import librosa
-import pandas as pd
 import numpy as np
-
+import gradio as gr
 from sonics import HFAudioClassifier
 
-
-# Constants
+# Model configurations
 MODEL_IDS = {
     "SpecTTTra-α (5s)": "awsaf49/sonics-spectttra-alpha-5s",
     "SpecTTTra-β (5s)": "awsaf49/sonics-spectttra-beta-5s",
@@ -19,11 +15,9 @@ MODEL_IDS = {
     "SpecTTTra-γ (120s)": "awsaf49/sonics-spectttra-gamma-120s",
 }
 
-
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model_cache = {}
 
-
 def load_model(model_name):
     """Load model if not already cached"""
     if model_name not in model_cache:
@@ -34,21 +28,14 @@ def load_model(model_name):
         model_cache[model_name] = model
     return model_cache[model_name]
 
-
 def process_audio(audio_path, model_name):
     """Process audio file and return prediction"""
     try:
-        # Load model
         model = load_model(model_name)
-
-        # Get max time from model config
         max_time = model.config.audio.max_time
-
+
         # Load and process audio
         audio, sr = librosa.load(audio_path, sr=16000)
-        duration = len(audio) / sr
-
-        # Calculate chunk size and middle position
         chunk_samples = int(max_time * sr)
         total_chunks = len(audio) // chunk_samples
         middle_chunk_idx = total_chunks // 2
@@ -57,89 +44,74 @@ def process_audio(audio_path, model_name):
         start = middle_chunk_idx * chunk_samples
         end = start + chunk_samples
         chunk = audio[start:end]
-
-        # Pad if needed (shouldn't be necessary for middle chunk)
+
         if len(chunk) < chunk_samples:
             chunk = np.pad(chunk, (0, chunk_samples - len(chunk)))
-
-        # Convert to tensor and get prediction
+
+        # Get prediction
         with torch.no_grad():
             chunk = torch.from_numpy(chunk).float().to(device)
             pred = model(chunk.unsqueeze(0))
             prob = torch.sigmoid(pred).cpu().numpy()[0]
-
-        # Get prediction
-        output = {"Real": 1 - prob, "Fake": prob}
-
-        return output
+
+        return {"Real": 1 - prob, "Fake": prob}
 
     except Exception as e:
-        return {
-            "Duration": "Error",
-            "Prediction": f"Error: {str(e)}",
-            "Confidence": "N/A",
-        }
-
+        return {"Error": str(e)}
 
 def predict(audio_file, model_name):
     """Gradio interface function"""
     if audio_file is None:
-        return {
-            "Duration": "No file",
-            "Prediction": "Please upload an audio file",
-            "Confidence": "N/A",
-        }
-
+        return {"Message": "Please upload an audio file"}
     return process_audio(audio_file, model_name)
 
-
 # Create Gradio interface
-css = """
-.heading {
-    text-align: center;
-    margin-bottom: 2rem;
-}
-.logo {
-    max-width: 250px;
-    margin: 0 auto;
-    display: block;
-}
-"""
-
-with gr.Blocks(css=css) as demo:
+with gr.Blocks() as demo:
     gr.HTML(
         """
-        <div class="heading">
-            <img src="https://i.postimg.cc/3Jx3yZ5b/real-vs-fake-sonics-w-logo.jpg" class="logo">
+        <div style="text-align: center; margin-bottom: 1rem;">
+            <img src="https://i.postimg.cc/3Jx3yZ5b/real-vs-fake-sonics-w-logo.jpg"
+                 style="max-width: 300px; margin: 0 auto;">
             <h1>SONICS: Synthetic Or Not - Identifying Counterfeit Songs</h1>
-            <h3><span style="color:red;"><b>ICLR 2025 [Poster]</b></span></h3>
+            <h3>ICLR 2025 [Poster]</h3>
         </div>
-        """
+        """
     )
-
+
     with gr.Row():
         with gr.Column():
-            audio_input = gr.Audio(label="Upload Audio", type="filepath")
+            audio_input = gr.Audio(
+                label="Upload Audio File",
+                type="filepath"
+            )
             model_dropdown = gr.Dropdown(
                 choices=list(MODEL_IDS.keys()),
                 value="SpecTTTra-γ (5s)",
-                label="Select Model",
+                label="Select Model"
            )
-            submit_btn = gr.Button("Predict")
-
+            submit_btn = gr.Button("Analyze Audio")
+
         with gr.Column():
-            output = gr.Label(label="Result", num_top_classes=2)
-
-    submit_btn.click(fn=predict, inputs=[audio_input, model_dropdown], outputs=[output])
-
+            output = gr.Label(
+                label="Analysis Result",
+                num_top_classes=2
+            )
+
     gr.Markdown(
         """
-        ## Resources
-        - 📄 [Paper](https://openreview.net/forum?id=PY7KSh29Z8)
-        - 🎵 [Dataset](https://huggingface.co/datasets/awsaf49/sonics)
-        - 🔬 [ArXiv](https://arxiv.org/abs/2408.14080)
-        - 💻 [GitHub](https://github.com/awsaf49/sonics)
-        """
+        ### Resources
+        - [📄 Paper](https://openreview.net/forum?id=PY7KSh29Z8)
+        - [🎵 Dataset](https://huggingface.co/datasets/awsaf49/sonics)
+        - [🔬 ArXiv](https://arxiv.org/abs/2408.14080)
+        - [💻 GitHub](https://github.com/awsaf49/sonics)
+        """
+    )
+
+    submit_btn.click(
+        fn=predict,
+        inputs=[audio_input, model_dropdown],
+        outputs=[output]
    )
 
-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
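
For a quick sanity check of the simplified prediction path outside Gradio, a minimal sketch follows. It assumes HFAudioClassifier.from_pretrained loads the hub checkpoint (the construction lines inside load_model are elided from this diff), and "song.wav" is a hypothetical local file:

# Minimal sketch of the simplified process_audio flow in app.py, run outside Gradio.
# Assumption: HFAudioClassifier.from_pretrained loads the checkpoint; the actual
# model-construction lines inside load_model are not shown in this diff.
# "song.wav" is a hypothetical local file.
import torch
import librosa
import numpy as np
from sonics import HFAudioClassifier

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = HFAudioClassifier.from_pretrained("awsaf49/sonics-spectttra-alpha-5s")
model = model.to(device).eval()

# Mirror process_audio: resample to 16 kHz mono and score the middle max_time chunk.
audio, sr = librosa.load("song.wav", sr=16000)
chunk_samples = int(model.config.audio.max_time * sr)
total_chunks = len(audio) // chunk_samples
start = (total_chunks // 2) * chunk_samples
chunk = audio[start:start + chunk_samples]
if len(chunk) < chunk_samples:
    # Clips shorter than max_time get zero-padded, as in app.py
    chunk = np.pad(chunk, (0, chunk_samples - len(chunk)))

with torch.no_grad():
    logit = model(torch.from_numpy(chunk).float().to(device).unsqueeze(0))
    prob = torch.sigmoid(logit).item()
print({"Real": 1 - prob, "Fake": prob})  # same dict the Gradio Label component receives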