dac202 committed
Commit 9e4f7db · 1 Parent(s): 8c9536c

added more curse words

Files changed (2):
1. app.py +33 -5
2. fsp.py +61 -15
app.py CHANGED
```diff
@@ -13,7 +13,15 @@ import shutil
 from fsp import analyze_audio, apply_censoring, default_curse_words, seconds_to_minutes
 from datetime import datetime
 
-# MODIFIED: Print start time and filename
+
+###### Ideas ########
+# - JavaScript for toggling individual words to mute --> Playwright
+# - Use an LLM to determine what is "explicit" in the outputs --> structured output?
+# - Mute explicit nonvocal sounds: e.g., gunshots, sex scenes, etc.
+# - Additional words to censor at the beginning screen?
+
+
+# Print the start time
 print(f"Executing {os.path.basename(__file__)} at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
 
 ################ Load models
@@ -33,6 +41,7 @@ tox_pipe = pipeline("text-classification", model=tox_model, tokenizer=tox_tokeni
 ## 2. Create our Whisper model from the LoRA weights
 ## Whisper_timestamped requires the entire model to be saved; this saves static storage space by only saving the LoRA config
 def load_whisper_model(model_path, lora_config, base_model_name="openai/whisper-medium.en"):
+    # If the model exists already we're good to go
     if os.path.exists('./whisper-medium-ft/model.safetensors'):
         print(f'Fine tuned model at {model_path} already exists')
         return
@@ -52,11 +61,11 @@ def load_whisper_model(model_path, lora_config, base_model_name="openai/whisper-
 model_path = 'whisper-medium-ft'
 lora_config = './lora_config'
 
-# Uncheck when uploaded to hf
 load_whisper_model(model_path=model_path, lora_config=lora_config)
 
 ###### Helper functions #######
 
+# Metadata display for the full transcriptions. Includes a Genius link if possible
 def format_metadata_header(filename, metadata, explicit_word_count):
     title, artist, album, year = metadata.get('title', 'N/A'), metadata.get('artist', 'N/A'), metadata.get('album', 'N/A'), metadata.get('year', 'N/A')
     genius_url, wer_score = metadata.get('genius_url'), metadata.get('wer_score')
@@ -69,6 +78,7 @@ def format_metadata_header(filename, metadata, explicit_word_count):
 
     return f"### Details for: *{filename}*\n**Artist:** {artist} | **Song:** {title} | **Album:** {album} ({year}) {genius_link} {wer_display}{status_message}"
 
+# Creates the table of the transcription
 def generate_static_transcript(transcript_data, initial_times):
     initial_times_set = {f"{t['start']}-{t['end']}" for t in initial_times}
     table_header = "<table><thead><tr><th style='width: 125px;'>Time</th><th>Line transcript</th><th>Explicit flag(s)</th></tr></thead><tbody>"
@@ -124,6 +134,7 @@ def generate_static_transcript(transcript_data, initial_times):
 
     return table_header + "".join(table_rows) + "</tbody></table>"
 
+# Execute the whisper model for transcription
 def handle_batch_analysis(files, progress=gr.Progress()):
     if not files:
         raise gr.Error("Please upload one or more audio files.")
@@ -143,7 +154,7 @@ def handle_batch_analysis(files, progress=gr.Progress()):
         analysis_state = analyze_audio(audio_file.name, model, device, fine_tuned, progress=None)
         all_results[filename] = analysis_state
         # MODIFIED: Print filename to console after transcription
-        print(f"Transcription complete for: {filename}")
+        print(f"Transcription complete for: {filename} (file {i+1} of {num_files})")
 
     file_list = list(all_results.keys())
     first_file_results = all_results[file_list[0]]
@@ -152,6 +163,7 @@ def handle_batch_analysis(files, progress=gr.Progress()):
     transcript_html = generate_static_transcript(first_file_results['transcript'], first_file_results['initial_explicit_times'])
 
     # Check if ANY file has explicit content to determine if the apply button should be active
+    # If not, display no edits to make
     any_explicit_content = any(len(res['initial_explicit_times']) > 0 for res in all_results.values())
     if any_explicit_content:
         apply_button_update = gr.update(interactive=True, value="Apply all edits")
@@ -169,6 +181,7 @@ def handle_batch_analysis(files, progress=gr.Progress()):
         apply_button_update
     )
 
+# Selecting between different transcripts
 def update_details_view(selected_filename, all_results):
     if not selected_filename or not all_results:
         return "", ""
@@ -179,6 +192,7 @@ def update_details_view(selected_filename, all_results):
     transcript_html = generate_static_transcript(file_results['transcript'], file_results['initial_explicit_times'])
     return header, transcript_html
 
+# Apply the edits to all songs
 def handle_batch_finalization(all_results, progress=gr.Progress()):
     if not all_results:
         raise gr.Error("No active analysis session. Please process files first.")
@@ -204,6 +218,7 @@ def handle_batch_finalization(all_results, progress=gr.Progress()):
         gr.update(visible=False)
     )
 
+# Clear temp files and return to start
 def return_to_start(all_results):
     """Cleans up all temporary directories and resets the UI to its initial state."""
     if all_results:
@@ -230,7 +245,9 @@ def return_to_start(all_results):
     )
 
 
-###### Gradio UI Definition ########
+###### Gradio UI ########
+
+## CSS for formatting
 css = """
 #main-container { max-width: 1250px; margin: auto; }
 #main-container .prose { font-size: 15px !important; }
@@ -244,11 +261,13 @@ s { color: #d32f2f; text-decoration: line-through; }
 with gr.Blocks(theme=gr.themes.Soft(), title="FSP Finder", css=css) as demo:
     analysis_results_state = gr.State(None)
 
+    # Main header. Persistent over all pages
     with gr.Column(elem_id="main-container"):
         gr.Markdown("# FSP Finder - AI-powered explicit content detector")
         gr.Markdown("Detects and automatically censors explicit content in music files. For source code and more details, visit our [github page](https://github.com/dclark202/auto-censoring).")
         gr.Markdown("---")
 
+        # Upload page
         with gr.Column(visible=True) as upload_view:
             gr.Markdown("### How to use")
             gr.Markdown('- Upload one or more audio files using the box below. Most common audio formats are accepted (e.g., `.mp3`, `.wav`, etc.).')
@@ -261,6 +280,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="FSP Finder", css=css) as demo:
             gr.Markdown('### How it works')
             gr.Markdown("This app uses a fine-tuned version of OpenAI's automatic speech recognition model [Whisper](https://github.com/openai/whisper) to create a lyrics transcript of the uploaded music files. Explicit content (e.g., curse words) is then searched for in the lyrics transcript and highlighted. The vocals stem of the track is split off from the song using [demucs](https://github.com/facebookresearch/demucs) and muted at the appropriate times to create a high-quality edited version of the song.")
 
+        # Results page
         with gr.Column(visible=False) as review_view:
             gr.Markdown("### Review transcript(s) and apply edits")
             gr.Markdown(f'Words to be censored will appear in <s>{html.escape("red strikethrough")}</s> text in the transcript below. Apply edits by clicking **Apply all edits** below.')
@@ -288,28 +308,35 @@ with gr.Blocks(theme=gr.themes.Soft(), title="FSP Finder", css=css) as demo:
             with gr.Accordion("Full audio transcript", open=True):
                 transcript_output = gr.HTML()
 
+        # Processing page. I want this to display more information about what is happening
+        # behind the scenes, e.g., to inform the user that the program has not just crashed
         with gr.Column(visible=False, elem_id="loading-view") as loading_view:
             gr.Markdown("## ⏳ Processing... please wait")
 
-        # --- Event Handlers ---
+        # Buttons
+
+        # Process all inputs
         process_button.click(
             fn=handle_batch_analysis,
             inputs=[files_input],
             outputs=[upload_view, review_view, loading_view, analysis_results_state, processed_files_selector, details_header, transcript_output, apply_button]
         )
 
+        # Select between multiple files
         processed_files_selector.change(
            fn=update_details_view,
            inputs=[processed_files_selector, analysis_results_state],
            outputs=[details_header, transcript_output]
        )
 
+        # Apply edits
         apply_button.click(
            fn=handle_batch_finalization,
            inputs=[analysis_results_state],
            outputs=[review_view, loading_view, final_view, final_status_output, edited_files_output, processed_files_selector, apply_button]
        )
 
+        # Go back to start. The JS for the confirmation is not working!
         return_to_start_button.click(
            fn=return_to_start,
            inputs=[analysis_results_state],
@@ -329,4 +356,5 @@ with gr.Blocks(theme=gr.themes.Soft(), title="FSP Finder", css=css) as demo:
            js="() => { if (confirm('Are you sure you want to return to the start? All current analysis will be lost.')) { return true; } else { return false; } }"
        )
 
+# Made a little favicon :)
 demo.launch(share=True, favicon_path='fav.png')
```
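Note on the `load_whisper_model` hunk: per the comment, only the LoRA adapter config is stored, and a complete checkpoint is materialized at startup because whisper_timestamped needs the whole model saved. A minimal sketch of how that pattern typically looks with `peft` (the function below is illustrative; the repo's actual loading code is not shown in this diff):

```python
# Sketch only: materialize a full Whisper checkpoint from a stored LoRA
# adapter, mirroring the intent of load_whisper_model in the diff above.
import os
from peft import PeftModel
from transformers import WhisperForConditionalGeneration

def materialize_whisper(model_path="whisper-medium-ft",
                        lora_config="./lora_config",
                        base_model_name="openai/whisper-medium.en"):
    # If the merged model was already saved, we're good to go (as in the diff)
    if os.path.exists(os.path.join(model_path, "model.safetensors")):
        return
    base = WhisperForConditionalGeneration.from_pretrained(base_model_name)
    # Attach the stored LoRA adapter, then fold its weights into the base model
    merged = PeftModel.from_pretrained(base, lora_config).merge_and_unload()
    # whisper_timestamped needs the complete model on disk, so save everything
    merged.save_pretrained(model_path)
```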
 
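The new Ideas list floats using an LLM to decide what is "explicit", and the load-models section already builds a `tox_pipe` text-classification pipeline. A rough sketch of what a classifier pass over transcript lines could look like (the helper name, label string, and threshold are assumptions; they depend on the toxicity model used):

```python
# Hypothetical helper, not code from the repo: score transcript lines with
# the tox_pipe pipeline built in app.py and keep the high-scoring ones.
def flag_explicit_lines(lines, tox_pipe, threshold=0.8):
    flagged = []
    for line in lines:
        # A transformers text-classification pipeline returns a list like
        # [{'label': 'toxic', 'score': 0.97}] for a single input string
        top = tox_pipe(line)[0]
        if top["label"].lower() == "toxic" and top["score"] >= threshold:
            flagged.append(line)
    return flagged
```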
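For readers new to Gradio: the event handlers above implement multi-page navigation by toggling `visible` on whole columns, which is why `upload_view`, `review_view`, and `loading_view` all appear in `outputs`. A self-contained sketch of the pattern (component names here are illustrative):

```python
import gradio as gr

with gr.Blocks() as demo:
    with gr.Column(visible=True) as upload_page:
        go = gr.Button("Process")
    with gr.Column(visible=False) as review_page:
        gr.Markdown("Review your results here")

    # Each handler returns one gr.update(...) per component in `outputs`;
    # flipping `visible` is what moves the user between "pages"
    go.click(fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
             inputs=None, outputs=[upload_page, review_page])

demo.launch()
```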
fsp.py CHANGED
```diff
@@ -11,16 +11,45 @@ import jiwer
 import shutil
 import tempfile
 
-GENIUS_API_TOKEN = os.getenv("GENIUS_API_TOKEN") # Or your key here!
+
+
+## Get a Genius API key at https://genius.com/api-clients
+## Put your key in the system environment as GENIUS_API_TOKEN, or set it manually here
+GENIUS_API_TOKEN = os.getenv("GENIUS_API_TOKEN")
 genius = lyricsgenius.Genius(GENIUS_API_TOKEN, verbose=False, remove_section_headers=True)
 
-default_curse_words = {'fuck', 'shit', 'piss', 'bitch', 'nigg', 'cock', 'faggot', 'cunt', 'clint', 'tits', 'pussy', 'dick', 'asshole', 'whore', 'goddam'}
 
-# --- Helper Functions (remove_punctuation, get_metadata, etc.) ---
+#############################################################################
+### just a heads up there's a bunch of curse words and racial slurs below ###
+#############################################################################
+
+
+# List of words to search for to be muted:
+# The way this works currently is that we look for these words as **substrings** of each transcribed word;
+# this means that 'fuck' handles all versions: 'fucking', 'motherfucker', 'fucked', etc.
+# This method is a bit crude as it can lead to some false positives, e.g., 'Dickens' would be censored.
+# Consider using an LLM on the output for classification?
+default_curse_words = {
+    'fuck', 'shit', 'piss', 'bitch', 'nigg', 'dyke', 'cock', 'faggot',
+    'cunt', 'tits', 'pussy', 'dick', 'asshole', 'whore', 'goddam',
+    'douche', 'chink', 'tranny', 'slut', 'jizz', 'kike', 'gook'
+}
+
+# Words for which the substring method will absolutely not work
+singular_curse_words = {
+    'fag', 'cum', 'hell', 'spic', 'clit', 'wank', 'ass'
+}
+
+######################################################
+# Helper functions required for the gradio interface #
+######################################################
+
+# Removes all punctuation and returns lower-case words only
 def remove_punctuation(s):
     s = re.sub(r'[^a-zA-Z0-9\s]', '', s)
     return s.lower()
 
+# For silencing the audio tracks at the indicated times
 def silence_audio_segment(input_audio_path, output_audio_path, times):
     audio = AudioSegment.from_file(input_audio_path)
     for (start_ms, end_ms) in times:
@@ -30,12 +59,14 @@ def silence_audio_segment(input_audio_path, output_audio_path, times):
         audio = before_segment + target_segment + after_segment
     audio.export(output_audio_path, format='wav')
 
+# For combining the vocals and instrument stems once the censoring has been applied
 def combine_audio(path1, path2, outpath):
     audio1 = AudioSegment.from_file(path1, format='wav')
     audio2 = AudioSegment.from_file(path2, format='wav')
     combined_audio = audio1.overlay(audio2)
     combined_audio.export(outpath, format="mp3")
 
+# Extracts metadata from the original song
 def get_metadata(original_audio_path):
     try:
         audio_orig = EasyID3(original_audio_path)
@@ -44,6 +75,7 @@ def get_metadata(original_audio_path):
         metadata = {'title': 'N/A', 'artist': 'N/A', 'album': 'N/A', 'year': 'N/A'}
     return metadata
 
+# Transfers metadata between two songs
 def transfer_metadata(original_audio_path, edited_audio_path):
     try:
         audio_orig = EasyID3(original_audio_path)
@@ -54,6 +86,7 @@ def transfer_metadata(original_audio_path, edited_audio_path):
     except Exception as e:
         print(f"Could not transfer metadata: {e}")
 
+# Probably overcomplicated function to convert time in seconds to mm:ss format
 def seconds_to_minutes(time):
     mins = int(time // 60)
     secs = int(time % 60)
@@ -67,6 +100,7 @@ def seconds_to_minutes(time):
     else:
         return f"{mins}:{secs}"
 
+# Look up the Genius lyrics URL for a given song
 def get_genius_url(artist, song_title):
     if not artist or not song_title or artist == 'N/A' or song_title == 'N/A': return None
     try:
@@ -74,6 +108,7 @@ def get_genius_url(artist, song_title):
         return song.url if song else None
     except Exception: return None
 
+# It's called calculate_wer but I'm actually using *mer*
 def calculate_wer(ground_truth, hypothesis):
     if not ground_truth or not hypothesis or "not available" in ground_truth.lower(): return None
     try:
@@ -82,6 +117,7 @@ def calculate_wer(ground_truth, hypothesis):
         return f"{error:.3f}"
     except Exception: return "Error"
 
+# Gets the lyrics from genius for a given song
 def get_genius_lyrics(artist, song_title):
     if not artist or not song_title or artist == 'N/A' or song_title == 'N/A': return "Lyrics not available (missing metadata)."
     try:
@@ -92,6 +128,8 @@ def get_genius_lyrics(artist, song_title):
 ##########################################################
 # STEP 1: Analyze Audio, Separate Tracks, and Transcribe #
 ##########################################################
+
+# Obtain transcript from song using Whisper. whisper_timestamped handles all the splitting of the segments
 def analyze_audio(audio_path, model, device, fine_tuned=True, progress=None):
     """
     Performs audio separation and transcription. Does NOT apply any edits.
@@ -128,12 +166,14 @@ def analyze_audio(audio_path, model, device, fine_tuned=True, progress=None):
     full_transcript = []
     initial_explicit_times = []
 
+    # Certain phrases can run to two words, so we need a previous-word catcher
+    prev_word = ''
+    prev_start, prev_end = 0.0, 0.0
+
     for segment in result["segments"]:
         segment_words = []
-        seg = segment.get('words', [])
-        prev_word = ''
-
-        for i, word_info in enumerate(seg):
+
+        for word_info in segment.get('words', []):
             word_text = word_info.get(word_key, '').strip()
             if not word_text: continue
 
@@ -145,18 +185,23 @@ def analyze_audio(audio_path, model, device, fine_tuned=True, progress=None):
 
             word_data = {'text': word_text, 'start': start_time, 'end': end_time, 'prob': word_info[prob_key]}
             segment_words.append(word_data)
-
-            if is_explicit:
+
+            # Short words that can be substrings of nonsensitive words
+            if cleaned_word in singular_curse_words:
                 initial_explicit_times.append({'start': start_time, 'end': end_time})
-
-            # Handle two word cluster "god damn"
-            if cleaned_word == 'damn' and prev_word == 'god':
-                god_start = seg[i-1]['start']
-                god_end = seg[i-1]['end']
-                initial_explicit_times.append({'start': god_start, 'end': god_end})
+
+            # Handle two-word clusters "god dam*", "mother fuck*".
+            # Other ones: jerk off, cock sucker, ... ?
+            elif ('dam' in cleaned_word and prev_word == 'god') or ('fuck' in cleaned_word and prev_word == 'mother') or (cleaned_word == 'off' and prev_word == 'jerk'):
+                initial_explicit_times.append({'start': prev_start, 'end': prev_end})
+                initial_explicit_times.append({'start': start_time, 'end': end_time})
+
+            # The majority of censored words will come from here
+            elif is_explicit:
                 initial_explicit_times.append({'start': start_time, 'end': end_time})
 
             prev_word = cleaned_word
+            prev_start, prev_end = start_time, end_time
 
         full_transcript.append({'line_words': segment_words, 'start': segment['start'], 'end': segment['end']})
 
@@ -180,6 +225,7 @@ def analyze_audio(audio_path, model, device, fine_tuned=True, progress=None):
 # STEP 2: Apply Censoring and Finalize Audio #
 ##############################################
 
+# Applies the censoring at the indicated times
 def apply_censoring(analysis_state, times_to_censor, progress=None):
     """
     Takes the state from analyze_audio and a final list of timestamps,
```
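The two word sets added at the top of fsp.py implement the matching rule the comments describe: most entries match as substrings, so 'fuck' also catches 'fucking' and 'motherfucker', while the short words in `singular_curse_words` must match exactly, so 'ass' flags but 'classic' does not. A minimal sketch of the rule (`is_flagged` is a hypothetical name; `analyze_audio` inlines this logic instead):

```python
# Sketch of the substring-plus-exact matching rule from the comments above
def is_flagged(cleaned_word, default_curse_words, singular_curse_words):
    # Exact matches only for short words that hide inside harmless ones:
    # 'ass' flags, 'classic' does not
    if cleaned_word in singular_curse_words:
        return True
    # Substring matches cover inflected and compound forms:
    # 'fuck' catches 'fucking' and 'motherfucker'
    return any(curse in cleaned_word for curse in default_curse_words)
```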
 
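`silence_audio_segment` mutes each flagged span by slicing the vocal stem and reassembling it, as the `before_segment + target_segment + after_segment` context line shows. In pydub that is just millisecond slicing plus a gain drop; a sketch of the core step (the exact gain the repo applies is not visible in this diff):

```python
from pydub import AudioSegment

# Sketch of the slice-and-reassemble step inside silence_audio_segment;
# the -120 dB gain drop is an assumption that renders the span inaudible
def mute_span(audio: AudioSegment, start_ms: int, end_ms: int) -> AudioSegment:
    before_segment = audio[:start_ms]              # pydub slices are in ms
    target_segment = audio[start_ms:end_ms] - 120  # effectively silence
    after_segment = audio[end_ms:]
    return before_segment + target_segment + after_segment
```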
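The new comment on `calculate_wer` notes that the function actually reports the match error rate rather than the word error rate. With `jiwer`, both metrics are a single call; a sketch (the repo's text normalization transforms are omitted):

```python
import jiwer

# MER = (S + D + I) / (S + D + I + H), i.e. like WER but bounded in [0, 1]
def match_error_rate(ground_truth: str, hypothesis: str) -> float:
    return jiwer.mer(ground_truth, hypothesis)
```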