LAP-DEV committed on
Commit
5034a0a
·
verified ·
1 Parent(s): b3e714e

Update modules/utils/subtitle_manager.py

Browse files
Files changed (1) hide show
  1. modules/utils/subtitle_manager.py +22 -8
modules/utils/subtitle_manager.py CHANGED
@@ -32,16 +32,30 @@ def get_srt(segments):
32
  output += f"{segment['text']}\n\n"
33
  return output
34
 
35
- def get_csv(segments,diarization: bool = False):
36
- if diarization:
37
- output = "Line;Start time;End time;Speaker;Text;\n"
38
- else:
39
- output = "Line;Start time;End time;Text;\n"
40
-
41
  for i, segment in enumerate(segments):
42
  if segment['text'].startswith(' '):
43
- segment['text'] = segment['text'][1:]
44
- output += f"{i + 1};{timeformat_srt(segment['start'])};{timeformat_srt(segment['end'])};{segment['text']};\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  return output
46
 
47
  def get_vtt(segments):
 
32
  output += f"{segment['text']}\n\n"
33
  return output
34
 
35
def get_csv(segments):
    """Render transcription segments as semicolon-separated CSV text.

    Each segment is a mapping with 'text', 'start' and 'end' keys. A single
    leading space is stripped from 'text' (the segment is updated in place,
    as before). If the text contains a diarization tag of the form
    ``SPEAKER_NN: ``, the tag is removed from the text and the speaker id
    (``SPEAKER_NN``) is written to its own column.

    The header is chosen after all segments have been inspected: when at
    least one segment carries a speaker tag, the output uses the five-column
    layout ``Line;Start time;End time;Speaker;Text;`` and segments without a
    tag get an empty Speaker field so every row has the same column count.
    Otherwise the four-column layout ``Line;Start time;End time;Text;`` is
    used.

    Args:
        segments: Iterable of segment mappings.

    Returns:
        The CSV content as a single string, header line first.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import re`` being present in the file.
    import re

    # Compiled once instead of being looked up on every iteration.
    speaker_pattern = re.compile(r'SPEAKER_[0-9][0-9]: ')

    rows = []
    has_speaker = False

    for i, segment in enumerate(segments):
        if segment['text'].startswith(' '):
            segment['text'] = segment['text'][1:]

        text = segment['text']
        speaker_id = ''

        # Check if a speaker is identified and split speaker id from text.
        match = speaker_pattern.search(text)
        if match is not None:
            tag = match.group()
            speaker_id = tag.replace(': ', '')
            # Previously this read an undefined name (``test_input``) and
            # raised NameError; the tag must be removed from the segment text.
            text = text.replace(tag, '')
            has_speaker = True

        rows.append((
            i + 1,
            timeformat_srt(segment['start']),
            timeformat_srt(segment['end']),
            speaker_id,
            text,
        ))

    # The header depends on whether any speaker was found, so rows are
    # formatted only after the whole input has been scanned.
    if has_speaker:
        lines = ["Line;Start time;End time;Speaker;Text;\n"]
        lines.extend(
            f"{number};{start};{end};{speaker_id};{text};\n"
            for number, start, end, speaker_id, text in rows
        )
    else:
        lines = ["Line;Start time;End time;Text;\n"]
        lines.extend(
            f"{number};{start};{end};{text};\n"
            for number, start, end, _speaker_id, text in rows
        )

    return "".join(lines)
60
 
61
  def get_vtt(segments):