terry-li-hm committed on
Commit
8a46051
·
1 Parent(s): 9844c20

Update `sv.py`

Browse files
Files changed (1) hide show
  1. sv.py +40 -27
sv.py CHANGED
@@ -114,31 +114,44 @@ def format_text_with_emojis(s):
114
  return s.strip()
115
 
116
 
117
- def format_str_v3(s):
118
- def get_emo(s):
119
- return s[-1] if s[-1] in emo_set else None
120
-
121
- def get_event(s):
122
- return s[0] if s[0] in event_set else None
123
-
124
- s = s.replace("<|nospeech|><|Event_UNK|>", "❓")
125
- for lang in lang_dict:
126
- s = s.replace(lang, "<|lang|>")
127
- s_list = [format_text_with_emojis(s_i).strip(" ") for s_i in s.split("<|lang|>")]
128
- new_s = " " + s_list[0]
129
- cur_ent_event = get_event(new_s)
130
- for i in range(1, len(s_list)):
131
- if len(s_list[i]) == 0:
 
 
 
 
132
  continue
133
- if get_event(s_list[i]) == cur_ent_event and get_event(s_list[i]) != None:
134
- s_list[i] = s_list[i][1:]
135
- # else:
136
- cur_ent_event = get_event(s_list[i])
137
- if get_emo(s_list[i]) != None and get_emo(s_list[i]) == get_emo(new_s):
138
- new_s = new_s[:-1]
139
- new_s += s_list[i].strip().lstrip()
140
- new_s = new_s.replace("The.", " ")
141
- return new_s.strip()
 
 
 
 
 
 
 
 
 
142
 
143
 
144
  def time_to_seconds(time_str):
@@ -306,10 +319,10 @@ def process_audio(audio_path, language="yue", fs=16000):
306
  )
307
  text = text[0]["text"]
308
 
309
- # Print the text before format_str_v3
310
- print(f"Text before format_str_v3: {text}")
311
 
312
- text = format_str_v3(text)
313
 
314
  # Handle empty transcriptions
315
  if not text.strip():
 
114
  return s.strip()
115
 
116
 
117
+ def clean_and_emoji_annotate_speech(text):
118
+ def get_emoji(s, emoji_set):
119
+ return next((char for char in s if char in emoji_set), None)
120
+
121
+ # Replace special tags
122
+ text = text.replace("<|nospeech|><|Event_UNK|>", "❓")
123
+ for lang, replacement in lang_dict.items():
124
+ text = text.replace(lang, replacement)
125
+
126
+ # Process each language segment
127
+ segments = [
128
+ format_text_with_emojis(segment.strip()) for segment in text.split("<|lang|>")
129
+ ]
130
+
131
+ formatted_segments = []
132
+ prev_event = prev_emotion = None
133
+
134
+ for segment in segments:
135
+ if not segment:
136
  continue
137
+
138
+ current_event = get_emoji(segment, event_set)
139
+ current_emotion = get_emoji(
140
+ segment, emo_set
141
+ ) # Check for emotion emoji anywhere in the segment
142
+
143
+ if current_event is not None:
144
+ segment = segment[1:] if segment.startswith(current_event) else segment
145
+
146
+ # Preserve emotion emoji if it's different from the previous one
147
+ if current_emotion is not None and current_emotion != prev_emotion:
148
+ segment = segment.replace(current_emotion, "") + current_emotion
149
+
150
+ formatted_segments.append(segment.strip())
151
+ prev_event, prev_emotion = current_event, current_emotion
152
+
153
+ result = " ".join(formatted_segments).replace("The.", "").strip()
154
+ return result
155
 
156
 
157
  def time_to_seconds(time_str):
 
319
  )
320
  text = text[0]["text"]
321
 
322
+ # Print the text before clean_and_emoji_annotate_speech
323
+ print(f"Text before clean_and_emoji_annotate_speech: {text}")
324
 
325
+ text = clean_and_emoji_annotate_speech(text)
326
 
327
  # Handle empty transcriptions
328
  if not text.strip():