terry-li-hm
committed on
Commit
·
8a46051
1
Parent(s):
9844c20
Update `sv.py`
Browse files
sv.py
CHANGED
@@ -114,31 +114,44 @@ def format_text_with_emojis(s):
|
|
114 |
return s.strip()
|
115 |
|
116 |
|
117 |
-
def
|
118 |
-
def
|
119 |
-
return s
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
|
|
|
|
|
|
|
|
132 |
continue
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
|
144 |
def time_to_seconds(time_str):
|
@@ -306,10 +319,10 @@ def process_audio(audio_path, language="yue", fs=16000):
|
|
306 |
)
|
307 |
text = text[0]["text"]
|
308 |
|
309 |
-
# Print the text before
|
310 |
-
print(f"Text before
|
311 |
|
312 |
-
text =
|
313 |
|
314 |
# Handle empty transcriptions
|
315 |
if not text.strip():
|
|
|
114 |
return s.strip()
|
115 |
|
116 |
|
117 |
+
def clean_and_emoji_annotate_speech(text):
    """Convert a raw tagged transcription into display text with emojis.

    Steps, in order:

    1. The tag pair ``<|nospeech|><|Event_UNK|>`` is replaced by "❓".
    2. Every key of the module-level ``lang_dict`` found in the text is
       replaced by its mapped value.
    3. The text is split on ``<|lang|>`` and each piece is stripped and
       passed through ``format_text_with_emojis``; empty results are dropped.
    4. In each remaining segment, a leading character that belongs to
       ``event_set`` is removed when it is the first such character in the
       segment, and an ``emo_set`` character that differs from the previous
       segment's is moved to the end of the segment.
    5. Segments are joined with single spaces, every literal ``"The."`` is
       removed, and the result is stripped.

    Args:
        text: Raw transcription string containing ``<|...|>`` tags.

    Returns:
        The cleaned, emoji-annotated string (possibly empty).
    """

    def first_char_in(s, charset):
        # First character of ``s`` that belongs to ``charset``, else None.
        return next((ch for ch in s if ch in charset), None)

    text = text.replace("<|nospeech|><|Event_UNK|>", "❓")
    # NOTE(review): the split below only separates languages if lang_dict
    # maps its keys to "<|lang|>" -- confirm against lang_dict's definition.
    for lang, replacement in lang_dict.items():
        text = text.replace(lang, replacement)

    formatted_segments = []
    prev_emotion = None

    for piece in text.split("<|lang|>"):
        segment = format_text_with_emojis(piece.strip())
        if not segment:
            continue

        # Both lookups run on the segment as formatted, before any trimming
        # below, so trimming the event marker cannot change the emotion found.
        current_event = first_char_in(segment, event_set)
        current_emotion = first_char_in(segment, emo_set)

        # Drop the event emoji only when it is the very first character.
        if current_event is not None and segment.startswith(current_event):
            segment = segment[1:]

        # A new emotion (different from the previous segment's) is collapsed
        # to a single trailing emoji; a repeated emotion is left where it is.
        if current_emotion is not None and current_emotion != prev_emotion:
            segment = segment.replace(current_emotion, "") + current_emotion

        formatted_segments.append(segment.strip())
        prev_emotion = current_emotion

    # NOTE(review): "The." is removed verbatim; presumably a spurious
    # recurring model output -- confirm.
    return " ".join(formatted_segments).replace("The.", "").strip()
|
155 |
|
156 |
|
157 |
def time_to_seconds(time_str):
|
|
|
319 |
)
|
320 |
text = text[0]["text"]
|
321 |
|
322 |
+
# Print the text before clean_and_emoji_annotate_speech
|
323 |
+
print(f"Text before clean_and_emoji_annotate_speech: {text}")
|
324 |
|
325 |
+
text = clean_and_emoji_annotate_speech(text)
|
326 |
|
327 |
# Handle empty transcriptions
|
328 |
if not text.strip():
|