terry-li-hm
committed on
Commit
·
44b0320
1
Parent(s):
a885210
Update `sv.py`
Browse files
sv.py
CHANGED
@@ -92,22 +92,25 @@ event_set = {"🎼", "👏", "😀", "😭", "🤧", "😷"}
|
|
92 |
|
93 |
|
94 |
def format_str_v2(s):
|
95 |
-
sptk_dict = {}
|
|
|
96 |
for sptk in emoji_dict:
|
97 |
-
sptk_dict[sptk] = s.count(sptk)
|
98 |
s = s.replace(sptk, "")
|
|
|
99 |
emo = "<|NEUTRAL|>"
|
100 |
for e in emo_dict:
|
101 |
-
if sptk_dict
|
102 |
emo = e
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
|
|
|
|
107 |
|
108 |
for emoji in emo_set.union(event_set):
|
109 |
-
s = s.replace(" "
|
110 |
-
|
111 |
return s.strip()
|
112 |
|
113 |
|
@@ -302,6 +305,10 @@ def process_audio(audio_path, language="yue", fs=16000):
|
|
302 |
merge_vad=True,
|
303 |
)
|
304 |
text = text[0]["text"]
|
|
|
|
|
|
|
|
|
305 |
text = format_str_v3(text)
|
306 |
|
307 |
# Handle empty transcriptions
|
|
|
92 |
|
93 |
|
94 |
def format_str_v2(s):
|
95 |
+
sptk_dict = {sptk: s.count(sptk) for sptk in emoji_dict}
|
96 |
+
|
97 |
for sptk in emoji_dict:
|
|
|
98 |
s = s.replace(sptk, "")
|
99 |
+
|
100 |
emo = "<|NEUTRAL|>"
|
101 |
for e in emo_dict:
|
102 |
+
if sptk_dict.get(e, 0) > sptk_dict.get(emo, 0):
|
103 |
emo = e
|
104 |
+
|
105 |
+
s = (
|
106 |
+
"".join(event_dict[e] for e in event_dict if sptk_dict.get(e, 0) > 0)
|
107 |
+
+ s
|
108 |
+
+ emo_dict[emo]
|
109 |
+
)
|
110 |
|
111 |
for emoji in emo_set.union(event_set):
|
112 |
+
s = s.replace(f" {emoji}", emoji).replace(f"{emoji} ", emoji)
|
113 |
+
|
114 |
return s.strip()
|
115 |
|
116 |
|
|
|
305 |
merge_vad=True,
|
306 |
)
|
307 |
text = text[0]["text"]
|
308 |
+
|
309 |
+
# Print the text before format_str_v3
|
310 |
+
print(f"Text before format_str_v3: {text}")
|
311 |
+
|
312 |
text = format_str_v3(text)
|
313 |
|
314 |
# Handle empty transcriptions
|