terry-li-hm
committed on
Commit
·
31e1773
1
Parent(s):
8a46051
Update sv.py
Browse files
sv.py
CHANGED
@@ -91,32 +91,31 @@ emo_set = {"😊", "😔", "😡", "😰", "🤢", "😮"}
|
|
91 |
event_set = {"🎼", "👏", "😀", "😭", "🤧", "😷"}
|
92 |
|
93 |
|
94 |
-
def
|
95 |
-
|
96 |
-
|
97 |
-
for sptk in emoji_dict:
|
98 |
-
s = s.replace(sptk, "")
|
99 |
|
100 |
-
|
101 |
-
|
102 |
-
if sptk_dict.get(e, 0) > sptk_dict.get(emo, 0):
|
103 |
-
emo = e
|
104 |
|
105 |
-
|
106 |
-
|
107 |
-
+ s
|
108 |
-
+ emo_dict[emo]
|
109 |
-
)
|
110 |
|
111 |
-
|
112 |
-
|
|
|
|
|
113 |
|
114 |
-
|
|
|
|
|
|
|
|
|
115 |
|
|
|
|
|
116 |
|
117 |
-
|
118 |
-
def get_emoji(s, emoji_set):
|
119 |
-
return next((char for char in s if char in emoji_set), None)
|
120 |
|
121 |
# Replace special tags
|
122 |
text = text.replace("<|nospeech|><|Event_UNK|>", "❓")
|
@@ -136,14 +135,11 @@ def clean_and_emoji_annotate_speech(text):
|
|
136 |
continue
|
137 |
|
138 |
current_event = get_emoji(segment, event_set)
|
139 |
-
current_emotion = get_emoji(
|
140 |
-
segment, emo_set
|
141 |
-
) # Check for emotion emoji anywhere in the segment
|
142 |
|
143 |
if current_event is not None:
|
144 |
segment = segment[1:] if segment.startswith(current_event) else segment
|
145 |
|
146 |
-
# Preserve emotion emoji if it's different from the previous one
|
147 |
if current_emotion is not None and current_emotion != prev_emotion:
|
148 |
segment = segment.replace(current_emotion, "") + current_emotion
|
149 |
|
|
|
91 |
event_set = {"🎼", "👏", "😀", "😭", "🤧", "😷"}
|
92 |
|
93 |
|
94 |
+
def clean_and_emoji_annotate_speech(text):
|
95 |
+
def get_emoji(s, emoji_set):
|
96 |
+
return next((char for char in s if char in emoji_set), None)
|
|
|
|
|
97 |
|
98 |
+
def format_text_with_emojis(s):
|
99 |
+
sptk_dict = {sptk: s.count(sptk) for sptk in emoji_dict}
|
|
|
|
|
100 |
|
101 |
+
for sptk in emoji_dict:
|
102 |
+
s = s.replace(sptk, "")
|
|
|
|
|
|
|
103 |
|
104 |
+
emo = "<|NEUTRAL|>"
|
105 |
+
for e in emo_dict:
|
106 |
+
if sptk_dict.get(e, 0) > sptk_dict.get(emo, 0):
|
107 |
+
emo = e
|
108 |
|
109 |
+
s = (
|
110 |
+
"".join(event_dict[e] for e in event_dict if sptk_dict.get(e, 0) > 0)
|
111 |
+
+ s
|
112 |
+
+ emo_dict[emo]
|
113 |
+
)
|
114 |
|
115 |
+
for emoji in emo_set.union(event_set):
|
116 |
+
s = s.replace(f" {emoji}", emoji).replace(f"{emoji} ", emoji)
|
117 |
|
118 |
+
return s.strip()
|
|
|
|
|
119 |
|
120 |
# Replace special tags
|
121 |
text = text.replace("<|nospeech|><|Event_UNK|>", "❓")
|
|
|
135 |
continue
|
136 |
|
137 |
current_event = get_emoji(segment, event_set)
|
138 |
+
current_emotion = get_emoji(segment, emo_set)
|
|
|
|
|
139 |
|
140 |
if current_event is not None:
|
141 |
segment = segment[1:] if segment.startswith(current_event) else segment
|
142 |
|
|
|
143 |
if current_emotion is not None and current_emotion != prev_emotion:
|
144 |
segment = segment.replace(current_emotion, "") + current_emotion
|
145 |
|