terry-li-hm committed on
Commit
31e1773
·
1 Parent(s): 8a46051

Update sv.py

Browse files
Files changed (1) hide show
  1. sv.py +20 -24
sv.py CHANGED
@@ -91,32 +91,31 @@ emo_set = {"😊", "😔", "😡", "😰", "🤢", "😮"}
91
  event_set = {"🎼", "👏", "😀", "😭", "🤧", "😷"}
92
 
93
 
94
- def format_text_with_emojis(s):
95
- sptk_dict = {sptk: s.count(sptk) for sptk in emoji_dict}
96
-
97
- for sptk in emoji_dict:
98
- s = s.replace(sptk, "")
99
 
100
- emo = "<|NEUTRAL|>"
101
- for e in emo_dict:
102
- if sptk_dict.get(e, 0) > sptk_dict.get(emo, 0):
103
- emo = e
104
 
105
- s = (
106
- "".join(event_dict[e] for e in event_dict if sptk_dict.get(e, 0) > 0)
107
- + s
108
- + emo_dict[emo]
109
- )
110
 
111
- for emoji in emo_set.union(event_set):
112
- s = s.replace(f" {emoji}", emoji).replace(f"{emoji} ", emoji)
 
 
113
 
114
- return s.strip()
 
 
 
 
115
 
 
 
116
 
117
- def clean_and_emoji_annotate_speech(text):
118
- def get_emoji(s, emoji_set):
119
- return next((char for char in s if char in emoji_set), None)
120
 
121
  # Replace special tags
122
  text = text.replace("<|nospeech|><|Event_UNK|>", "❓")
@@ -136,14 +135,11 @@ def clean_and_emoji_annotate_speech(text):
136
  continue
137
 
138
  current_event = get_emoji(segment, event_set)
139
- current_emotion = get_emoji(
140
- segment, emo_set
141
- ) # Check for emotion emoji anywhere in the segment
142
 
143
  if current_event is not None:
144
  segment = segment[1:] if segment.startswith(current_event) else segment
145
 
146
- # Preserve emotion emoji if it's different from the previous one
147
  if current_emotion is not None and current_emotion != prev_emotion:
148
  segment = segment.replace(current_emotion, "") + current_emotion
149
 
 
91
  event_set = {"🎼", "👏", "😀", "😭", "🤧", "😷"}
92
 
93
 
94
+ def clean_and_emoji_annotate_speech(text):
95
+ def get_emoji(s, emoji_set):
96
+ return next((char for char in s if char in emoji_set), None)
 
 
97
 
98
+ def format_text_with_emojis(s):
99
+ sptk_dict = {sptk: s.count(sptk) for sptk in emoji_dict}
 
 
100
 
101
+ for sptk in emoji_dict:
102
+ s = s.replace(sptk, "")
 
 
 
103
 
104
+ emo = "<|NEUTRAL|>"
105
+ for e in emo_dict:
106
+ if sptk_dict.get(e, 0) > sptk_dict.get(emo, 0):
107
+ emo = e
108
 
109
+ s = (
110
+ "".join(event_dict[e] for e in event_dict if sptk_dict.get(e, 0) > 0)
111
+ + s
112
+ + emo_dict[emo]
113
+ )
114
 
115
+ for emoji in emo_set.union(event_set):
116
+ s = s.replace(f" {emoji}", emoji).replace(f"{emoji} ", emoji)
117
 
118
+ return s.strip()
 
 
119
 
120
  # Replace special tags
121
  text = text.replace("<|nospeech|><|Event_UNK|>", "❓")
 
135
  continue
136
 
137
  current_event = get_emoji(segment, event_set)
138
+ current_emotion = get_emoji(segment, emo_set)
 
 
139
 
140
  if current_event is not None:
141
  segment = segment[1:] if segment.startswith(current_event) else segment
142
 
 
143
  if current_emotion is not None and current_emotion != prev_emotion:
144
  segment = segment.replace(current_emotion, "") + current_emotion
145