terry-li-hm
committed on
Commit
·
9844c20
1
Parent(s):
44b0320
Update `sv.py`
Browse files
sv.py
CHANGED
@@ -91,7 +91,7 @@ emo_set = {"😊", "😔", "😡", "😰", "🤢", "😮"}
|
|
91 |
event_set = {"🎼", "👏", "😀", "😭", "🤧", "😷"}
|
92 |
|
93 |
|
94 |
-
def
|
95 |
sptk_dict = {sptk: s.count(sptk) for sptk in emoji_dict}
|
96 |
|
97 |
for sptk in emoji_dict:
|
@@ -124,7 +124,7 @@ def format_str_v3(s):
|
|
124 |
s = s.replace("<|nospeech|><|Event_UNK|>", "❓")
|
125 |
for lang in lang_dict:
|
126 |
s = s.replace(lang, "<|lang|>")
|
127 |
-
s_list = [
|
128 |
new_s = " " + s_list[0]
|
129 |
cur_ent_event = get_event(new_s)
|
130 |
for i in range(1, len(s_list)):
|
|
|
91 |
event_set = {"🎼", "👏", "😀", "😭", "🤧", "😷"}
|
92 |
|
93 |
|
94 |
+
def format_text_with_emojis(s):
|
95 |
sptk_dict = {sptk: s.count(sptk) for sptk in emoji_dict}
|
96 |
|
97 |
for sptk in emoji_dict:
|
|
|
124 |
s = s.replace("<|nospeech|><|Event_UNK|>", "❓")
|
125 |
for lang in lang_dict:
|
126 |
s = s.replace(lang, "<|lang|>")
|
127 |
+
s_list = [format_text_with_emojis(s_i).strip(" ") for s_i in s.split("<|lang|>")]
|
128 |
new_s = " " + s_list[0]
|
129 |
cur_ent_event = get_event(new_s)
|
130 |
for i in range(1, len(s_list)):
|