Spaces:
Runtime error
Runtime error
| import re | |
# Compiled character class matching one or more consecutive code points from
# the ranges below. NOTE(review): the last range runs from U+24C2 all the way
# to U+1F251, so it also covers non-emoji scripts that fall inside it (for
# example CJK ideographs and Hangul) -- confirm that this is intended.
emoji_pattern = re.compile(
    "[{}]+".format(
        "".join(
            (
                "\U0001F600-\U0001F64F",  # emoticons
                "\U0001F300-\U0001F5FF",  # symbols & pictographs
                "\U0001F680-\U0001F6FF",  # transport & map symbols
                "\U0001F1E0-\U0001F1FF",  # flags (iOS)
                "\U00002702-\U000027B0",
                "\U000024C2-\U0001F251",
            )
        )
    ),
    flags=re.UNICODE,
)
def clean_text(x: str) -> str:
    """Normalize free text to lowercase ASCII letters and digits.

    Steps, in order:

    1. Lowercase the text.
    2. Drop every non-ASCII character (this also removes all emoji, so no
       separate emoji pass is needed afterwards).
    3. Replace URL-like tokens (anything starting with ``http``), ``@mentions``
       and ``#hashtags`` with a space.
    4. Delete apostrophes so that contractions stay one word
       (``don't`` -> ``dont``).
    5. Replace every remaining run of characters other than letters and
       digits -- whitespace included -- with a single space.

    Args:
        x: The text to clean.

    Returns:
        The cleaned text. Words are separated by exactly one space; a single
        leading and/or trailing space is left in place when the input started
        or ended with a removed token or a non-alphanumeric character.
    """
    x = x.lower()
    # Lowercase first: some non-ASCII letters lowercase to ASCII ones, and
    # those should survive the filter below.
    x = x.encode("ascii", "ignore").decode()
    x = re.sub(r"https*\S+", " ", x)  # URLs
    x = re.sub(r"@\S+", " ", x)  # mentions
    x = re.sub(r"#\S+", " ", x)  # hashtags
    # Remove apostrophes outright instead of letting the final substitution
    # turn them into a word break.
    x = x.replace("'", "")
    # Whitespace is non-alphanumeric too, so this single pass also collapses
    # repeated spaces, tabs and newlines.
    return re.sub(r"[^A-Za-z0-9]+", " ", x)