Danielrahmai1991 committed on
Commit
7c7d796
·
verified ·
1 Parent(s): b2bb020

Update preprocessing.py

Browse files
Files changed (1) hide show
  1. preprocessing.py +1 -1
preprocessing.py CHANGED
@@ -71,7 +71,7 @@ def clean_text(text):
71
  # Remove URLs, emails, and other patterns
72
  text = re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE)
73
  text = re.sub(r"\s+", " ", text) # Replace multiple spaces with a single space
74
- text = re.sub(r"[^\u0600-\u06FF\s]", "", text) # Keep only Persian characters and spaces
75
  return text.strip()
76
 
77
 
 
71
  # Remove URLs, emails, and other patterns
72
  text = re.sub(r"http\S+|www\S+|https\S+", "", text, flags=re.MULTILINE)
73
  text = re.sub(r"\s+", " ", text) # Replace multiple spaces with a single space
74
+ # text = re.sub(r"[^\u0600-\u06FF\s]", "", text) # Keep only Persian characters and spaces
75
  return text.strip()
76
 
77