Update sentiment.py
Browse files
Added: Removing URLs from comments
- sentiment.py +6 -4
sentiment.py
CHANGED
@@ -80,7 +80,7 @@ def get_reddit_results(query):
|
|
80 |
|
81 |
try:
|
82 |
sub = reddit.subreddit('noveltranslations+progressionfantasy')
|
83 |
-
results = sub.search(query, limit=
|
84 |
|
85 |
|
86 |
results_list = list(results)
|
@@ -98,7 +98,9 @@ def get_reddit_results(query):
|
|
98 |
|
99 |
|
100 |
def transform_text(text):
|
101 |
-
|
|
|
|
|
102 |
text = text.lower()
|
103 |
text = nltk.word_tokenize(text)
|
104 |
|
@@ -133,9 +135,9 @@ def analyze_comments(results, query):
|
|
133 |
for comment in all_comments:
|
134 |
|
135 |
comment_body = comment.body
|
136 |
-
|
137 |
text = transform_text(comment_body)
|
138 |
-
|
139 |
comments_for_cloud.append(comment_body)
|
140 |
|
141 |
if text:
|
|
|
80 |
|
81 |
try:
|
82 |
sub = reddit.subreddit('noveltranslations+progressionfantasy')
|
83 |
+
results = sub.search(query, limit=2)
|
84 |
|
85 |
|
86 |
results_list = list(results)
|
|
|
98 |
|
99 |
|
100 |
def transform_text(text):
|
101 |
+
url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
|
102 |
+
text = url_pattern.sub('', text)
|
103 |
+
|
104 |
text = text.lower()
|
105 |
text = nltk.word_tokenize(text)
|
106 |
|
|
|
135 |
for comment in all_comments:
|
136 |
|
137 |
comment_body = comment.body
|
138 |
+
|
139 |
text = transform_text(comment_body)
|
140 |
+
|
141 |
comments_for_cloud.append(comment_body)
|
142 |
|
143 |
if text:
|