Dragneel committed on
Commit
47f1219
·
1 Parent(s): 87458e6

Update sentiment.py

Browse files

Added: Removing URLs from comments

Files changed (1) hide show
  1. sentiment.py +6 -4
sentiment.py CHANGED
@@ -80,7 +80,7 @@ def get_reddit_results(query):
80
 
81
  try:
82
  sub = reddit.subreddit('noveltranslations+progressionfantasy')
83
- results = sub.search(query, limit=5)
84
 
85
 
86
  results_list = list(results)
@@ -98,7 +98,9 @@ def get_reddit_results(query):
98
 
99
 
100
  def transform_text(text):
101
-
 
 
102
  text = text.lower()
103
  text = nltk.word_tokenize(text)
104
 
@@ -133,9 +135,9 @@ def analyze_comments(results, query):
133
  for comment in all_comments:
134
 
135
  comment_body = comment.body
136
- print(comment_body)
137
  text = transform_text(comment_body)
138
- print(text)
139
  comments_for_cloud.append(comment_body)
140
 
141
  if text:
 
80
 
81
  try:
82
  sub = reddit.subreddit('noveltranslations+progressionfantasy')
83
+ results = sub.search(query, limit=2)
84
 
85
 
86
  results_list = list(results)
 
98
 
99
 
100
  def transform_text(text):
101
+ url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
102
+ text = url_pattern.sub('', text)
103
+
104
  text = text.lower()
105
  text = nltk.word_tokenize(text)
106
 
 
135
  for comment in all_comments:
136
 
137
  comment_body = comment.body
138
+
139
  text = transform_text(comment_body)
140
+
141
  comments_for_cloud.append(comment_body)
142
 
143
  if text: