Spaces:

Dragneel
/

Recon

Running

App Files Files Community

prasant.goswivt committed on Oct 12, 2023

Commit

dabad06

1 Parent(s): fd334ba

added sentiment analysis

Browse files

Files changed (11) hide show

__pycache__/sentiment.cpython-310.pyc +0 -0
data/novel_list.pkl +3 -0
data/sentiment_analysis/Genius Seventh Prince_results.pkl +3 -0
data/sentiment_analysis/Lord of the mysteries_results.pkl +3 -0
data/sentiment_analysis/Mother of Learning_results.pkl +3 -0
data/sentiment_analysis/The Perfect Run_results.pkl +3 -0
data/similarity.pkl +3 -0
sentiment.py +155 -0
static/images/wordcloud/Lord of the mysteries_cloud.png +0 -0
static/images/wordcloud/Mother of Learning_cloud.png +0 -0
static/images/wordcloud/The Perfect Run_cloud.png +0 -0

__pycache__/sentiment.cpython-310.pyc ADDED Viewed

Binary file (4.45 kB). View file

data/novel_list.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5af2f404aed4b4de777b9b2c0cb2fc1a744f9c95332f1a2b96ea4eb514d5a9aa
+size 4886535

data/sentiment_analysis/Genius Seventh Prince_results.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:74aafc6b657ba1e59d202e97046cd1699c102750e3fb02a60360bc9640ec6869
+size 19

data/sentiment_analysis/Lord of the mysteries_results.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f18a626152311f0e9202975c342ddcfa9ecc343006ea7ea133b1fe3c708b235f
+size 21735

data/sentiment_analysis/Mother of Learning_results.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:628f6afab594ad5d7a0f232c48f61c4d935dbea168fb6e166dc78ac680d31518
+size 21848

data/sentiment_analysis/The Perfect Run_results.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c5a3c88f7c51bb3bb492cc7b10bfd9c4d670b30d8ae8a714319d71cf34b021ae
+size 21740

data/similarity.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:96a69cd6b71b1342f594eabb095aab1b619c27d1004b4b34e2725ffbe838f1a1
+size 1313383915

sentiment.py ADDED Viewed

	@@ -0,0 +1,155 @@

+from flask import Flask, request, render_template
+import pickle
+import os
+import praw
+import torch
+from transformers import RobertaTokenizer, RobertaForSequenceClassification
+import nltk
+from nltk.stem.porter import PorterStemmer
+from nltk.corpus import stopwords
+import spacy
+import string
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+def save_data(data, filename):
+    with open(filename, 'wb') as file:
+        pickle.dump(data, file)
+def load_data(filename):
+    if os.path.exists(filename):
+        with open(filename, 'rb') as file:
+            return pickle.load(file)
+    else:
+        return None
+# PRAW configs
+REDDIT_CLIENT_ID = "lI0C_W9_eESoiS2mtUMNDg"
+REDDIT_CLIENT_SECRET = "IK1Vn7s0EZGiNt6vMZ54sfT6pYvbHA"
+REDDIT_USERNAME = "Tiger_in_the_Snow"
+reddit = praw.Reddit(
+    client_id=REDDIT_CLIENT_ID,
+    client_secret=REDDIT_CLIENT_SECRET,
+    user_agent=f"script:sentiment-analysis:v0.0.1 (by {REDDIT_USERNAME})"
+)
+# NLP configs
+stemmer = PorterStemmer()
+nlp = spacy.load("en_core_web_sm")
+nltk.download('punkt')
+# Model configs
+tokenizer = RobertaTokenizer.from_pretrained('aychang/roberta-base-imdb')
+model = RobertaForSequenceClassification.from_pretrained(
+    'aychang/roberta-base-imdb', num_labels=2)
+model.classifier = torch.nn.Linear(768, 2)
+def get_sentiment(query):
+    print(query)
+    filename = f"D:/projects/Recon/data/sentiment_analysis/{query}_results.pkl"
+    saved_data = load_data(filename)
+    if saved_data:
+        positive, negative, _ = saved_data
+        wordcloud = f'static/images/wordcloud/{query}_cloud.png'
+        return positive, negative, wordcloud
+    else:
+        results = get_reddit_results(query)
+        if not results:
+            error = "No results found for query"
+            return error
+        positive, negative, wordcloud = analyze_comments(
+            results, query=query)
+        print(f'positive:{positive}')
+        save_data((positive, negative, wordcloud), filename)
+        return positive, negative, f'static/images/wordcloud/{query}_cloud.png'
+def get_reddit_results(query):
+    print(query)
+    sub = reddit.subreddit('noveltranslations+progressionfantasy')
+    results = sub.search(query, limit=1)
+    print(results)
+    return list(results)
+def transform_text(text):
+    text = text.lower()
+    text = nltk.word_tokenize(text)
+    text = [i for i in text if i.isalnum()]
+    text = [i for i in text if i not in stopwords.words(
+        'english') and i not in string.punctuation]
+    text = [stemmer.stem(i) for i in text]
+    return ' '.join(text)
+def tokenize(text):
+    doc = nlp(text)
+    return [token.text for token in doc]
+def analyze_comments(results, query):
+    total_positive = 0
+    total_negative = 0
+    total_comments = 0
+    comments_for_cloud = []
+    for submission in results:
+        submission.comments.replace_more(limit=None)
+        all_comments = submission.comments.list()
+        for comment in all_comments:
+            comment_body = comment.body
+            text = transform_text(comment_body)
+            comments_for_cloud.append(comment_body)
+            if text:
+                tokens = tokenize(text)
+                tokenized_input = tokenizer(
+                    tokens, return_tensors='pt', truncation=True, padding=True)
+                outputs = model(**tokenized_input)
+                probabilities = torch.softmax(outputs.logits, dim=-1)
+                mean_probabilities = probabilities.mean(dim=1)
+                positive_pct = mean_probabilities[0][1].item() * 100
+                negative_pct = mean_probabilities[0][0].item() * 100
+                total_positive += positive_pct
+                total_negative += negative_pct
+                total_comments += 1
+    if total_comments > 0:
+        avg_positive = total_positive / total_comments
+        avg_negative = total_negative / total_comments
+    else:
+        avg_positive = 0
+        avg_negative = 0
+    if total_comments > 0:
+        all_comments_string = ' '.join(comments_for_cloud)
+        wordcloud = WordCloud(width=400, height=400,
+                              background_color='white',
+                              max_words=30,
+                              stopwords=stopwords.words('english'),
+                              min_font_size=10).generate(all_comments_string)
+     # Save the WordCloud image as a static file
+        wordcloud.to_file(
+            f'D:/projects/Recon/static/images/wordcloud/{query}_cloud.png')
+    else:
+        wordcloud = None
+    print(f'positive:{avg_positive}')
+    return round(avg_positive), round(avg_negative), wordcloud

static/images/wordcloud/Lord of the mysteries_cloud.png ADDED Viewed

static/images/wordcloud/Mother of Learning_cloud.png ADDED Viewed

static/images/wordcloud/The Perfect Run_cloud.png ADDED Viewed