Spaces:

hlopez
/

Twitter-Positivity-Analyzer

Runtime error

Hector Lopez committed on May 10, 2022

Commit

c5b702e

1 Parent(s): c6d3bd0

Upload application logic

Files changed (4) hide show

app.py ADDED Viewed

+"""
+Gradio Twitter analyzer application.
+This module provides a gradio-based web application
+for the Twitter analyzer project.
+"""
+import gradio as gr
+from tweet_scraper import retrieve_tweet_text
+from backend import predict_positivity
+def process_tweet(url: str) -> str:
+    """
+    Get a tweet's positivity.
+    Args:
+        url (str): Tweet's URL.
+    Returns:
+        str: Predicted positivity
+    """
+    text = retrieve_tweet_text(url)
+    outcome = predict_positivity(text)
+    return outcome
+app = gr.Interface(
+    fn=process_tweet,
+    inputs=gr.inputs.Textbox(lines=2, placeholder="Tweet url..."),
+    outputs="text",
+)
+if __name__ == "__main__":
+    app, local_url, share_url = app.launch()

backend.py ADDED Viewed

+"""
+Positivity predictor.
+This module provides the functionality to predict
+a tweet's positivity using a BERT model.
+"""
+import torch
+from transformers import BertForSequenceClassification, BertTokenizer
+tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)
+model = BertForSequenceClassification.from_pretrained(
+    "bert-base-uncased",
+    num_labels=5,
+    output_attentions=False,
+    output_hidden_states=False,
+    local_files_only=True,
+)
+model.load_state_dict(torch.load("data/BERT_ft_epoch5.model"))
+model.eval()
+def predict_positivity(text: str) -> str:
+    """
+    Predict the positivity of a given tweet.
+    Args:
+        text (str): Tweet's text.
+    Returns:
+        str: Predicted positivity.
+    """
+    label_dict = {
+        0: "Extremely Negative",
+        1: "Negative",
+        2: "Neutral",
+        3: "Positive",
+        4: "Extremely Positive",
+    }
+    encoded = tokenizer(text, return_tensors="pt")
+    logits = model(**encoded).logits
+    predicted_class_id = logits.argmax().item()
+    return label_dict[predicted_class_id]

requirements.txt ADDED Viewed

+torch
+transformers
+gradio

tweet_scraper.py ADDED Viewed

+"""
+Twitter scraper.
+This module provides the functionality to retrieve
+a tweet's text given a tweet's URL.
+"""
+import re
+import requests
+def retrieve_tweet_text(tweet_url: str) -> str:
+    """
+    Retrieve a tweet's text.
+    Args:
+        tweet_url (url): Tweet's URL.
+    Returns:
+        str: Tweet's parsed text.
+    """
+    # Get the url to retrieve tweet-related data
+    url = (
+        "https://publish.twitter.com/oembed?dnt=true",
+        f"&omit_script=true&url={tweet_url}",
+    )
+    url = str.join("", url)
+    # Get the raw html containing th tweet text
+    raw_html = requests.get(url).json()["html"]
+    # Remove links from text
+    pattern = r"<[a][^>]*>(.+?)</[a]>"
+    html = re.sub(pattern, "", raw_html)
+    # Remove the HTML tags from the text
+    text = [i.strip() for i in re.sub("<.*?>", "", html).splitlines() if i][0]
+    # If there is a picture, remove all the text after it
+    if "pic" in text:
+        idx = text.index("pic")
+        text = text[:idx]
+    # If there is no picture, the &mdash defines the tweet's
+    # end.
+    elif "&mdash" in text:
+        idx = text.index("&mdash")
+        text = text[:idx]
+    return text